diff --git a/cpp/src/phonenumbers/phonenumberutil.cc b/cpp/src/phonenumbers/phonenumberutil.cc index 3863e7031..0511b215c 100644 --- a/cpp/src/phonenumbers/phonenumberutil.cc +++ b/cpp/src/phonenumbers/phonenumberutil.cc @@ -208,9 +208,9 @@ string CreateExtnPattern(const string& single_extn_symbols) { // The first regular expression covers RFC 3966 format, where the extension is // added using ";ext=". The second more generic one starts with optional white // space and ends with an optional full stop (.), followed by zero or more - // spaces/tabs and then the numbers themselves. The third one covers the - // special case of American numbers where the extension is written with a hash - // at the end, such as "- 503#". + // spaces/tabs/commas and then the numbers themselves. The third one covers + // the special case of American numbers where the extension is written with a + // hash at the end, such as "- 503#". // Note that the only capturing groups should be around the digits that you // want to capture as part of the extension, or else parsing will fail! // Canonical-equivalence doesn't seem to be an option with RE2, so we allow @@ -451,8 +451,8 @@ class PhoneNumberRegExpsAndMappings { // will be run as a case-insensitive regexp match. Wide character versions are // also provided after each ASCII version. // For parsing, we are slightly more lenient in our interpretation than for - // matching. Here we allow a "comma" as a possible extension indicator. When - // matching, this is hardly ever used to indicate this. + // matching. Here we allow "comma" and "semicolon" as possible extension + // indicators. When matching, these are hardly ever used to indicate this. 
const string extn_patterns_for_parsing_; public: @@ -570,7 +570,7 @@ class PhoneNumberRegExpsAndMappings { punctuation_and_star_sign_, kDigits, "]*")), extn_patterns_for_parsing_( - CreateExtnPattern(StrCat(",", kSingleExtnSymbolsForMatching))), + CreateExtnPattern(StrCat(",;", kSingleExtnSymbolsForMatching))), regexp_factory_(new RegExpFactory()), regexp_cache_(new RegExpCache(*regexp_factory_.get(), 128)), diallable_char_mappings_(), diff --git a/cpp/test/phonenumbers/phonenumbermatcher_test.cc b/cpp/test/phonenumbers/phonenumbermatcher_test.cc index 282b10426..49818f167 100644 --- a/cpp/test/phonenumbers/phonenumbermatcher_test.cc +++ b/cpp/test/phonenumbers/phonenumbermatcher_test.cc @@ -253,6 +253,10 @@ class PhoneNumberMatcherTest : public testing::Test { // With trailing numbers after a comma. The 45 should not be considered an // extension. context_pairs.push_back(NumberContext("", ", 45 days a year")); + // When matching we don't consider semicolon along with legitimate extension + // symbol to indicate an extension. The 7246433 should not be considered an + // extension. + context_pairs.push_back(NumberContext("", ";x 7246433")); // With a postfix stripped off as it looks like the start of another number. context_pairs.push_back(NumberContext("Call ", "/x12 more")); diff --git a/cpp/test/phonenumbers/phonenumberutil_test.cc b/cpp/test/phonenumbers/phonenumberutil_test.cc index 1b440ef58..68f67c9b1 100644 --- a/cpp/test/phonenumbers/phonenumberutil_test.cc +++ b/cpp/test/phonenumbers/phonenumberutil_test.cc @@ -2824,6 +2824,9 @@ TEST_F(PhoneNumberUtilTest, IsNumberMatchMatches) { EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005 extn 1234", "+6433316005#1234")); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005 extn 1234", + "+6433316005;1234")); // Test proto buffers. 
PhoneNumber nz_number; nz_number.set_country_code(64); @@ -3792,6 +3795,15 @@ TEST_F(PhoneNumberUtilTest, ParseExtensions) { phone_util_.Parse("(800) 901-3355 , ext 7246433", RegionCode::US(), &test_number)); EXPECT_EQ(us_with_extension, test_number); + EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, + phone_util_.Parse("(800) 901-3355 ; 7246433", RegionCode::US(), + &test_number)); + EXPECT_EQ(us_with_extension, test_number); + // To test an extension character without surrounding spaces. + EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, + phone_util_.Parse("(800) 901-3355;7246433", RegionCode::US(), + &test_number)); + EXPECT_EQ(us_with_extension, test_number); EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR, phone_util_.Parse("(800) 901-3355 ,extension 7246433", RegionCode::US(), diff --git a/java/carrier/src/com/google/i18n/phonenumbers/carrier/data/config b/java/carrier/src/com/google/i18n/phonenumbers/carrier/data/config index eaa40e0a7..8fea85a6c 100644 Binary files a/java/carrier/src/com/google/i18n/phonenumbers/carrier/data/config and b/java/carrier/src/com/google/i18n/phonenumbers/carrier/data/config differ diff --git a/java/geocoder/src/com/google/i18n/phonenumbers/geocoding/data/config b/java/geocoder/src/com/google/i18n/phonenumbers/geocoding/data/config index a94e83f5f..a46903a1f 100644 Binary files a/java/geocoder/src/com/google/i18n/phonenumbers/geocoding/data/config and b/java/geocoder/src/com/google/i18n/phonenumbers/geocoding/data/config differ diff --git a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java index 63f292204..c652ad9c9 100644 --- a/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java +++ b/java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java @@ -313,9 +313,9 @@ public class PhoneNumberUtil { // One-character symbols that can be used to indicate an extension. 
String singleExtnSymbolsForMatching = "x\uFF58#\uFF03~\uFF5E"; // For parsing, we are slightly more lenient in our interpretation than for matching. Here we - // allow a "comma" as a possible extension indicator. When matching, this is hardly ever used to - // indicate this. - String singleExtnSymbolsForParsing = "," + singleExtnSymbolsForMatching; + // allow "comma" and "semicolon" as possible extension indicators. When matching, these are + // hardly ever used to indicate this. + String singleExtnSymbolsForParsing = ",;" + singleExtnSymbolsForMatching; EXTN_PATTERNS_FOR_PARSING = createExtnPattern(singleExtnSymbolsForParsing); EXTN_PATTERNS_FOR_MATCHING = createExtnPattern(singleExtnSymbolsForMatching); @@ -328,9 +328,9 @@ public class PhoneNumberUtil { private static String createExtnPattern(String singleExtnSymbols) { // There are three regular expressions here. The first covers RFC 3966 format, where the // extension is added using ";ext=". The second more generic one starts with optional white - // space and ends with an optional full stop (.), followed by zero or more spaces/tabs and then - // the numbers themselves. The other one covers the special case of American numbers where the - // extension is written with a hash at the end, such as "- 503#". + // space and ends with an optional full stop (.), followed by zero or more spaces/tabs/commas + // and then the numbers themselves. The other one covers the special case of American numbers + // where the extension is written with a hash at the end, such as "- 503#". // Note that the only capturing groups should be around the digits that you want to capture as // part of the extension, or else parsing will fail! 
// Canonical-equivalence doesn't seem to be an option with Android java, so we allow two options diff --git a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java index 5bd13b8b2..175f52846 100644 --- a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java +++ b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java @@ -981,6 +981,9 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase { "As I said on 03/10/2011, you may call me at ", "")); // With trailing numbers after a comma. The 45 should not be considered an extension. contextPairs.add(new NumberContext("", ", 45 days a year")); + // When matching we don't consider semicolon along with legitimate extension symbol to indicate + // an extension. The 7246433 should not be considered an extension. + contextPairs.add(new NumberContext("", ";x 7246433")); // With a postfix stripped off as it looks like the start of another number. 
contextPairs.add(new NumberContext("Call ", "/x12 more")); diff --git a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java index 812a04c44..145dbe14f 100644 --- a/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java +++ b/java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java @@ -2262,6 +2262,9 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase { usWithExtension.setCountryCode(1).setNationalNumber(8009013355L).setExtension("7246433"); assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355 x 7246433", RegionCode.US)); assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355 , ext 7246433", RegionCode.US)); + assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355 ; 7246433", RegionCode.US)); + // To test an extension character without surrounding spaces. + assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355;7246433", RegionCode.US)); assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355 ,extension 7246433", RegionCode.US)); assertEquals(usWithExtension, @@ -2405,6 +2408,8 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase { // Test numbers with extensions. assertEquals(PhoneNumberUtil.MatchType.EXACT_MATCH, phoneUtil.isNumberMatch("+64 3 331-6005 extn 1234", "+6433316005#1234")); + assertEquals(PhoneNumberUtil.MatchType.EXACT_MATCH, + phoneUtil.isNumberMatch("+64 3 331-6005 ext. 1234", "+6433316005;1234")); // Test proto buffers. assertEquals(PhoneNumberUtil.MatchType.EXACT_MATCH, phoneUtil.isNumberMatch(NZ_NUMBER, "+6403 331 6005")); diff --git a/java/pending_code_changes.txt b/java/pending_code_changes.txt index c1d6ecf04..81be55990 100644 --- a/java/pending_code_changes.txt +++ b/java/pending_code_changes.txt @@ -3,3 +3,5 @@ Code changes: of phone number objects. These have been marked deprecated for months. 
Any users of these methods should call PhoneNumberUtil.parse first to create a PhoneNumber object, and pass this in. + - Support semicolon as extension character while parsing phone numbers. This + does not apply when finding (matching) phone numbers in text. diff --git a/javascript/i18n/phonenumbers/phonenumberutil.js b/javascript/i18n/phonenumbers/phonenumberutil.js index 5beff9595..f3727e86f 100644 --- a/javascript/i18n/phonenumbers/phonenumberutil.js +++ b/javascript/i18n/phonenumbers/phonenumberutil.js @@ -763,13 +763,14 @@ i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ = * also provided after each ASCII version. There are three regular expressions * here. The first covers RFC 3966 format, where the extension is added using * ';ext='. The second more generic one starts with optional white space and - * ends with an optional full stop (.), followed by zero or more spaces/tabs and - * then the numbers themselves. The other one covers the special case of - * American numbers where the extension is written with a hash at the end, such - * as '- 503#'. Note that the only capturing groups should be around the digits - * that you want to capture as part of the extension, or else parsing will fail! - * We allow two options for representing the accented o - the character itself, - * and one in the unicode decomposed form with the combining acute accent. + * ends with an optional full stop (.), followed by zero or more spaces/tabs + * /commas and then the numbers themselves. The other one covers the special + * case of American numbers where the extension is written with a hash at the + * end, such as '- 503#'. Note that the only capturing groups should be around + * the digits that you want to capture as part of the extension, or else parsing + * will fail! We allow two options for representing the accented o - the + * character itself, and one in the unicode decomposed form with the combining + * acute accent. 
* * @const * @type {string} @@ -780,7 +781,7 @@ i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ = i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ + '|' + '[ \u00A0\\t,]*' + '(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|\uFF45?\uFF58\uFF54\uFF4E?|' + - '[,x\uFF58#\uFF03~\uFF5E]|int|anexo|\uFF49\uFF4E\uFF54)' + + '[;,x\uFF58#\uFF03~\uFF5E]|int|anexo|\uFF49\uFF4E\uFF54)' + '[:\\.\uFF0E]?[ \u00A0\\t,-]*' + i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ + '#?|' + '[- ]+([' + i18n.phonenumbers.PhoneNumberUtil.VALID_DIGITS_ + ']{1,5})#'; diff --git a/javascript/i18n/phonenumbers/phonenumberutil_test.js b/javascript/i18n/phonenumbers/phonenumberutil_test.js index bf2134792..c8d0e5c24 100644 --- a/javascript/i18n/phonenumbers/phonenumberutil_test.js +++ b/javascript/i18n/phonenumbers/phonenumberutil_test.js @@ -2928,6 +2928,11 @@ function testParseExtensions() { phoneUtil.parse('(800) 901-3355 x 7246433', RegionCode.US))); assertTrue(usWithExtension.equals( phoneUtil.parse('(800) 901-3355 , ext 7246433', RegionCode.US))); + assertTrue(usWithExtension.equals( + phoneUtil.parse('(800) 901-3355 ; 7246433', RegionCode.US))); + // To test an extension character without surrounding spaces. + assertTrue(usWithExtension.equals( + phoneUtil.parse('(800) 901-3355;7246433', RegionCode.US))); assertTrue(usWithExtension.equals( phoneUtil.parse('(800) 901-3355 ,extension 7246433', RegionCode.US))); assertTrue(usWithExtension.equals( @@ -3120,6 +3125,9 @@ function testIsNumberMatchMatches() { assertEquals(i18n.phonenumbers.PhoneNumberUtil.MatchType.EXACT_MATCH, phoneUtil.isNumberMatch('+64 3 331-6005 extn 1234', '+6433316005#1234')); + assertEquals(i18n.phonenumbers.PhoneNumberUtil.MatchType.EXACT_MATCH, + phoneUtil.isNumberMatch('+64 3 331-6005 ext. 1234', + '+6433316005;1234')); // Test proto buffers. assertEquals(i18n.phonenumbers.PhoneNumberUtil.MatchType.EXACT_MATCH, phoneUtil.isNumberMatch(NZ_NUMBER, '+6403 331 6005'));