Browse Source

Support semicolon as extension character while parsing phone numbers. (#1458)

* Support semicolon as extension character while parsing

* Add notes to pending_code_changes.txt

* JS port: Support semicolon as extension character while parsing

* Update comments in phonenumberutil.js
pull/1455/merge
penmetsaa 9 years ago
committed by GitHub
parent
commit
6347995ee6
11 changed files with 55 additions and 20 deletions
  1. +6
    -6
      cpp/src/phonenumbers/phonenumberutil.cc
  2. +4
    -0
      cpp/test/phonenumbers/phonenumbermatcher_test.cc
  3. +12
    -0
      cpp/test/phonenumbers/phonenumberutil_test.cc
  4. BIN
      java/carrier/src/com/google/i18n/phonenumbers/carrier/data/config
  5. BIN
      java/geocoder/src/com/google/i18n/phonenumbers/geocoding/data/config
  6. +6
    -6
      java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java
  7. +3
    -0
      java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java
  8. +5
    -0
      java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java
  9. +2
    -0
      java/pending_code_changes.txt
  10. +9
    -8
      javascript/i18n/phonenumbers/phonenumberutil.js
  11. +8
    -0
      javascript/i18n/phonenumbers/phonenumberutil_test.js

+ 6
- 6
cpp/src/phonenumbers/phonenumberutil.cc View File

@ -208,9 +208,9 @@ string CreateExtnPattern(const string& single_extn_symbols) {
// The first regular expression covers RFC 3966 format, where the extension is
// added using ";ext=". The second more generic one starts with optional white
// space and ends with an optional full stop (.), followed by zero or more
// spaces/tabs and then the numbers themselves. The third one covers the
// special case of American numbers where the extension is written with a hash
// at the end, such as "- 503#".
// spaces/tabs/commas and then the numbers themselves. The third one covers
// the special case of American numbers where the extension is written with a
// hash at the end, such as "- 503#".
// Note that the only capturing groups should be around the digits that you
// want to capture as part of the extension, or else parsing will fail!
// Canonical-equivalence doesn't seem to be an option with RE2, so we allow
@ -451,8 +451,8 @@ class PhoneNumberRegExpsAndMappings {
// will be run as a case-insensitive regexp match. Wide character versions are
// also provided after each ASCII version.
// For parsing, we are slightly more lenient in our interpretation than for
// matching. Here we allow a "comma" as a possible extension indicator. When
// matching, this is hardly ever used to indicate this.
// matching. Here we allow "comma" and "semicolon" as possible extension
// indicators. When matching, these are hardly ever used to indicate this.
const string extn_patterns_for_parsing_;
public:
@ -570,7 +570,7 @@ class PhoneNumberRegExpsAndMappings {
punctuation_and_star_sign_, kDigits,
"]*")),
extn_patterns_for_parsing_(
CreateExtnPattern(StrCat(",", kSingleExtnSymbolsForMatching))),
CreateExtnPattern(StrCat(",;", kSingleExtnSymbolsForMatching))),
regexp_factory_(new RegExpFactory()),
regexp_cache_(new RegExpCache(*regexp_factory_.get(), 128)),
diallable_char_mappings_(),


+ 4
- 0
cpp/test/phonenumbers/phonenumbermatcher_test.cc View File

@ -253,6 +253,10 @@ class PhoneNumberMatcherTest : public testing::Test {
// With trailing numbers after a comma. The 45 should not be considered an
// extension.
context_pairs.push_back(NumberContext("", ", 45 days a year"));
// When matching, a semicolon followed by a legitimate extension symbol
// is not taken to indicate an extension. The 7246433 should not be
// considered an extension.
context_pairs.push_back(NumberContext("", ";x 7246433"));
// With a postfix stripped off as it looks like the start of another number.
context_pairs.push_back(NumberContext("Call ", "/x12 more"));


+ 12
- 0
cpp/test/phonenumbers/phonenumberutil_test.cc View File

@ -2824,6 +2824,9 @@ TEST_F(PhoneNumberUtilTest, IsNumberMatchMatches) {
EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH,
phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005 extn 1234",
"+6433316005#1234"));
EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH,
phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005 extn 1234",
"+6433316005;1234"));
// Test proto buffers.
PhoneNumber nz_number;
nz_number.set_country_code(64);
@ -3792,6 +3795,15 @@ TEST_F(PhoneNumberUtilTest, ParseExtensions) {
phone_util_.Parse("(800) 901-3355 , ext 7246433", RegionCode::US(),
&test_number));
EXPECT_EQ(us_with_extension, test_number);
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
phone_util_.Parse("(800) 901-3355 ; 7246433", RegionCode::US(),
&test_number));
EXPECT_EQ(us_with_extension, test_number);
// To test an extension character without surrounding spaces.
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
phone_util_.Parse("(800) 901-3355;7246433", RegionCode::US(),
&test_number));
EXPECT_EQ(us_with_extension, test_number);
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
phone_util_.Parse("(800) 901-3355 ,extension 7246433",
RegionCode::US(),


BIN
java/carrier/src/com/google/i18n/phonenumbers/carrier/data/config View File


BIN
java/geocoder/src/com/google/i18n/phonenumbers/geocoding/data/config View File


+ 6
- 6
java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java View File

@ -313,9 +313,9 @@ public class PhoneNumberUtil {
// One-character symbols that can be used to indicate an extension.
String singleExtnSymbolsForMatching = "x\uFF58#\uFF03~\uFF5E";
// For parsing, we are slightly more lenient in our interpretation than for matching. Here we
// allow a "comma" as a possible extension indicator. When matching, this is hardly ever used to
// indicate this.
String singleExtnSymbolsForParsing = "," + singleExtnSymbolsForMatching;
// allow "comma" and "semicolon" as possible extension indicators. When matching, these are
// hardly ever used to indicate this.
String singleExtnSymbolsForParsing = ",;" + singleExtnSymbolsForMatching;
EXTN_PATTERNS_FOR_PARSING = createExtnPattern(singleExtnSymbolsForParsing);
EXTN_PATTERNS_FOR_MATCHING = createExtnPattern(singleExtnSymbolsForMatching);
@ -328,9 +328,9 @@ public class PhoneNumberUtil {
private static String createExtnPattern(String singleExtnSymbols) {
// There are three regular expressions here. The first covers RFC 3966 format, where the
// extension is added using ";ext=". The second more generic one starts with optional white
// space and ends with an optional full stop (.), followed by zero or more spaces/tabs and then
// the numbers themselves. The other one covers the special case of American numbers where the
// extension is written with a hash at the end, such as "- 503#".
// space and ends with an optional full stop (.), followed by zero or more spaces/tabs/commas
// and then the numbers themselves. The other one covers the special case of American numbers
// where the extension is written with a hash at the end, such as "- 503#".
// Note that the only capturing groups should be around the digits that you want to capture as
// part of the extension, or else parsing will fail!
// Canonical-equivalence doesn't seem to be an option with Android java, so we allow two options


+ 3
- 0
java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java View File

@ -981,6 +981,9 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase {
"As I said on 03/10/2011, you may call me at ", ""));
// With trailing numbers after a comma. The 45 should not be considered an extension.
contextPairs.add(new NumberContext("", ", 45 days a year"));
// When matching, a semicolon followed by a legitimate extension symbol is not taken to
// indicate an extension. The 7246433 should not be considered an extension.
contextPairs.add(new NumberContext("", ";x 7246433"));
// With a postfix stripped off as it looks like the start of another number.
contextPairs.add(new NumberContext("Call ", "/x12 more"));


+ 5
- 0
java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java View File

@ -2262,6 +2262,9 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
usWithExtension.setCountryCode(1).setNationalNumber(8009013355L).setExtension("7246433");
assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355 x 7246433", RegionCode.US));
assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355 , ext 7246433", RegionCode.US));
assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355 ; 7246433", RegionCode.US));
// To test an extension character without surrounding spaces.
assertEquals(usWithExtension, phoneUtil.parse("(800) 901-3355;7246433", RegionCode.US));
assertEquals(usWithExtension,
phoneUtil.parse("(800) 901-3355 ,extension 7246433", RegionCode.US));
assertEquals(usWithExtension,
@ -2405,6 +2408,8 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
// Test numbers with extensions.
assertEquals(PhoneNumberUtil.MatchType.EXACT_MATCH,
phoneUtil.isNumberMatch("+64 3 331-6005 extn 1234", "+6433316005#1234"));
assertEquals(PhoneNumberUtil.MatchType.EXACT_MATCH,
phoneUtil.isNumberMatch("+64 3 331-6005 ext. 1234", "+6433316005;1234"));
// Test proto buffers.
assertEquals(PhoneNumberUtil.MatchType.EXACT_MATCH,
phoneUtil.isNumberMatch(NZ_NUMBER, "+6403 331 6005"));


+ 2
- 0
java/pending_code_changes.txt View File

@ -3,3 +3,5 @@ Code changes:
of phone number objects. These have been marked deprecated for months. Any
users of these methods should call PhoneNumberUtil.parse first to create a
PhoneNumber object, and pass this in.
- Support semicolon as an extension character while parsing phone numbers. This
does not apply when finding phone numbers in text (PhoneNumberMatcher).

+ 9
- 8
javascript/i18n/phonenumbers/phonenumberutil.js View File

@ -763,13 +763,14 @@ i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ =
* also provided after each ASCII version. There are three regular expressions
* here. The first covers RFC 3966 format, where the extension is added using
* ';ext='. The second more generic one starts with optional white space and
* ends with an optional full stop (.), followed by zero or more spaces/tabs and
* then the numbers themselves. The other one covers the special case of
* American numbers where the extension is written with a hash at the end, such
* as '- 503#'. Note that the only capturing groups should be around the digits
* that you want to capture as part of the extension, or else parsing will fail!
* We allow two options for representing the accented o - the character itself,
* and one in the unicode decomposed form with the combining acute accent.
* ends with an optional full stop (.), followed by zero or more
* spaces/tabs/commas and then the numbers themselves. The other one covers the
* special case of American numbers where the extension is written with a hash
* at the end, such as '- 503#'. Note that the only capturing groups should be
* around the digits that you want to capture as part of the extension, or else
* parsing will fail! We allow two options for representing the accented o - the
* character itself, and one in the unicode decomposed form with the combining
* acute accent.
*
* @const
* @type {string}
@ -780,7 +781,7 @@ i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ =
i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ + '|' +
'[ \u00A0\\t,]*' +
'(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|\uFF45?\uFF58\uFF54\uFF4E?|' +
'[,x\uFF58#\uFF03~\uFF5E]|int|anexo|\uFF49\uFF4E\uFF54)' +
'[;,x\uFF58#\uFF03~\uFF5E]|int|anexo|\uFF49\uFF4E\uFF54)' +
'[:\\.\uFF0E]?[ \u00A0\\t,-]*' +
i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ + '#?|' +
'[- ]+([' + i18n.phonenumbers.PhoneNumberUtil.VALID_DIGITS_ + ']{1,5})#';


+ 8
- 0
javascript/i18n/phonenumbers/phonenumberutil_test.js View File

@ -2928,6 +2928,11 @@ function testParseExtensions() {
phoneUtil.parse('(800) 901-3355 x 7246433', RegionCode.US)));
assertTrue(usWithExtension.equals(
phoneUtil.parse('(800) 901-3355 , ext 7246433', RegionCode.US)));
assertTrue(usWithExtension.equals(
phoneUtil.parse('(800) 901-3355 ; 7246433', RegionCode.US)));
// To test an extension character without surrounding spaces.
assertTrue(usWithExtension.equals(
phoneUtil.parse('(800) 901-3355;7246433', RegionCode.US)));
assertTrue(usWithExtension.equals(
phoneUtil.parse('(800) 901-3355 ,extension 7246433', RegionCode.US)));
assertTrue(usWithExtension.equals(
@ -3120,6 +3125,9 @@ function testIsNumberMatchMatches() {
assertEquals(i18n.phonenumbers.PhoneNumberUtil.MatchType.EXACT_MATCH,
phoneUtil.isNumberMatch('+64 3 331-6005 extn 1234',
'+6433316005#1234'));
assertEquals(i18n.phonenumbers.PhoneNumberUtil.MatchType.EXACT_MATCH,
phoneUtil.isNumberMatch('+64 3 331-6005 ext. 1234',
'+6433316005;1234'));
// Test proto buffers.
assertEquals(i18n.phonenumbers.PhoneNumberUtil.MatchType.EXACT_MATCH,
phoneUtil.isNumberMatch(NZ_NUMBER, '+6403 331 6005'));


Loading…
Cancel
Save