Browse Source

Using possibleLengths info for length calculations in Java (#1355)

Changing PhoneNumberUtil to use the possibleLengths information, not the reg-exes.

Note the API is not changing, but the metadata is now somewhat stricter for
many countries, since before we applied only a minimum and maximum length for
most countries, and now we specify exactly which lengths are possible.

This has a flow-on effect when parsing, since we decide whether to do certain
operations like strip a national prefix based on whether the number is a
possible length before/after - when parsing, if the number is shorter than the *national* pattern, we no longer strip the national prefix.

Affected countries:
AD (7 digits now invalid)
AM (7 digits now invalid)
AR (9 digits now invalid)
AZ (8 digits now invalid)
BG (4 digits now valid for local-only numbers)
BJ (5-7 digits now invalid)
CC/CX (5 digit numbers now possible: this should always have been the case, but the generalDesc was wrong and didn't reflect its child elements. We now calculate it based on them, which allows 5 digit numbers.)
CO (9 digits now invalid)
CR (9 digits now invalid)
ET (8 digits now invalid)
GE (7 and 8 digits now invalid)
GH (8 digits now invalid)
IL (5 and 6 digits now invalid)
IM/JE/GG (7, 8 and 9 digits now invalid, shortest national number length now 10, so parsing affected for numbers shorter than this)
IS (8 digits now invalid)
KG (7,8 digits now invalid)
KR (11 digits now invalid)
LA (7 digits now invalid)
LI (8 digits now invalid)
LY (8 digits now invalid)
MV (8 and 9 digits now invalid)
MW (8 digits now invalid)
MX (9 digits now invalid)
NP (9 digits now invalid)
SE (11 digits now invalid)
SG (9 digits now invalid)
SL (7 digits now invalid)
SM (7-9 digits now invalid)
UA (8 digits now invalid)
UG (8 digits now invalid)
UZ (8 digits now invalid)
pull/1361/head
lararennie 9 years ago
committed by GitHub
parent
commit
ae71e8b726
17 changed files with 118 additions and 46 deletions
  1. BIN
      java/carrier/src/com/google/i18n/phonenumbers/carrier/data/config
  2. BIN
      java/geocoder/src/com/google/i18n/phonenumbers/geocoding/data/config
  3. +39
    -40
      java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java
  4. +6
    -2
      java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java
  5. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_AD
  6. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_BB
  7. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_BR
  8. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_CA
  9. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_CC
  10. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_CX
  11. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_FR
  12. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_GB
  13. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_GG
  14. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_SE
  15. BIN
      java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_US
  16. +6
    -0
      java/pending_code_changes.txt
  17. +67
    -4
      resources/PhoneNumberMetadataForTesting.xml

BIN
java/carrier/src/com/google/i18n/phonenumbers/carrier/data/config View File


BIN
java/geocoder/src/com/google/i18n/phonenumbers/geocoding/data/config View File


+ 39
- 40
java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java View File

@ -1306,8 +1306,8 @@ public class PhoneNumberUtil {
// short numbers, which are always dialled in national format.
PhoneMetadata regionMetadata = getMetadataForRegion(regionCallingFrom);
if (canBeInternationallyDialled(numberNoExt)
&& !isShorterThanPossibleNormalNumber(regionMetadata,
getNationalSignificantNumber(numberNoExt))) {
&& testNumberLength(getNationalSignificantNumber(numberNoExt),
regionMetadata.getGeneralDesc()) != ValidationResult.TOO_SHORT) {
formattedNumber = format(numberNoExt, PhoneNumberFormat.INTERNATIONAL);
} else {
formattedNumber = format(numberNoExt, PhoneNumberFormat.NATIONAL);
@ -2113,19 +2113,19 @@ public class PhoneNumberUtil {
return metadataSource.getMetadataForNonGeographicalRegion(countryCallingCode);
}
boolean isNumberPossibleForDesc(String nationalNumber, PhoneNumberDesc numberDesc) {
Matcher possibleNumberPatternMatcher =
regexCache.getPatternForRegex(numberDesc.getPossibleNumberPattern())
.matcher(nationalNumber);
return possibleNumberPatternMatcher.matches();
}
boolean isNumberMatchingDesc(String nationalNumber, PhoneNumberDesc numberDesc) {
// Check if any possible number lengths are present; if so, we use them to avoid checking the
// validation pattern if they don't match. If they are absent, this means they match the general
// description, which we have already checked before checking a specific number type.
int actualLength = nationalNumber.length();
List<Integer> possibleLengths = numberDesc.getPossibleLengthList();
if (possibleLengths.size() > 0 && !possibleLengths.contains(actualLength)) {
return false;
}
Matcher nationalNumberPatternMatcher =
regexCache.getPatternForRegex(numberDesc.getNationalNumberPattern())
.matcher(nationalNumber);
return isNumberPossibleForDesc(nationalNumber, numberDesc)
&& nationalNumberPatternMatcher.matches();
return nationalNumberPatternMatcher.matches();
}
/**
@ -2362,32 +2362,35 @@ public class PhoneNumberUtil {
}
/**
* Helper method to check a number against a particular pattern and determine whether it matches,
* or is too short or too long. Currently, if a number pattern suggests that numbers of length 7
* and 10 are possible, and a number in between these possible lengths is entered, such as of
* length 8, this will return TOO_LONG.
* Helper method to check a number against possible lengths for this number, and determine whether
* it matches, or is too short or too long. Currently, if a number pattern suggests that numbers
* of length 7 and 10 are possible, and a number in between these possible lengths is entered,
* such as of length 8, this will return TOO_LONG.
*/
private ValidationResult testNumberLengthAgainstPattern(Pattern numberPattern, String number) {
Matcher numberMatcher = numberPattern.matcher(number);
if (numberMatcher.matches()) {
private ValidationResult testNumberLength(String number, PhoneNumberDesc phoneNumberDesc) {
List<Integer> possibleLengths = phoneNumberDesc.getPossibleLengthList();
List<Integer> localLengths = phoneNumberDesc.getPossibleLengthLocalOnlyList();
int actualLength = number.length();
if (localLengths.contains(actualLength)) {
return ValidationResult.IS_POSSIBLE;
}
if (numberMatcher.lookingAt()) {
return ValidationResult.TOO_LONG;
} else {
// There should always be "possibleLengths" set for every element. This will be a build-time
// check once ShortNumberMetadata.xml is migrated to contain this information as well.
int minimumLength = possibleLengths.get(0);
if (minimumLength == actualLength) {
return ValidationResult.IS_POSSIBLE;
} else if (minimumLength > actualLength) {
return ValidationResult.TOO_SHORT;
} else if (possibleLengths.get(possibleLengths.size() - 1) < actualLength) {
return ValidationResult.TOO_LONG;
}
}
/**
* Helper method to check whether a number is too short to be a regular length phone number in a
* region.
*/
private boolean isShorterThanPossibleNormalNumber(PhoneMetadata regionMetadata, String number) {
Pattern possibleNumberPattern = regexCache.getPatternForRegex(
regionMetadata.getGeneralDesc().getPossibleNumberPattern());
return testNumberLengthAgainstPattern(possibleNumberPattern, number) ==
ValidationResult.TOO_SHORT;
// Note that actually the number is not too long if possibleLengths does not contain the length:
// we know it is less than the highest possible number length, and higher than the lowest
// possible number length. However, we don't currently have an enum to express this, so we
// return TOO_LONG in the short-term.
// We skip the first element; we've already checked it.
return possibleLengths.subList(1, possibleLengths.size()).contains(actualLength)
? ValidationResult.IS_POSSIBLE : ValidationResult.TOO_LONG;
}
/**
@ -2424,9 +2427,7 @@ public class PhoneNumberUtil {
String regionCode = getRegionCodeForCountryCode(countryCode);
// Metadata cannot be null because the country calling code is valid.
PhoneMetadata metadata = getMetadataForRegionOrCallingCode(countryCode, regionCode);
Pattern possibleNumberPattern =
regexCache.getPatternForRegex(metadata.getGeneralDesc().getPossibleNumberPattern());
return testNumberLengthAgainstPattern(possibleNumberPattern, nationalNumber);
return testNumberLength(nationalNumber, metadata.getGeneralDesc());
}
/**
@ -2596,15 +2597,12 @@ public class PhoneNumberUtil {
regexCache.getPatternForRegex(generalDesc.getNationalNumberPattern());
maybeStripNationalPrefixAndCarrierCode(
potentialNationalNumber, defaultRegionMetadata, null /* Don't need the carrier code */);
Pattern possibleNumberPattern =
regexCache.getPatternForRegex(generalDesc.getPossibleNumberPattern());
// If the number was not valid before but is valid now, or if it was too long before, we
// consider the number with the country calling code stripped to be a better result and
// keep that instead.
if ((!validNumberPattern.matcher(fullNumber).matches()
&& validNumberPattern.matcher(potentialNationalNumber).matches())
|| testNumberLengthAgainstPattern(possibleNumberPattern, fullNumber.toString())
== ValidationResult.TOO_LONG) {
|| testNumberLength(fullNumber.toString(), generalDesc) == ValidationResult.TOO_LONG) {
nationalNumber.append(potentialNationalNumber);
if (keepRawInput) {
phoneNumber.setCountryCodeSource(CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN);
@ -3013,7 +3011,8 @@ public class PhoneNumberUtil {
// We require that the NSN remaining after stripping the national prefix and carrier code be
// long enough to be a possible length for the region. Otherwise, we don't do the stripping,
// since the original number could be a valid short number.
if (!isShorterThanPossibleNormalNumber(regionMetadata, potentialNationalNumber.toString())) {
if (testNumberLength(potentialNationalNumber.toString(), regionMetadata.getGeneralDesc())
!= ValidationResult.TOO_SHORT) {
normalizedNationalNumber = potentialNationalNumber;
if (keepRawInput) {
phoneNumber.setPreferredDomesticCarrierCode(carrierCode.toString());


+ 6
- 2
java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java View File

@ -145,8 +145,12 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
assertEquals("[13-689]\\d{9}|2[0-35-9]\\d{8}",
metadata.getGeneralDesc().getNationalNumberPattern());
assertEquals("\\d{7}(?:\\d{3})?", metadata.getGeneralDesc().getPossibleNumberPattern());
assertTrue(metadata.getGeneralDesc().exactlySameAs(metadata.getFixedLine()));
// Fixed-line data should be inherited from the general desc for the national number pattern,
// since it wasn't overridden.
assertEquals(metadata.getGeneralDesc().getNationalNumberPattern(),
metadata.getFixedLine().getNationalNumberPattern());
assertEquals("\\d{10}", metadata.getTollFree().getPossibleNumberPattern());
assertEquals(1, metadata.getGeneralDesc().getPossibleLengthCount());
assertEquals(10, metadata.getGeneralDesc().getPossibleLength(0));
// Possible lengths are the same as the general description, so aren't stored separately in the
// toll free element as well.
@ -1315,7 +1319,7 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
assertTrue(phoneUtil.isPossibleNumber("253-0000", RegionCode.US));
assertTrue(phoneUtil.isPossibleNumber("+1 650 253 0000", RegionCode.GB));
assertTrue(phoneUtil.isPossibleNumber("+44 20 7031 3000", RegionCode.GB));
assertTrue(phoneUtil.isPossibleNumber("(020) 7031 3000", RegionCode.GB));
assertTrue(phoneUtil.isPossibleNumber("(020) 7031 300", RegionCode.GB));
assertTrue(phoneUtil.isPossibleNumber("7031 3000", RegionCode.GB));
assertTrue(phoneUtil.isPossibleNumber("3331 6005", RegionCode.NZ));
assertTrue(phoneUtil.isPossibleNumber("+800 1234 5678", RegionCode.UN001));


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_AD View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_BB View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_BR View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_CA View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_CC View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_CX View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_FR View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_GB View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_GG View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_SE View File


BIN
java/libphonenumber/test/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProtoForTesting_US View File


+ 6
- 0
java/pending_code_changes.txt View File

@ -4,6 +4,12 @@ Build changes:
- Fix geocoding file generation to handle language codes that are not just
two letters long properly and to process filenames in the same order.
Code changes:
- Switching the internal implementation of isPossible and related functions
to use the new possibleLengths metadata. This affects a lot of countries,
making isPossible more restrictive as more precise data is available. It
also affects parsing ambiguous and invalid numbers, as we decide whether
to strip a possible national prefix (1) or country code based on the length
of the number and whether it is possible before or after this.
- Formatting, naming (LOGGER -> logger) and comment tweaks to follow style
guide
- Removal of unneeded canBeGeocoded method in the


+ 67
- 4
resources/PhoneNumberMetadataForTesting.xml View File

@ -23,6 +23,13 @@
<territories>
<!-- Andorra -->
<territory id="AD" countryCode="376" internationalPrefix="00">
<generalDesc>
<nationalNumberPattern>\d{6}</nationalNumberPattern>
<possibleNumberPattern>\d{6}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="6"/>
</fixedLine>
</territory>
<!-- United Arab Emirates -->
@ -169,12 +176,27 @@
<!-- Barbados -->
<territory id="BB" countryCode="1" internationalPrefix="011">
<generalDesc>
<nationalNumberPattern>246\d{7}</nationalNumberPattern>
<possibleNumberPattern>\d{7,10}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="10" localOnly="7"/>
</fixedLine>
</territory>
<!-- Brazil -->
<!-- This country is used to test ShortNumberInfo, so at least the country calling code must be
recognised by the library. -->
recognised by the library. It is also used for formatInOriginalFormat tests, so some length
metadata is needed. -->
<territory id="BR" countryCode="55">
<generalDesc>
<nationalNumberPattern>\d{8,10}</nationalNumberPattern>
<possibleNumberPattern>\d{8,10}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="10" localOnly="8"/>
</fixedLine>
</territory>
<!-- Bahamas -->
@ -262,13 +284,27 @@
<!-- Canada -->
<territory id="CA" countryCode="1" internationalPrefix="011">
<generalDesc>
<nationalNumberPattern>226\d{7}</nationalNumberPattern>
<possibleNumberPattern>\d{7,10}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="10" localOnly="7"/>
</fixedLine>
</territory>
<!-- Cocos Islands -->
<!-- Country calling code shared with Australia. -->
<!-- This country is used to test ShortNumberInfo, so at least the country calling code must be
recognised by the library. -->
recognised by the library, and some length information is needed for parsing. -->
<territory id="CC" countryCode="61">
<generalDesc>
<nationalNumberPattern>\d{6,10}</nationalNumberPattern>
<possibleNumberPattern>\d{6,10}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="10" localOnly="6"/>
</fixedLine>
</territory>
<!-- China -->
@ -326,8 +362,15 @@
<!-- Christmas Islands -->
<!-- Country calling code shared with Australia. -->
<!-- This country is used to test ShortNumberInfo, so at least the country calling code must be
recognised by the library. -->
recognised by the library, and some length information is needed for parsing. -->
<territory id="CX" countryCode="61" internationalPrefix="00">
<generalDesc>
<nationalNumberPattern>\d{8,10}</nationalNumberPattern>
<possibleNumberPattern>\d{8,10}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="10" localOnly="8"/>
</fixedLine>
</territory>
<!-- Germany -->
@ -415,6 +458,9 @@
<nationalNumberPattern>3\d{6}</nationalNumberPattern>
<possibleNumberPattern>\d{7}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="7"/>
</fixedLine>
</territory>
<!-- United Kingdom -->
@ -445,7 +491,7 @@
</generalDesc>
<fixedLine>
<nationalNumberPattern>[1-6]\d{9}</nationalNumberPattern>
<possibleLengths national="10" localOnly="6"/>
<possibleLengths national="9,10" localOnly="6,7,8"/>
</fixedLine>
<mobile>
<nationalNumberPattern>7[1-57-9]\d{8}</nationalNumberPattern>
@ -488,6 +534,13 @@
<!-- This country is used to test ShortNumberInfo, so at least the country calling code must be
recognised by the library, and it must be the same as that of the United Kingdom. -->
<territory id="GG" countryCode="44">
<generalDesc>
<nationalNumberPattern>\d{6,10}</nationalNumberPattern>
<possibleNumberPattern>\d{6,10}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="10" localOnly="6"/>
</fixedLine>
</territory>
<!-- Hungary -->
@ -903,6 +956,13 @@
<!-- Sweden -->
<territory id="SE" countryCode="46" internationalPrefix="00">
<generalDesc>
<nationalNumberPattern>\d{9}</nationalNumberPattern>
<possibleNumberPattern>\d{9}</possibleNumberPattern>
</generalDesc>
<fixedLine>
<possibleLengths national="9"/>
</fixedLine>
</territory>
<!-- Singapore -->
@ -976,6 +1036,9 @@
<possibleNumberPattern>\d{7}(?:\d{3})?</possibleNumberPattern>
<exampleNumber>1234567890</exampleNumber>
</generalDesc>
<fixedLine>
<possibleLengths national="10" localOnly="7"/>
</fixedLine>
<noInternationalDialling>
<!-- This range is added for testing purposes only. -->
<nationalNumberPattern>800\d{7}</nationalNumberPattern>


Loading…
Cancel
Save