Browse Source

JAVA: Metadata changes and bug fixes; libphonenumber v5.0

pull/567/head
David Beaumont 14 years ago
committed by Mihaela Rosca
parent
commit
222b42422c
7 changed files with 128 additions and 51 deletions
  1. +13
    -1
      java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java
  2. +8
    -0
      java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java
  3. BIN
      java/libphonenumber/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GA
  4. BIN
      java/libphonenumber/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_IL
  5. +17
    -2
      java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java
  6. +45
    -4
      java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java
  7. +45
    -44
      java/release_notes.txt

+ 13
- 1
java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberMatcher.java View File

@ -433,7 +433,8 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
/**
* Small helper interface such that the number groups can be checked according to different
* criteria.
* criteria, both for our default way of performing formatting and for any alternate formats we
* may want to check.
*/
interface NumberGroupingChecker {
/**
@ -553,6 +554,17 @@ final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
return true;
}
// If this didn't pass, see if there are any alternate formats, and try them instead.
PhoneMetadata alternateFormats =
MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
if (alternateFormats != null) {
for (NumberFormat alternateFormat : alternateFormats.numberFormats()) {
formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
return true;
}
}
}
return false;
}


+ 8
- 0
java/libphonenumber/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java View File

@ -265,9 +265,17 @@ public class PhoneNumberUtil {
// carrier codes, for example in Brazilian phone numbers. We also allow multiple "+" characters at
// the start.
// Corresponds to the following:
// [digits]{minLengthNsn}|
// plus_sign*(([punctuation]|[star])*[digits]){3,}([punctuation]|[star]|[digits]|[alpha])*
//
// The first reg-ex is to allow short numbers (two digits long) to be parsed if they are entered
// as "15" etc, but only if there is no punctuation in them. The second expression restricts the
// number of digits to three or more, but then allows them to be in international form, and to
// have alpha-characters and punctuation.
//
// Note VALID_PUNCTUATION starts with a -, so must be the first in the range.
private static final String VALID_PHONE_NUMBER =
DIGITS + "{" + MIN_LENGTH_FOR_NSN + "}" + "|" +
"[" + PLUS_CHARS + "]*+(?:[" + VALID_PUNCTUATION + STAR_SIGN + "]*" + DIGITS + "){3,}[" +
VALID_PUNCTUATION + STAR_SIGN + VALID_ALPHA + DIGITS + "]*";


BIN
java/libphonenumber/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_GA View File


BIN
java/libphonenumber/src/com/google/i18n/phonenumbers/data/PhoneNumberMetadataProto_IL View File


+ 17
- 2
java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java View File

@ -400,7 +400,12 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase {
new NumberTest("\uFF14\uFF11\uFF15\uFF16\uFF16\uFF16\uFF16-\uFF17\uFF17\uFF17", RegionCode.US),
new NumberTest("2012-0102 08", RegionCode.US), // Very strange formatting.
new NumberTest("2012-01-02 08", RegionCode.US),
new NumberTest("1800-10-10 22", RegionCode.AU), // Breakdown assistance number.
// Breakdown assistance number with unexpected formatting.
new NumberTest("1800-1-0-10 22", RegionCode.AU),
new NumberTest("030-3-2 23 12 34", RegionCode.DE),
new NumberTest("03 0 -3 2 23 12 34", RegionCode.DE),
new NumberTest("(0)3 0 -3 2 23 12 34", RegionCode.DE),
new NumberTest("0 3 0 -3 2 23 12 34", RegionCode.DE),
};
/**
@ -413,6 +418,11 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase {
// Should be found by strict grouping but not exact grouping, as the last two groups are
// formatted together as a block.
new NumberTest("0800-2491234", RegionCode.DE),
// Doesn't match any formatting in the test file, but almost matches an alternate format (the
// last two groups have been squashed together here).
new NumberTest("0900-1 123123", RegionCode.DE),
new NumberTest("(0)900-1 123123", RegionCode.DE),
new NumberTest("0 900-1 123123", RegionCode.DE),
};
/**
@ -439,6 +449,11 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase {
new NumberTest("0494949 ext. 49", RegionCode.DE),
new NumberTest("01 (33) 3461 2234", RegionCode.MX), // Optional NP present
new NumberTest("(33) 3461 2234", RegionCode.MX), // Optional NP omitted
new NumberTest("1800-10-10 22", RegionCode.AU), // Breakdown assistance number.
// Doesn't match any formatting in the test file, but matches an alternate format exactly.
new NumberTest("0900-1 123 123", RegionCode.DE),
new NumberTest("(0)900-1 123 123", RegionCode.DE),
new NumberTest("0 900-1 123 123", RegionCode.DE),
};
public void testMatchesWithPossibleLeniency() throws Exception {
@ -863,7 +878,7 @@ public class PhoneNumberMatcherTest extends TestMetadataTestCase {
contextPairs.add(new NumberContext("It's cheap! Call ", " before 6:30"));
// With a second number later.
contextPairs.add(new NumberContext("Call ", " or +1800-123-4567!"));
contextPairs.add(new NumberContext("Call me on June 21 at", "")); // with a Month-Day date
contextPairs.add(new NumberContext("Call me on June 2 at", "")); // with a Month-Day date
// With publication pages.
contextPairs.add(new NumberContext(
"As quoted by Alfonso 12-15 (2009), you may call me at ", ""));


+ 45
- 4
java/libphonenumber/test/com/google/i18n/phonenumbers/PhoneNumberUtilTest.java View File

@ -1256,16 +1256,20 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
}
public void testIsViablePhoneNumber() {
assertFalse(PhoneNumberUtil.isViablePhoneNumber("1"));
// Only one or two digits before strange non-possible punctuation.
assertFalse(PhoneNumberUtil.isViablePhoneNumber("12. March"));
assertFalse(PhoneNumberUtil.isViablePhoneNumber("1+1+1"));
assertFalse(PhoneNumberUtil.isViablePhoneNumber("80+0"));
assertFalse(PhoneNumberUtil.isViablePhoneNumber("00"));
// Three digits is viable.
// Two digits is viable.
assertTrue(PhoneNumberUtil.isViablePhoneNumber("00"));
assertTrue(PhoneNumberUtil.isViablePhoneNumber("111"));
// Alpha numbers.
assertTrue(PhoneNumberUtil.isViablePhoneNumber("0800-4-pizza"));
assertTrue(PhoneNumberUtil.isViablePhoneNumber("0800-4-PIZZA"));
// We need at least three digits before any alpha characters.
assertFalse(PhoneNumberUtil.isViablePhoneNumber("08-PIZZA"));
assertFalse(PhoneNumberUtil.isViablePhoneNumber("8-PIZZA"));
assertFalse(PhoneNumberUtil.isViablePhoneNumber("12. March"));
}
public void testIsViablePhoneNumberNonAscii() {
@ -1600,6 +1604,10 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
// Test star numbers. Although this is not strictly valid, we would like to make sure we can
// parse the output we produce when formatting the number.
assertEquals(JP_STAR_NUMBER, phoneUtil.parse("+81 *2345", RegionCode.JP));
PhoneNumber shortNumber = new PhoneNumber();
shortNumber.setCountryCode(64).setNationalNumber(12L);
assertEquals(shortNumber, phoneUtil.parse("12", RegionCode.NZ));
}
public void testParseNumberWithAlphaCharacters() throws Exception {
@ -1765,6 +1773,36 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
NumberParseException.ErrorType.NOT_A_NUMBER,
e.getErrorType());
}
try {
String sentencePhoneNumber = "1 Still not a number";
phoneUtil.parse(sentencePhoneNumber, RegionCode.NZ);
fail("This should not parse without throwing an exception " + sentencePhoneNumber);
} catch (NumberParseException e) {
// Expected this exception.
assertEquals("Wrong error type stored in exception.",
NumberParseException.ErrorType.NOT_A_NUMBER,
e.getErrorType());
}
try {
String sentencePhoneNumber = "1 MICROSOFT";
phoneUtil.parse(sentencePhoneNumber, RegionCode.NZ);
fail("This should not parse without throwing an exception " + sentencePhoneNumber);
} catch (NumberParseException e) {
// Expected this exception.
assertEquals("Wrong error type stored in exception.",
NumberParseException.ErrorType.NOT_A_NUMBER,
e.getErrorType());
}
try {
String sentencePhoneNumber = "12 MICROSOFT";
phoneUtil.parse(sentencePhoneNumber, RegionCode.NZ);
fail("This should not parse without throwing an exception " + sentencePhoneNumber);
} catch (NumberParseException e) {
// Expected this exception.
assertEquals("Wrong error type stored in exception.",
NumberParseException.ErrorType.NOT_A_NUMBER,
e.getErrorType());
}
try {
String tooLongPhoneNumber = "01495 72553301873 810104";
phoneUtil.parse(tooLongPhoneNumber, RegionCode.GB);
@ -2209,7 +2247,7 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
// Invalid numbers that can't be parsed.
assertEquals(PhoneNumberUtil.MatchType.NOT_A_NUMBER,
phoneUtil.isNumberMatch("43", "3 331 6043"));
phoneUtil.isNumberMatch("4", "3 331 6043"));
assertEquals(PhoneNumberUtil.MatchType.NOT_A_NUMBER,
phoneUtil.isNumberMatch("+43", "+64 3 331 6005"));
assertEquals(PhoneNumberUtil.MatchType.NOT_A_NUMBER,
@ -2321,7 +2359,10 @@ public class PhoneNumberUtilTest extends TestMetadataTestCase {
assertTrue(phoneUtil.isAlphaNumber("1800 six-flags"));
assertTrue(phoneUtil.isAlphaNumber("1800 six-flags ext. 1234"));
assertTrue(phoneUtil.isAlphaNumber("+800 six-flags"));
assertTrue(phoneUtil.isAlphaNumber("180 six-flags"));
assertFalse(phoneUtil.isAlphaNumber("1800 123-1234"));
assertFalse(phoneUtil.isAlphaNumber("1 six-flags"));
assertFalse(phoneUtil.isAlphaNumber("18 six-flags"));
assertFalse(phoneUtil.isAlphaNumber("1800 123-1234 extension: 1234"));
assertFalse(phoneUtil.isAlphaNumber("+800 1234-1234"));
}


+ 45
- 44
java/release_notes.txt View File

@ -1,23 +1,30 @@
July 12th, 2012: libphonenumber-5.0
* Code changes:
- Support for alternate formats when finding phone numbers.
- Allowing two-digit numbers to be parsed if they are entered in national-format with no
punctuation
* Metadata changes
- IL, GA
* Other
- Reflowed this file to 100 char width to make it unambiguous as to what the standard should be.
July 6th, 2012: libphonenumber-4.9.1
* Metadata changes
- AR, BA, BF, CR, DE, EC, ES, KZ, MK, NC, NG, PF, SB, UZ, non-geo entity 882
- Geocoding data updates for country calling codes 54 (AR) and 81 (JP), new
data for 234 (NG)
- Geocoding data updates for country calling codes 54 (AR) and 81 (JP), new data for 234 (NG)
June 21st, 2012: libphonenumber-4.9
* Bug fix
- formatInOriginalFormat fixed not to add a star sign if it was not present
originally.
- formatInOriginalFormat fixed not to add a star sign if it was not present originally.
* Metadata changes
- BF, CZ, ES, KW
- Non-geographical entities with calling code 882 (BebbiCell, Maritime
Communications, Oration Technologies, Telespazio and Thuraya)
- Non-geographical entities with calling code 882 (BebbiCell, Maritime Communications, Oration
Technologies, Telespazio and Thuraya)
- Geocoding data updates for country calling codes 221, 224, 226, 242, 244, 245
* Functionality changes:
- Minimum allowed length for a national significant number (NSN) when parsing
changed from 3 to 2.
- Support parsing of RFC3966-formatted strings with an isdn-subaddress or extra
parameters specified.
- Minimum allowed length for a national significant number (NSN) when parsing changed from 3 to 2.
- Support parsing of RFC3966-formatted strings with an isdn-subaddress or extra parameters
specified.
- Allow soft hyphen to appear in phone numbers (\u00AD)
* Testing changes:
- Add extra unit tests for non-geographical phone number entities
@ -123,33 +130,31 @@ November 24th, 2011: libphonenumber-4.3
November 10th, 2011: libphonenumber-4.2
* Code changes
- Providing an "exact match" isEmergencyNumber method
- Improvement to PhoneNumberMatcher: requires national prefix to be present
when matching national-format numbers, unless matching for a region where it
is explicitly marked in the metadata that they may be omitted. Applies to
leniency level VALID and higher.
- Improvement to PhoneNumberMatcher: requires national prefix to be present when matching
national-format numbers, unless matching for a region where it is explicitly marked in the
metadata that they may be omitted. Applies to leniency level VALID and higher.
- Change formatNumberForMobileDialing not to modify the phoneNumber passed in.
* Metadata changes
- Emergency numbers added for all remaining countries
- Collected data on which numbers we format with a national prefix are
commonly written without one and added this
- Collected data on which numbers we format with a national prefix are commonly written without one
and added this
- Updates for AR, AT, BH, CZ, GR, IR, KM, LT, MX, PT, SE, SO, UG
- Addition of SX (Sint Maarten)
October 19th, 2011: libphonenumber-4.1
* Code changes
- Update code and metadata for countries with IDD "8~10" to accept phone
numbers where the "~" is omitted.
- Modify formatInOriginalFormat to use raw_input (when present) when the number
is considered as invalid by the library.
- Add ShortNumberUtil to deal with international short phone numbers, such as
short codes and emergency numbers.
- Update code and metadata for countries with IDD "8~10" to accept phone numbers where the "~" is
omitted.
- Modify formatInOriginalFormat to use raw_input (when present) when the number is considered as
invalid by the library.
- Add ShortNumberUtil to deal with international short phone numbers, such as short codes and
emergency numbers.
- Increase the maximum phone-number length accepted when parsing (now set to
16).
* Metadata changes
- Updates: BF, BN, CN, DE, DK, DO, FR, IN, KI, KW, MC, MD, ML, PA, QA, SB, UK,
WS
- Updates: BF, BN, CN, DE, DK, DO, FR, IN, KI, KW, MC, MD, ML, PA, QA, SB, UK, WS
- Emergency number information also added to: AE, AF, AL, AM, AR, AT, AU, BA,
BB, BD, BE, BG, BH, BO, BR, BY, CA, CH, CL, CN, CO, CR, CY, CZ, DE, DJ, DK,
DO, DZ, EC, EE, EG, ES, FI, FJ, FO, FR, GB, GE, GF, GH, GI, GL, GR, GT, GY,
@ -161,10 +166,9 @@ October 19th, 2011: libphonenumber-4.1
October 6th, 2011: libphonenumber-4.0
* Code changes
- New function formatNumberForMobileDialing, which attempts to format a number in
such a way that the call can be connected from a mobile phone. If this is
impossible, for example for numbers that cannot be internationally dialled,
then an empty string is returned.
- New function formatNumberForMobileDialing, which attempts to format a number in such a way that
the call can be connected from a mobile phone. If this is impossible, for example for numbers
that cannot be internationally dialled, then an empty string is returned.
- Fallback functionality to English for non-CJK languages for geocoding
* Metadata changes
@ -178,8 +182,7 @@ September 13th, 2011: libphonenumber-3.9
- Enable AsYouTypeFormatter to handle long IDD and NDD.
- Allow the presence of an IDD following a +.
- Fix formatting of phone numbers which contain only 0s in the national number.
- Refactored some code in geocoding including AreaCodeMap and the storage
strategies.
- Refactored some code in geocoding including AreaCodeMap and the storage strategies.
* Metadata changes
- Updates: AM, BE, BH, BJ, BR, BT, BZ, CI, CL, CN, DE, DK, DM, DZ, EC, EG, FJ,
@ -194,16 +197,14 @@ August 11th, 2011: libphonenumber-3.8
* Code changes
- Fix demo so that it does not throw null-pointer exceptions for invalid NANPA numbers
- Fixed AYTF to not accept plus signs in the middle of input
- PhoneNumberMatcher improvements - added STRICT_GROUPING and EXACT_GROUPING
levels, numbers followed/preceded by a currency symbol will not match,
multiple numbers separated by phone-number punctuation will now match. ", "
is no longer accepted as an extension symbol when matching, only when
parsing. "x" is only accepted as a carrier code or extension marker, not
otherwise.
- Changes to handling of leading zeroes - these will not be silently ignored
anymore, but will be stored as part of the number.
- PhoneNumberOfflineGeocoder - new method to get the description of a number that assumes
the validity of the number has already been checked and will not re-verify it.
- PhoneNumberMatcher improvements - added STRICT_GROUPING and EXACT_GROUPING levels, numbers
followed/preceded by a currency symbol will not match, multiple numbers separated by phone-number
punctuation will now match. ", " is no longer accepted as an extension symbol when matching, only
when parsing. "x" is only accepted as a carrier code or extension marker, not otherwise.
- Changes to handling of leading zeroes - these will not be silently ignored anymore, but will be
stored as part of the number.
- PhoneNumberOfflineGeocoder - new method to get the description of a number that assumes the
validity of the number has already been checked and will not re-verify it.
- Split geocoding US binary data into multiple files.
* Metadata changes
@ -461,16 +462,16 @@ August 16th, 2010
August 4th, 2010
* Further improve startup performance
- Preload no country-specific metadata at startup.
- Stop creating the file containing mapping from country calling code to region code
and loading it at startup; instead, do the initialization in PhoneNumberUtil.
- Stop creating the file containing mapping from country calling code to region code and loading
it at startup; instead, do the initialization in PhoneNumberUtil.
July 31st, 2010
* Improve startup performance
- Separate generated metadata binary file to one file per region
- Preload US at startup, and load other regions when needed
- Create a file containing mapping from country calling code to region code,
and load it at startup
- Create a file containing mapping from country calling code to region code, and load it at
startup
- Same change also applied to unittests
July 30th, 2010


Loading…
Cancel
Save