From b868df3ea94d2200a5ac098c3a8b463e7d947aef Mon Sep 17 00:00:00 2001 From: David Humphrey Date: Tue, 30 Jan 2018 14:29:15 -0500 Subject: [PATCH] Expand Leniency to include all verification functions --- .../i18n/phonenumbers/phonenumbermatcher.js | 98 ++++++++++++++----- .../i18n/phonenumbers/phonenumberutil.js | 96 +++++++++++++++++- 2 files changed, 167 insertions(+), 27 deletions(-) diff --git a/javascript/i18n/phonenumbers/phonenumbermatcher.js b/javascript/i18n/phonenumbers/phonenumbermatcher.js index dd6549f09..8b1bd6f1d 100644 --- a/javascript/i18n/phonenumbers/phonenumbermatcher.js +++ b/javascript/i18n/phonenumbers/phonenumbermatcher.js @@ -265,15 +265,7 @@ i18n.phonenumbers.PhoneNumberMatcher = function(util, text, country, leniency, m */ this.preferredRegion = country; /** The degree of validation requested. NOTE: Java `findNumbers` always uses VALID, so we hard code that here */ - this.leniency = { - verify: function(number, candidate, util) { - if (!util.isValidNumber(number) - || !i18n.phonenumbers.PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)) { - return false; - } - return i18n.phonenumbers.PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util); - } - }; + this.leniency = leniency; /** The maximum number of retries after matching an invalid number. */ this.maxTries = maxTries; @@ -508,25 +500,27 @@ i18n.phonenumbers.PhoneNumberMatcher.prototype.parseAndVerify = function(candida return null; } - // If leniency is set to VALID (always is in Java code) or stricter, we also want to skip numbers that are surrounded + // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def. // If the candidate is not at the start of the text, and does not start with phone-number // punctuation, check the previous character. 
- if (offset > 0) { - var leadClassMatches = (new RegExp("^" + LEAD_CLASS)).exec(candidate); - if(leadClassMatches && leadClassMatches.index !== 0) { - var previousChar = this.text.charAt(offset - 1); - // We return null if it is a latin letter or an invalid punctuation symbol. - if (isInvalidPunctuationSymbol(previousChar) || i18n.phonenumbers.PhoneNumberMatcher.isLatinLetter(previousChar)) { - return null; + if(this.leniency >= i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID) { + if (offset > 0) { + var leadClassMatches = (new RegExp("^" + LEAD_CLASS)).exec(candidate); + if(leadClassMatches && leadClassMatches.index !== 0) { + var previousChar = this.text.charAt(offset - 1); + // We return null if it is a latin letter or an invalid punctuation symbol. + if (isInvalidPunctuationSymbol(previousChar) || i18n.phonenumbers.PhoneNumberMatcher.isLatinLetter(previousChar)) { + return null; + } } } - } - var lastCharIndex = offset + candidate.length; - if (lastCharIndex < this.text.length) { - var nextChar = this.text.charAt(lastCharIndex); - if (isInvalidPunctuationSymbol(nextChar) || i18n.phonenumbers.PhoneNumberMatcher.isLatinLetter(nextChar)) { - return null; + var lastCharIndex = offset + candidate.length; + if (lastCharIndex < this.text.length) { + var nextChar = this.text.charAt(lastCharIndex); + if (isInvalidPunctuationSymbol(nextChar) || i18n.phonenumbers.PhoneNumberMatcher.isLatinLetter(nextChar)) { + return null; + } } } @@ -551,7 +545,8 @@ i18n.phonenumbers.PhoneNumberMatcher.prototype.parseAndVerify = function(candida return null; } - if (this.leniency.verify(number, candidate, phoneUtil)) { + var leniencyVerifyFn = i18n.phonenumbers.PhoneNumberUtil.Leniency.verifyFns[this.leniency]; + if (leniencyVerifyFn(number, candidate, phoneUtil)) { // We used parseAndKeepRawInput to create this number, but for now we don't return the extra // values parsed. 
TODO: stop clearing all values here and switch all users over // to using rawInput() rather than the rawString() of PhoneNumberMatch. @@ -609,3 +604,58 @@ i18n.phonenumbers.PhoneNumberMatcher.isNationalPrefixPresentIfRequired = functio } return true; }; + +i18n.phonenumbers.PhoneNumberMatcher.checkNumberGroupingIsValid = function( + number, candidate, util, checker) { + // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions) + // and optimise if necessary. + var normalizedCandidate = + PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */); + var formattedNumberGroups = getNationalNumberGroups(util, number, null); + if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { + return true; + } + // If this didn't pass, see if there are any alternate formats, and try them instead. + var alternateFormats = + MetadataManager.getAlternateFormatsForCountry(number.getCountryCode()); + if (alternateFormats != null) { + var formats = alternateFormats.numberFormats(); + var alternateFormat; + for (var i = 0; i < formats.length; i++) { + alternateFormat = formats[i]; + formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat); + if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { + return true; + } + } + } + return false; +} + +/** + * Helper method to get the national-number part of a number, formatted without any national + * prefix, and return it as a set of digit blocks that would be formatted together. + */ +function getNationalNumberGroups(util, number, formattingPattern) { + if (formattingPattern == null) { + // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits. + var rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966); + // We remove the extension part from the formatted string before splitting it into different + // groups. 
+ var endIndex = rfc3966Format.indexOf(';'); + if (endIndex < 0) { + endIndex = rfc3966Format.length; + } + // The country-code will have a '-' following it. + var startIndex = rfc3966Format.indexOf('-') + 1; + return rfc3966Format.substring(startIndex, endIndex).split("-"); + } else { + // We format the NSN only, and split that according to the separator. + var nationalSignificantNumber = util.getNationalSignificantNumber(number); + return util.formatNsnUsingPattern( + nationalSignificantNumber, + formattingPattern, + PhoneNumberFormat.RFC3966 + ).split("-"); + } +} \ No newline at end of file diff --git a/javascript/i18n/phonenumbers/phonenumberutil.js b/javascript/i18n/phonenumbers/phonenumberutil.js index a9b35c369..8ff42ec92 100644 --- a/javascript/i18n/phonenumbers/phonenumberutil.js +++ b/javascript/i18n/phonenumbers/phonenumberutil.js @@ -1002,6 +1002,98 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = { TOO_LONG: 3 }; +/** + * Leniency when {@linkplain PhoneNumberUtil#findNumbers finding} potential phone numbers in text + * segments. The levels here are ordered in increasing strictness. + */ + i18n.phonenumbers.PhoneNumberUtil.Leniency = { + /** + * Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber) + * possible}, but not necessarily {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}. + */ + POSSIBLE: 0, + /** + * Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber) + * possible} and {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}. Numbers written + * in national format must have their national-prefix present if it is usually written for a + * number of this type. + */ + VALID: 1, + /** + * Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and + * are grouped in a possible way for this locale. 
For example, a US number written as + * "65 02 53 00 00" and "650253 0000" are not accepted at this leniency level, whereas + * "650 253 0000", "650 2530000" or "6502530000" are. + * Numbers with more than one '/' symbol in the national significant number are also dropped at + * this level. + * <p>
+ * Warning: This level might result in lower coverage especially for regions outside of country + * code "+1". If you are not sure about which level to use, email the discussion group + * libphonenumber-discuss@googlegroups.com. + */ + STRICT_GROUPING: 2, + /** + * Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and + * are grouped in the same way that we would have formatted it, or as a single block. For + * example, a US number written as "650 2530000" is not accepted at this leniency level, whereas + * "650 253 0000" or "6502530000" are. + * Numbers with more than one '/' symbol are also dropped at this level. + * <p>
+ * Warning: This level might result in lower coverage especially for regions outside of country + * code "+1". If you are not sure about which level to use, email the discussion group + * libphonenumber-discuss@googlegroups.com. + */ + EXACT_GROUPING: 3, + + // Verification functions for each of the above. + verifyFns: [ + // POSSIBLE + function(number, candidate, util) { + return util.isPossibleNumber(number); + }, + // VALID + function(number, candidate, util) { + if (!util.isValidNumber(number) + || !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)) { + return false; + } + return PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util); + }, + // STRICT_GROUPING + function(number, candidate, util) { + if (!util.isValidNumber(number) + || !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util) + || PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate) + || !PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util)) { + return false; + } + return PhoneNumberMatcher.checkNumberGroupingIsValid( + number, candidate, util, { + checkGroups: function(util, number, normalizedCandidate, expectedNumberGroups) { + return PhoneNumberMatcher.allNumberGroupsRemainGrouped( + util, number, normalizedCandidate, expectedNumberGroups); + } + }); + }, + // EXACT_GROUPING + function(number, candidate, util) { + if (!util.isValidNumber(number) + || !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util) + || PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate) + || !PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util)) { + return false; + } + return PhoneNumberMatcher.checkNumberGroupingIsValid( + number, candidate, util, { + checkGroups: function(util, number, normalizedCandidate, expectedNumberGroups) { + return PhoneNumberMatcher.allNumberGroupsAreExactlyPresent( + util, number, normalizedCandidate, expectedNumberGroups); + } + } + ); + } + ] +}; /** * 
Attempts to extract a possible number from the string passed in. This @@ -4568,9 +4660,7 @@ i18n.phonenumbers.PhoneNumberUtil.prototype.findNumbers = function(text, default } var maxTries = 9223372036854775807; // Long.MAX_VALUE is 9,223,372,036,854,775,807 - var leniency = function(){}; - - return new PhoneNumberMatcher(this, text, defaultRegion, /*Leniency.VALID*/ leniency, maxTries); + return new PhoneNumberMatcher(this, text, defaultRegion, i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID, maxTries); }; /**