Expand Leniency to include all verification functions

8 years ago · b868df3ea9
--- a/javascript/i18n/phonenumbers/phonenumbermatcher.js
+++ b/javascript/i18n/phonenumbers/phonenumbermatcher.js
@ -265,15 +265,7 @@ i18n.phonenumbers.PhoneNumberMatcher = function(util, text, country, leniency, m
     */
    this.preferredRegion = country;
    /** The degree of validation requested. NOTE: Java `findNumbers` always uses VALID, so we hard code that here */
    this.leniency = {
        verify: function(number, candidate, util) {
            if (!util.isValidNumber(number)
                || !i18n.phonenumbers.PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)) {
              return false;
            }
            return i18n.phonenumbers.PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util);
        }
    };
    this.leniency = leniency;

    /** The maximum number of retries after matching an invalid number. */
    this.maxTries = maxTries;
@ -508,25 +500,27 @@ i18n.phonenumbers.PhoneNumberMatcher.prototype.parseAndVerify = function(candida
            return null;
        }

        // If leniency is set to VALID (always is in Java code) or stricter, we also want to skip numbers that are surrounded
        // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
        // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
        // If the candidate is not at the start of the text, and does not start with phone-number
        // punctuation, check the previous character.
        if (offset > 0) {
            var leadClassMatches = (new RegExp("^" + LEAD_CLASS)).exec(candidate);
            if(leadClassMatches && leadClassMatches.index !== 0) {
                var previousChar = this.text.charAt(offset - 1);
                // We return null if it is a latin letter or an invalid punctuation symbol.
                if (isInvalidPunctuationSymbol(previousChar) || i18n.phonenumbers.PhoneNumberMatcher.isLatinLetter(previousChar)) {
                    return null;
        if(this.leniency >= i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID) {
            if (offset > 0) {
                var leadClassMatches = (new RegExp("^" + LEAD_CLASS)).exec(candidate);
                if(leadClassMatches && leadClassMatches.index !== 0) {
                    var previousChar = this.text.charAt(offset - 1);
                    // We return null if it is a latin letter or an invalid punctuation symbol.
                    if (isInvalidPunctuationSymbol(previousChar) || i18n.phonenumbers.PhoneNumberMatcher.isLatinLetter(previousChar)) {
                        return null;
                    }
                }
            }
        }
        var lastCharIndex = offset + candidate.length;
        if (lastCharIndex < this.text.length) {
            var nextChar = this.text.charAt(lastCharIndex);
            if (isInvalidPunctuationSymbol(nextChar) || i18n.phonenumbers.PhoneNumberMatcher.isLatinLetter(nextChar)) {
                return null;
            var lastCharIndex = offset + candidate.length;
            if (lastCharIndex < this.text.length) {
                var nextChar = this.text.charAt(lastCharIndex);
                if (isInvalidPunctuationSymbol(nextChar) || i18n.phonenumbers.PhoneNumberMatcher.isLatinLetter(nextChar)) {
                    return null;
                }
            }
        }

@ -551,7 +545,8 @@ i18n.phonenumbers.PhoneNumberMatcher.prototype.parseAndVerify = function(candida
            return null;
        }

        if (this.leniency.verify(number, candidate, phoneUtil)) {
        var leniencyVerifyFn = i18n.phonenumbers.PhoneNumberUtil.Leniency.verifyFns[this.leniency];
        if (leniencyVerifyFn(number, candidate, phoneUtil)) {
            // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
            // values parsed. TODO: stop clearing all values here and switch all users over
            // to using rawInput() rather than the rawString() of PhoneNumberMatch.
@ -609,3 +604,58 @@ i18n.phonenumbers.PhoneNumberMatcher.isNationalPrefixPresentIfRequired = functio
    }
    return true;
 };

 i18n.phonenumbers.PhoneNumberMatcher.checkNumberGroupingIsValid = function(
    number, candidate, util, checker) {
  // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions)
  // and optimise if necessary.
  var normalizedCandidate =
      PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
  var formattedNumberGroups = getNationalNumberGroups(util, number, null);
  if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
    return true;
  }
  // If this didn't pass, see if there are any alternate formats, and try them instead.
  var alternateFormats =
      MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
  if (alternateFormats != null) {
      var formats = alternateFormats.numberFormats();
      var alternateFormat;
    for (var i = 0; i < formats.length; i++) {
        alternateFormat = formats[i];
        formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
        if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
            return true;
        }
    }
  }
  return false;
 }

 /**
 * Helper method to get the national-number part of a number, formatted without any national
 * prefix, and return it as a set of digit blocks that would be formatted together.
 */
 function getNationalNumberGroups(util, number, formattingPattern) {
    if (formattingPattern == null) {
        // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
        var rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
        // We remove the extension part from the formatted string before splitting it into different
        // groups.
        var endIndex = rfc3966Format.indexOf(';');
        if (endIndex < 0) {
            endIndex = rfc3966Format.length;
        }
        // The country-code will have a '-' following it.
        var startIndex = rfc3966Format.indexOf('-') + 1;
        return rfc3966Format.substring(startIndex, endIndex).split("-");
    } else {
        // We format the NSN only, and split that according to the separator.
        var nationalSignificantNumber = util.getNationalSignificantNumber(number);
        return util.formatNsnUsingPattern(
            nationalSignificantNumber,
            formattingPattern,
            PhoneNumberFormat.RFC3966
        ).split("-");
    }
 }
--- a/javascript/i18n/phonenumbers/phonenumberutil.js
+++ b/javascript/i18n/phonenumbers/phonenumberutil.js
@ -1002,6 +1002,98 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
  TOO_LONG: 3
 };

 /**
 * Leniency when {@linkplain PhoneNumberUtil#findNumbers finding} potential phone numbers in text
 * segments. The levels here are ordered in increasing strictness.
 */
 i18n.phonenumbers.PhoneNumberUtil.Leniency = {
  /**
   * Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber)
   * possible}, but not necessarily {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}.
   */
  POSSIBLE: 0,
  /**
   * Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber)
   * possible} and {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}. Numbers written
   * in national format must have their national-prefix present if it is usually written for a
   * number of this type.
   */
  VALID: 1,
  /**
   * Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and
   * are grouped in a possible way for this locale. For example, a US number written as
   * "65 02 53 00 00" and "650253 0000" are not accepted at this leniency level, whereas
   * "650 253 0000", "650 2530000" or "6502530000" are.
   * Numbers with more than one '/' symbol in the national significant number are also dropped at
   * this level.
   * <p>
   * Warning: This level might result in lower coverage especially for regions outside of country
   * code "+1". If you are not sure about which level to use, email the discussion group
   * libphonenumber-discuss@googlegroups.com.
   */
  STRICT_GROUPING: 2,
  /**
   * Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and
   * are grouped in the same way that we would have formatted it, or as a single block. For
   * example, a US number written as "650 2530000" is not accepted at this leniency level, whereas
   * "650 253 0000" or "6502530000" are.
   * Numbers with more than one '/' symbol are also dropped at this level.
   * <p>
   * Warning: This level might result in lower coverage especially for regions outside of country
   * code "+1". If you are not sure about which level to use, email the discussion group
   * libphonenumber-discuss@googlegroups.com.
   */
  EXACT_GROUPING: 3,

  // Verification functions for each of the above.
  verifyFns: [
    // POSSIBLE
    function(number, candidate, util) {
      return util.isPossibleNumber(number);
    },
    // VALID
    function(number, candidate, util) {
      if (!util.isValidNumber(number)
          || !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)) {
        return false;
      }
      return PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util);
    },
    // STRICT_GROUPING
    function(number, candidate, util) {
      if (!util.isValidNumber(number)
          || !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)
          || PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate)
          || !PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util)) {
        return false;
      }
      return PhoneNumberMatcher.checkNumberGroupingIsValid(
          number, candidate, util, {
            checkGroups: function(util, number, normalizedCandidate, expectedNumberGroups) {
              return PhoneNumberMatcher.allNumberGroupsRemainGrouped(
                  util, number, normalizedCandidate, expectedNumberGroups);
            }
          });
    },
    // EXACT_GROUPING
    function(number, candidate, util) {
      if (!util.isValidNumber(number)
          || !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)
          || PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate)
          || !PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util)) {
        return false;
      }
      return PhoneNumberMatcher.checkNumberGroupingIsValid(
          number, candidate, util, {
            checkGroups: function(util, number, normalizedCandidate, expectedNumberGroups) {
              return PhoneNumberMatcher.allNumberGroupsAreExactlyPresent(
                  util, number, normalizedCandidate, expectedNumberGroups);
            }
          }
      );
    }
  ]
 };

 /**
 * Attempts to extract a possible number from the string passed in. This
@ -4568,9 +4660,7 @@ i18n.phonenumbers.PhoneNumberUtil.prototype.findNumbers = function(text, default
  }

  var maxTries = 9223372036854775807; // Long.MAX_VALUE is 9,223,372,036,854,775,807
  var leniency = function(){};

  return new PhoneNumberMatcher(this, text, defaultRegion, /*Leniency.VALID*/ leniency, maxTries);
  return new PhoneNumberMatcher(this, text, defaultRegion, i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID, maxTries);
 };

 /**