Browse Source

Rework Leniency

pull/2107/head
David Humphrey 8 years ago
parent
commit
a2518dd68a
3 changed files with 66 additions and 50 deletions
  1. +16
    -0
      javascript/i18n/phonenumbers/phonenumbermatch_test.js
  2. +8
    -9
      javascript/i18n/phonenumbers/phonenumbermatcher.js
  3. +42
    -41
      javascript/i18n/phonenumbers/phonenumberutil.js

+ 16
- 0
javascript/i18n/phonenumbers/phonenumbermatch_test.js View File

@ -1,3 +1,19 @@
/*
* Copyright (C) 2011 The Libphonenumber Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
goog.require('goog.testing.jsunit'); goog.require('goog.testing.jsunit');
goog.require('i18n.phonenumbers.PhoneNumber'); goog.require('i18n.phonenumbers.PhoneNumber');
goog.require('i18n.phonenumbers.PhoneNumberMatch'); goog.require('i18n.phonenumbers.PhoneNumberMatch');


+ 8
- 9
javascript/i18n/phonenumbers/phonenumbermatcher.js View File

@ -104,7 +104,6 @@ var IS_LATIN = /[\u0000-~\u0080-þĀ-žƀ-Ɏ\u0300-\u036eḀ-Ỿ]/;
* Note that if there is a match, we will always check any text found up to the first match as * Note that if there is a match, we will always check any text found up to the first match as
* well. * well.
*/ */
// XXX: need to confirm that adding `g` flag is correct here, appears to be necessary
var INNER_MATCHES = [ var INNER_MATCHES = [
// Breaks on the slash - e.g. "651-234-2345/332-445-1234" // Breaks on the slash - e.g. "651-234-2345/332-445-1234"
'\\/+(.*)', '\\/+(.*)',
@ -113,18 +112,18 @@ var INNER_MATCHES = [
'(\\([^(]*)', '(\\([^(]*)',
// Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number." // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
// We require a space on either side of the hyphen for it to be considered a separator. // We require a space on either side of the hyphen for it to be considered a separator.
// original was --> /(?:\p{Z}-|-\p{Z})\p{Z}*(.+)/,
// Java uses /(?:\p{Z}-|-\p{Z})\p{Z}*(.+)/; the pattern below is an ES5-compatible rewrite of it.
'(?:[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]\\-|\\-[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000])[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)', '(?:[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]\\-|\\-[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000])[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)',
// Various types of wide hyphens. Note we have decided not to enforce a space here, since it's // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
// possible that it's supposed to be used to break two numbers without spaces, and we haven't // possible that it's supposed to be used to break two numbers without spaces, and we haven't
// seen many instances of it used within a number. // seen many instances of it used within a number.
// original was --> /[\u2012-\u2015\uFF0D]\p{Z}*(.+)/,
// Java uses /[\u2012-\u2015\uFF0D]\p{Z}*(.+)/; the pattern below is an ES5-compatible rewrite of it.
'[\\u2012-\\u2015\\uFF0D][ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)', '[\\u2012-\\u2015\\uFF0D][ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)',
// Breaks on a full stop - e.g. "12345. 332-445-1234 is my number." // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
// original was --> /\.+\p{Z}*([^.]+)/,
// Java uses /\.+\p{Z}*([^.]+)/; the pattern below is an ES5-compatible rewrite of it.
'\\.+[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\-\\/-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)', '\\.+[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\-\\/-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)',
// Breaks on space - e.g. "3324451234 8002341234" // Breaks on space - e.g. "3324451234 8002341234"
// original was --> /\p{Z}+(\P{Z}+)/
// Java uses /\p{Z}+(\P{Z}+)/; the pattern below is an ES5-compatible rewrite of it.
'[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]+((?:[\\0-\\x1F!-\\x9F\\xA1-\\u167F\\u1681-\\u1FFF\\u200B-\\u2027\\u202A-\\u202E\\u2030-\\u205E\\u2060-\\u2FFF\\u3001-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)' '[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]+((?:[\\0-\\x1F!-\\x9F\\xA1-\\u167F\\u1681-\\u1FFF\\u200B-\\u2027\\u202A-\\u202E\\u2030-\\u205E\\u2060-\\u2FFF\\u3001-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)'
]; ];
@ -331,6 +330,8 @@ PhoneNumberMatcher.prototype.find = function(index) {
return null; return null;
}; };
// TODO: decide whether to wrap these methods in an iterator(), and/or
// offer a more JS-like interface.
PhoneNumberMatcher.prototype.hasNext = function() { PhoneNumberMatcher.prototype.hasNext = function() {
if (this.state == State.NOT_READY) { if (this.state == State.NOT_READY) {
this.lastMatch = this.find(this.searchIndex); this.lastMatch = this.find(this.searchIndex);
@ -517,7 +518,7 @@ PhoneNumberMatcher.prototype.parseAndVerify = function(candidate, offset) {
// by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def. // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
// If the candidate is not at the start of the text, and does not start with phone-number // If the candidate is not at the start of the text, and does not start with phone-number
// punctuation, check the previous character. // punctuation, check the previous character.
if(this.leniency >= PhoneNumberUtil.Leniency.VALID) {
if(this.leniency.value >= PhoneNumberUtil.Leniency.VALID.value) {
if (offset > 0) { if (offset > 0) {
var leadClassRe = new RegExp("^" + LEAD_CLASS); var leadClassRe = new RegExp("^" + LEAD_CLASS);
var leadClassMatches = leadClassRe.exec(candidate); var leadClassMatches = leadClassRe.exec(candidate);
@ -564,9 +565,7 @@ PhoneNumberMatcher.prototype.parseAndVerify = function(candidate, offset) {
return null; return null;
} }
// XXX: simplify this
var leniencyVerifyFn = PhoneNumberUtil.Leniency.verifyFns[this.leniency];
if (leniencyVerifyFn(number, candidate, this.phoneUtil)) {
if (this.leniency.verify(number, candidate, this.phoneUtil)) {
// We used parseAndKeepRawInput to create this number, but for now we don't return the extra // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
// values parsed. TODO: stop clearing all values here and switch all users over // values parsed. TODO: stop clearing all values here and switch all users over
// to using rawInput() rather than the rawString() of PhoneNumberMatch. // to using rawInput() rather than the rawString() of PhoneNumberMatch.


+ 42
- 41
javascript/i18n/phonenumbers/phonenumberutil.js View File

@ -1011,14 +1011,28 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
* Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber) * Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber)
* possible}, but not necessarily {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}. * possible}, but not necessarily {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}.
*/ */
POSSIBLE: 0,
POSSIBLE: {
value: 0,
verify: function(number, candidate, util) {
return util.isPossibleNumber(number);
}
},
/** /**
* Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber) * Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber)
* possible} and {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}. Numbers written * possible} and {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}. Numbers written
* in national format must have their national-prefix present if it is usually written for a * in national format must have their national-prefix present if it is usually written for a
* number of this type. * number of this type.
*/ */
VALID: 1,
VALID: {
value: 1,
verify: function(number, candidate, util) {
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)) {
return false;
}
return PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util);
}
},
/** /**
* Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and * Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and
* are grouped in a possible way for this locale. For example, a US number written as * are grouped in a possible way for this locale. For example, a US number written as
@ -1031,7 +1045,26 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
* code "+1". If you are not sure about which level to use, email the discussion group * code "+1". If you are not sure about which level to use, email the discussion group
* libphonenumber-discuss@googlegroups.com. * libphonenumber-discuss@googlegroups.com.
*/ */
STRICT_GROUPING: 2,
STRICT_GROUPING: {
value: 2,
verify: function(number, candidate, util) {
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)
|| PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate)
|| !PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util))
{
return false;
}
return PhoneNumberMatcher.checkNumberGroupingIsValid(
number, candidate, util, {
checkGroups: function(util, number, normalizedCandidate, expectedNumberGroups) {
return PhoneNumberMatcher.allNumberGroupsRemainGrouped(
util, number, normalizedCandidate, expectedNumberGroups);
}
}
);
}
},
/** /**
* Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and * Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and
* are grouped in the same way that we would have formatted it, or as a single block. For * are grouped in the same way that we would have formatted it, or as a single block. For
@ -1043,41 +1076,9 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
* code "+1". If you are not sure about which level to use, email the discussion group * code "+1". If you are not sure about which level to use, email the discussion group
* libphonenumber-discuss@googlegroups.com. * libphonenumber-discuss@googlegroups.com.
*/ */
EXACT_GROUPING: 3,
// Verification functions for each of the above.
// XXX: this feels overly "clever", and probably I should refactor. Tried to follow Java's pattern here.
verifyFns: [
// POSSIBLE
function(number, candidate, util) {
return util.isPossibleNumber(number);
},
// VALID
function(number, candidate, util) {
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)) {
return false;
}
return PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util);
},
// STRICT_GROUPING
function(number, candidate, util) {
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)
|| PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate)
|| !PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util)) {
return false;
}
return PhoneNumberMatcher.checkNumberGroupingIsValid(
number, candidate, util, {
checkGroups: function(util, number, normalizedCandidate, expectedNumberGroups) {
return PhoneNumberMatcher.allNumberGroupsRemainGrouped(
util, number, normalizedCandidate, expectedNumberGroups);
}
});
},
// EXACT_GROUPING
function(number, candidate, util) {
EXACT_GROUPING: {
value: 3,
verify: function(number, candidate, util) {
if (!util.isValidNumber(number) if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util) || !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)
|| PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate) || PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate)
@ -1093,7 +1094,7 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
} }
); );
} }
]
}
}; };
/** /**
@ -4665,8 +4666,8 @@ i18n.phonenumbers.PhoneNumberUtil.prototype.findNumbers = function(text, default
} }
leniency = leniency || i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID; leniency = leniency || i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID;
maxTries = maxTries || 9223372036854775807; // Long.MAX_VALUE is 9,223,372,036,854,775,807
return new PhoneNumberMatcher(this, text, defaultRegion, i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID, maxTries);
maxTries = maxTries || 9223372036854775807; // Java Long.MAX_VALUE = 9,223,372,036,854,775,807
return new PhoneNumberMatcher(this, text, defaultRegion, PhoneNumberUtil.Leniency.VALID, maxTries);
}; };
/** /**


Loading…
Cancel
Save