Browse Source

Rework Leniency

pull/2107/head
David Humphrey 8 years ago
parent
commit
a2518dd68a
3 changed files with 66 additions and 50 deletions
  1. +16
    -0
      javascript/i18n/phonenumbers/phonenumbermatch_test.js
  2. +8
    -9
      javascript/i18n/phonenumbers/phonenumbermatcher.js
  3. +42
    -41
      javascript/i18n/phonenumbers/phonenumberutil.js

+ 16
- 0
javascript/i18n/phonenumbers/phonenumbermatch_test.js View File

@ -1,3 +1,19 @@
/*
* Copyright (C) 2011 The Libphonenumber Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
goog.require('goog.testing.jsunit');
goog.require('i18n.phonenumbers.PhoneNumber');
goog.require('i18n.phonenumbers.PhoneNumberMatch');


+ 8
- 9
javascript/i18n/phonenumbers/phonenumbermatcher.js View File

@ -104,7 +104,6 @@ var IS_LATIN = /[\u0000-~\u0080-þĀ-žƀ-Ɏ\u0300-\u036eḀ-Ỿ]/;
* Note that if there is a match, we will always check any text found up to the first match as
* well.
*/
// XXX: need to confirm that adding `g` flag is correct here, appears to be necessary
var INNER_MATCHES = [
// Breaks on the slash - e.g. "651-234-2345/332-445-1234"
'\\/+(.*)',
@ -113,18 +112,18 @@ var INNER_MATCHES = [
'(\\([^(]*)',
// Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
// We require a space on either side of the hyphen for it to be considered a separator.
// original was --> /(?:\p{Z}-|-\p{Z})\p{Z}*(.+)/,
// Java uses /(?:\p{Z}-|-\p{Z})\p{Z}*(.+)/; the pattern below is an ES5-compatible expansion of it.
'(?:[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]\\-|\\-[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000])[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)',
// Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
// possible that it's supposed to be used to break two numbers without spaces, and we haven't
// seen many instances of it used within a number.
// original was --> /[\u2012-\u2015\uFF0D]\p{Z}*(.+)/,
// Java uses /[\u2012-\u2015\uFF0D]\p{Z}*(.+)/, and this regex is es5 compatible
'[\\u2012-\\u2015\\uFF0D][ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)',
// Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
// original was --> /\.+\p{Z}*([^.]+)/,
// Java uses /\.+\p{Z}*([^.]+)/, and this regex is es5 compatible
'\\.+[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]*((?:[\\0-\\-\\/-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)',
// Breaks on space - e.g. "3324451234 8002341234"
// original was --> /\p{Z}+(\P{Z}+)/
// Java uses /\p{Z}+(\P{Z}+)/ and this regex is es5 compatible
'[ \\xA0\\u1680\\u2000-\\u200A\\u2028\\u2029\\u202F\\u205F\\u3000]+((?:[\\0-\\x1F!-\\x9F\\xA1-\\u167F\\u1681-\\u1FFF\\u200B-\\u2027\\u202A-\\u202E\\u2030-\\u205E\\u2060-\\u2FFF\\u3001-\\uD7FF\\uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF])+)'
];
@ -331,6 +330,8 @@ PhoneNumberMatcher.prototype.find = function(index) {
return null;
};
// XXX: Should hasNext()/next() be wrapped in an iterator()? And/or
// should this expose a more JavaScript-idiomatic interface?
PhoneNumberMatcher.prototype.hasNext = function() {
if (this.state == State.NOT_READY) {
this.lastMatch = this.find(this.searchIndex);
@ -517,7 +518,7 @@ PhoneNumberMatcher.prototype.parseAndVerify = function(candidate, offset) {
// by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
// If the candidate is not at the start of the text, and does not start with phone-number
// punctuation, check the previous character.
if(this.leniency >= PhoneNumberUtil.Leniency.VALID) {
if(this.leniency.value >= PhoneNumberUtil.Leniency.VALID.value) {
if (offset > 0) {
var leadClassRe = new RegExp("^" + LEAD_CLASS);
var leadClassMatches = leadClassRe.exec(candidate);
@ -564,9 +565,7 @@ PhoneNumberMatcher.prototype.parseAndVerify = function(candidate, offset) {
return null;
}
// XXX: simplify this
var leniencyVerifyFn = PhoneNumberUtil.Leniency.verifyFns[this.leniency];
if (leniencyVerifyFn(number, candidate, this.phoneUtil)) {
if (this.leniency.verify(number, candidate, this.phoneUtil)) {
// We used parseAndKeepRawInput to create this number, but for now we don't return the extra
// values parsed. TODO: stop clearing all values here and switch all users over
// to using rawInput() rather than the rawString() of PhoneNumberMatch.


+ 42
- 41
javascript/i18n/phonenumbers/phonenumberutil.js View File

@ -1011,14 +1011,28 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
* Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber)
* possible}, but not necessarily {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}.
*/
POSSIBLE: 0,
POSSIBLE: {
// Numeric rank of this level; the matcher orders leniency levels by comparing `.value` fields.
value: 0,
/**
 * Decides whether a parsed number is acceptable at this leniency level.
 * Returns whatever util.isPossibleNumber(number) returns; `candidate` is
 * accepted for signature parity with the other levels but is not used here.
 */
verify: function(number, candidate, util) {
return util.isPossibleNumber(number);
}
},
/**
* Phone numbers accepted are {@linkplain PhoneNumberUtil#isPossibleNumber(PhoneNumber)
* possible} and {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid}. Numbers written
* in national format must have their national-prefix present if it is usually written for a
* number of this type.
*/
VALID: 1,
VALID: {
// Numeric rank of this level; the matcher orders leniency levels by comparing `.value` fields.
value: 1,
/**
 * Decides whether a parsed number is acceptable at this leniency level.
 * Returns false when util.isValidNumber(number) or
 * PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util) fails;
 * otherwise returns the result of
 * PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util).
 */
verify: function(number, candidate, util) {
// Reject early if either the validity check or the X-character check fails.
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)) {
return false;
}
return PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util);
}
},
/**
* Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and
* are grouped in a possible way for this locale. For example, a US number written as
@ -1031,7 +1045,26 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
* code "+1". If you are not sure about which level to use, email the discussion group
* libphonenumber-discuss@googlegroups.com.
*/
STRICT_GROUPING: 2,
STRICT_GROUPING: {
// Numeric rank of this level; the matcher orders leniency levels by comparing `.value` fields.
value: 2,
/**
 * Decides whether a parsed number is acceptable at this leniency level.
 * Returns false when any of the preliminary checks fails (isValidNumber,
 * containsOnlyValidXChars, containsMoreThanOneSlashInNationalNumber,
 * isNationalPrefixPresentIfRequired); otherwise returns the result of
 * PhoneNumberMatcher.checkNumberGroupingIsValid, which is handed a checker
 * object that forwards to PhoneNumberMatcher.allNumberGroupsRemainGrouped.
 */
verify: function(number, candidate, util) {
// Note the third condition is not negated: more than one slash is a rejection.
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)
|| PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate)
|| !PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util))
{
return false;
}
return PhoneNumberMatcher.checkNumberGroupingIsValid(
number, candidate, util, {
// Checker object passed to checkNumberGroupingIsValid; its single method
// delegates unchanged to allNumberGroupsRemainGrouped.
checkGroups: function(util, number, normalizedCandidate, expectedNumberGroups) {
return PhoneNumberMatcher.allNumberGroupsRemainGrouped(
util, number, normalizedCandidate, expectedNumberGroups);
}
}
);
}
},
/**
* Phone numbers accepted are {@linkplain PhoneNumberUtil#isValidNumber(PhoneNumber) valid} and
* are grouped in the same way that we would have formatted it, or as a single block. For
@ -1043,41 +1076,9 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
* code "+1". If you are not sure about which level to use, email the discussion group
* libphonenumber-discuss@googlegroups.com.
*/
EXACT_GROUPING: 3,
// Verification functions for each of the above.
// XXX: this feels overly "clever", and probably I should refactor. Tried to follow Java's pattern here.
verifyFns: [
// POSSIBLE
function(number, candidate, util) {
return util.isPossibleNumber(number);
},
// VALID
function(number, candidate, util) {
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)) {
return false;
}
return PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util);
},
// STRICT_GROUPING
function(number, candidate, util) {
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)
|| PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate)
|| !PhoneNumberMatcher.isNationalPrefixPresentIfRequired(number, util)) {
return false;
}
return PhoneNumberMatcher.checkNumberGroupingIsValid(
number, candidate, util, {
checkGroups: function(util, number, normalizedCandidate, expectedNumberGroups) {
return PhoneNumberMatcher.allNumberGroupsRemainGrouped(
util, number, normalizedCandidate, expectedNumberGroups);
}
});
},
// EXACT_GROUPING
function(number, candidate, util) {
EXACT_GROUPING: {
value: 3,
verify: function(number, candidate, util) {
if (!util.isValidNumber(number)
|| !PhoneNumberMatcher.containsOnlyValidXChars(number, candidate, util)
|| PhoneNumberMatcher.containsMoreThanOneSlashInNationalNumber(number, candidate)
@ -1093,7 +1094,7 @@ i18n.phonenumbers.PhoneNumberUtil.ValidationResult = {
}
);
}
]
}
};
/**
@ -4665,8 +4666,8 @@ i18n.phonenumbers.PhoneNumberUtil.prototype.findNumbers = function(text, default
}
leniency = leniency || i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID;
maxTries = maxTries || 9223372036854775807; // Long.MAX_VALUE is 9,223,372,036,854,775,807
return new PhoneNumberMatcher(this, text, defaultRegion, i18n.phonenumbers.PhoneNumberUtil.Leniency.VALID, maxTries);
maxTries = maxTries || 9223372036854775807; // Java Long.MAX_VALUE = 9,223,372,036,854,775,807
return new PhoneNumberMatcher(this, text, defaultRegion, PhoneNumberUtil.Leniency.VALID, maxTries);
};
/**


Loading…
Cancel
Save