Browse Source

Porting usage of possibleLengths from Java to C++ (#1358)

* Porting code to use the new possible length information for calculating
whether a phone number is possible, too short etc.
pull/1361/head
lararennie 9 years ago
committed by Lara Scheidegger
parent
commit
99fd0acf3c
2 changed files with 1010 additions and 956 deletions
  1. +63
    -50
      cpp/src/phonenumbers/phonenumberutil.cc
  2. +947
    -906
      cpp/src/phonenumbers/test_metadata.cc

+ 63
- 50
cpp/src/phonenumbers/phonenumberutil.cc View File

@ -52,7 +52,9 @@
namespace i18n { namespace i18n {
namespace phonenumbers { namespace phonenumbers {
using google::protobuf::RepeatedField;
using google::protobuf::RepeatedPtrField; using google::protobuf::RepeatedPtrField;
using std::find;
// static constants // static constants
const size_t PhoneNumberUtil::kMinLengthForNsn; const size_t PhoneNumberUtil::kMinLengthForNsn;
@ -263,17 +265,41 @@ void NormalizeHelper(const map<char32, char>& normalization_replacements,
number->assign(normalized_number); number->assign(normalized_number);
} }
PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern(
const RegExp& number_pattern, const string& number) {
string extracted_number;
if (number_pattern.FullMatch(number, &extracted_number)) {
// Helper method to check a number against possible lengths for this number, and
// determine whether it matches, or is too short or too long. Currently, if a
// number pattern suggests that numbers of length 7 and 10 are possible, and a
// number in between these possible lengths is entered, such as of length 8,
// this will return TOO_LONG.
PhoneNumberUtil::ValidationResult TestNumberLength(
const string& number, const PhoneNumberDesc& phone_number_desc) {
RepeatedField<int> possible_lengths = phone_number_desc.possible_length();
RepeatedField<int> local_lengths =
phone_number_desc.possible_length_local_only();
int actual_length = number.length();
if (find(local_lengths.begin(), local_lengths.end(), actual_length) !=
local_lengths.end()) {
return PhoneNumberUtil::IS_POSSIBLE; return PhoneNumberUtil::IS_POSSIBLE;
} }
if (number_pattern.PartialMatch(number, &extracted_number)) {
return PhoneNumberUtil::TOO_LONG;
} else {
// There should always be "possibleLengths" set for every element. This will
// be a build-time check once ShortNumberMetadata.xml is migrated to contain
// this information as well.
int minimum_length = possible_lengths.Get(0);
if (minimum_length == actual_length) {
return PhoneNumberUtil::IS_POSSIBLE;
} else if (minimum_length > actual_length) {
return PhoneNumberUtil::TOO_SHORT; return PhoneNumberUtil::TOO_SHORT;
} else if (*(possible_lengths.end() - 1) < actual_length) {
return PhoneNumberUtil::TOO_LONG;
} }
// Note that actually the number is not too long if possible_lengths does not
// contain the length: we know it is less than the highest possible number
// length, and higher than the lowest possible number length. However, we
// don't currently have an enum to express this, so we return TOO_LONG in the
// short-term.
// We skip the first element; we've already checked it.
return find(possible_lengths.begin() + 1, possible_lengths.end(),
actual_length) != possible_lengths.end()
? PhoneNumberUtil::IS_POSSIBLE : PhoneNumberUtil::TOO_LONG;
} }
} // namespace } // namespace
@ -661,8 +687,7 @@ PhoneNumberUtil::PhoneNumberUtil()
country_calling_code_to_region_map.end()); country_calling_code_to_region_map.end());
// Sort all the pairs in ascending order according to country calling code. // Sort all the pairs in ascending order according to country calling code.
std::sort(country_calling_code_to_region_code_map_->begin(), std::sort(country_calling_code_to_region_code_map_->begin(),
country_calling_code_to_region_code_map_->end(),
OrderByFirst());
country_calling_code_to_region_code_map_->end(), OrderByFirst());
} }
PhoneNumberUtil::~PhoneNumberUtil() { PhoneNumberUtil::~PhoneNumberUtil() {
@ -786,9 +811,9 @@ bool PhoneNumberUtil::HasValidCountryCallingCode(
// locate the pair with the same country_code in the sorted vector. // locate the pair with the same country_code in the sorted vector.
IntRegionsPair target_pair; IntRegionsPair target_pair;
target_pair.first = country_calling_code; target_pair.first = country_calling_code;
return (binary_search(country_calling_code_to_region_code_map_->begin(),
country_calling_code_to_region_code_map_->end(),
target_pair, OrderByFirst()));
return (std::binary_search(country_calling_code_to_region_code_map_->begin(),
country_calling_code_to_region_code_map_->end(),
target_pair, OrderByFirst()));
} }
// Returns a pointer to the phone metadata for the appropriate region or NULL // Returns a pointer to the phone metadata for the appropriate region or NULL
@ -1033,8 +1058,8 @@ void PhoneNumberUtil::FormatNumberForMobileDialing(
string national_number; string national_number;
GetNationalSignificantNumber(number_no_extension, &national_number); GetNationalSignificantNumber(number_no_extension, &national_number);
if (CanBeInternationallyDialled(number_no_extension) && if (CanBeInternationallyDialled(number_no_extension) &&
!IsShorterThanPossibleNormalNumber(region_metadata,
national_number)) {
TestNumberLength(national_number, region_metadata->general_desc()) !=
TOO_SHORT) {
Format(number_no_extension, INTERNATIONAL, formatted_number); Format(number_no_extension, INTERNATIONAL, formatted_number);
} else { } else {
Format(number_no_extension, NATIONAL, formatted_number); Format(number_no_extension, NATIONAL, formatted_number);
@ -1055,8 +1080,9 @@ void PhoneNumberUtil::FormatNumberForMobileDialing(
// national format, but don't have it when used for display. The // national format, but don't have it when used for display. The
// reverse is true for mobile numbers. As a result, we output them in // reverse is true for mobile numbers. As a result, we output them in
// the international format to make it work. // the international format to make it work.
((region_code == "MX" || region_code == "CL") &&
is_fixed_line_or_mobile)) &&
((region_code == "MX" ||
region_code == "CL") &&
is_fixed_line_or_mobile)) &&
CanBeInternationallyDialled(number_no_extension)) { CanBeInternationallyDialled(number_no_extension)) {
Format(number_no_extension, INTERNATIONAL, formatted_number); Format(number_no_extension, INTERNATIONAL, formatted_number);
} else { } else {
@ -1985,11 +2011,11 @@ PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseHelper(
&potential_national_number, &potential_national_number,
&carrier_code); &carrier_code);
// We require that the NSN remaining after stripping the national prefix // We require that the NSN remaining after stripping the national prefix
// and carrier code be of a possible length for the region. Otherwise, we
// don't do the stripping, since the original number could be a valid short
// number.
if (!IsShorterThanPossibleNormalNumber(country_metadata,
potential_national_number)) {
// and carrier code be long enough to be a possible length for the region.
// Otherwise, we don't do the stripping, since the original number could be
// a valid short number.
if (TestNumberLength(potential_national_number,
country_metadata->general_desc()) != TOO_SHORT) {
normalized_national_number.assign(potential_national_number); normalized_national_number.assign(potential_national_number);
if (keep_raw_input) { if (keep_raw_input) {
temp_number.set_preferred_domestic_carrier_code(carrier_code); temp_number.set_preferred_domestic_carrier_code(carrier_code);
@ -2098,10 +2124,7 @@ PhoneNumberUtil::ValidationResult PhoneNumberUtil::IsPossibleNumberWithReason(
// Metadata cannot be NULL because the country calling code is valid. // Metadata cannot be NULL because the country calling code is valid.
const PhoneMetadata* metadata = const PhoneMetadata* metadata =
GetMetadataForRegionOrCallingCode(country_code, region_code); GetMetadataForRegionOrCallingCode(country_code, region_code);
const RegExp& possible_number_pattern = reg_exps_->regexp_cache_->GetRegExp(
StrCat("(", metadata->general_desc().possible_number_pattern(), ")"));
return TestNumberLengthAgainstPattern(possible_number_pattern,
national_number);
return TestNumberLength(national_number, metadata->general_desc());
} }
bool PhoneNumberUtil::TruncateTooLongNumber(PhoneNumber* number) const { bool PhoneNumberUtil::TruncateTooLongNumber(PhoneNumber* number) const {
@ -2203,19 +2226,22 @@ void PhoneNumberUtil::SetItalianLeadingZerosForPhoneNumber(
} }
} }
bool PhoneNumberUtil::IsNumberPossibleForDesc(
const string& national_number, const PhoneNumberDesc& number_desc) const {
return reg_exps_->regexp_cache_.get()->
GetRegExp(number_desc.possible_number_pattern())
.FullMatch(national_number);
}
bool PhoneNumberUtil::IsNumberMatchingDesc( bool PhoneNumberUtil::IsNumberMatchingDesc(
const string& national_number, const PhoneNumberDesc& number_desc) const { const string& national_number, const PhoneNumberDesc& number_desc) const {
return IsNumberPossibleForDesc(national_number, number_desc) &&
reg_exps_->regexp_cache_.get()->
GetRegExp(number_desc.national_number_pattern())
.FullMatch(national_number);
// Check if any possible number lengths are present; if so, we use them to
// avoid checking the validation pattern if they don't match. If they are
// absent, this means they match the general description, which we have
// already checked before checking a specific number type.
int actual_length = national_number.length();
if (number_desc.possible_length_size() > 0 &&
std::find(number_desc.possible_length().begin(),
number_desc.possible_length().end(),
actual_length) == number_desc.possible_length().end()) {
return false;
}
return reg_exps_->regexp_cache_
->GetRegExp(number_desc.national_number_pattern())
.FullMatch(national_number);
} }
PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberTypeHelper( PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberTypeHelper(
@ -2732,16 +2758,12 @@ PhoneNumberUtil::ErrorType PhoneNumberUtil::MaybeExtractCountryCode(
NULL); NULL);
VLOG(4) << "Number without country calling code prefix: " VLOG(4) << "Number without country calling code prefix: "
<< potential_national_number; << potential_national_number;
const RegExp& possible_number_pattern =
reg_exps_->regexp_cache_->GetRegExp(
StrCat("(", general_num_desc.possible_number_pattern(), ")"));
// If the number was not valid before but is valid now, or if it was too // If the number was not valid before but is valid now, or if it was too
// long before, we consider the number with the country code stripped to // long before, we consider the number with the country code stripped to
// be a better result and keep that instead. // be a better result and keep that instead.
if ((!valid_number_pattern.FullMatch(*national_number) && if ((!valid_number_pattern.FullMatch(*national_number) &&
valid_number_pattern.FullMatch(potential_national_number)) || valid_number_pattern.FullMatch(potential_national_number)) ||
TestNumberLengthAgainstPattern(possible_number_pattern,
*national_number) == TOO_LONG) {
TestNumberLength(*national_number, general_num_desc) == TOO_LONG) {
national_number->assign(potential_national_number); national_number->assign(potential_national_number);
if (keep_raw_input) { if (keep_raw_input) {
phone_number->set_country_code_source( phone_number->set_country_code_source(
@ -2897,15 +2919,6 @@ AsYouTypeFormatter* PhoneNumberUtil::GetAsYouTypeFormatter(
return new AsYouTypeFormatter(region_code); return new AsYouTypeFormatter(region_code);
} }
bool PhoneNumberUtil::IsShorterThanPossibleNormalNumber(
const PhoneMetadata* country_metadata, const string& number) const {
const RegExp& possible_number_pattern =
reg_exps_->regexp_cache_->GetRegExp(StrCat("(",
country_metadata->general_desc().possible_number_pattern(), ")"));
return TestNumberLengthAgainstPattern(possible_number_pattern, number) ==
PhoneNumberUtil::TOO_SHORT;
}
bool PhoneNumberUtil::CanBeInternationallyDialled( bool PhoneNumberUtil::CanBeInternationallyDialled(
const PhoneNumber& number) const { const PhoneNumber& number) const {
string region_code; string region_code;


+ 947
- 906
cpp/src/phonenumbers/test_metadata.cc
File diff suppressed because it is too large
View File


Loading…
Cancel
Save