Browse Source

Refactor phone context parsing and phone number normalizing logic.

refactor-rfc-normalize-cpp
Silvio Brändle 11 months ago
parent
commit
30c2a62e43
15 changed files with 567 additions and 146 deletions
  1. +3
    -1
      cpp/CMakeLists.txt
  2. +1
    -1
      cpp/src/phonenumbers/asyoutypeformatter.cc
  3. +5
    -0
      cpp/src/phonenumbers/constants.h
  4. +2
    -0
      cpp/src/phonenumbers/normalize_utf8.h
  5. +125
    -0
      cpp/src/phonenumbers/phonecontextparser.cc
  6. +73
    -0
      cpp/src/phonenumbers/phonecontextparser.h
  7. +46
    -43
      cpp/src/phonenumbers/phonenumbermatcher.cc
  8. +39
    -0
      cpp/src/phonenumbers/phonenumbernormalizer.cc
  9. +50
    -0
      cpp/src/phonenumbers/phonenumbernormalizer.h
  10. +41
    -83
      cpp/src/phonenumbers/phonenumberutil.cc
  11. +11
    -10
      cpp/src/phonenumbers/phonenumberutil.h
  12. +4
    -0
      cpp/src/phonenumbers/regexpsandmappings.h
  13. +113
    -0
      cpp/test/phonenumbers/phonecontextparser_test.cc
  14. +54
    -0
      cpp/test/phonenumbers/phonenumbernormalizer_test.cc
  15. +0
    -8
      cpp/test/phonenumbers/phonenumberutil_test.cc

+ 3
- 1
cpp/CMakeLists.txt View File

@ -267,9 +267,11 @@ set (
"src/phonenumbers/base/strings/string_piece.cc"
"src/phonenumbers/default_logger.cc"
"src/phonenumbers/logger.cc"
"src/phonenumbers/phonecontextparser.cc"
"src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers.
"src/phonenumbers/phonenumber.cc"
"src/phonenumbers/phonenumber.pb.cc" # Generated by Protocol Buffers.
"src/phonenumbers/phonenumbernormalizer.cc"
"src/phonenumbers/phonenumberutil.cc"
"src/phonenumbers/regex_based_matcher.cc"
"src/phonenumbers/regexpsandmappings.cc"
@ -428,7 +430,7 @@ include_directories ("src")
# Collate dependencies
#----------------------------------------------------------------
set (LIBRARY_DEPS ${ICU_LIB} ${PROTOBUF_LIB} absl::node_hash_set absl::strings absl::synchronization)
set (LIBRARY_DEPS ${ICU_LIB} ${PROTOBUF_LIB} absl::node_hash_set absl::statusor absl::strings absl::synchronization)
if (USE_BOOST)
list (APPEND LIBRARY_DEPS ${Boost_LIBRARIES})


+ 1
- 1
cpp/src/phonenumbers/asyoutypeformatter.cc View File

@ -711,7 +711,7 @@ char AsYouTypeFormatter::NormalizeAndAccrueDigitsAndPlusSign(
} else {
string number;
UnicodeString(next_char).toUTF8String(number);
phone_util_.NormalizeDigitsOnly(&number);
phone_util_.phone_number_normalizer_->NormalizeDigitsOnly(&number);
accrued_input_without_formatting_.append(next_char);
national_number_.append(number);
normalized_char = number[0];


+ 5
- 0
cpp/src/phonenumbers/constants.h View File

@ -21,7 +21,9 @@ namespace i18n {
namespace phonenumbers {
class Constants {
friend class PhoneContextParser;
friend class PhoneNumberMatcherRegExps;
friend class PhoneNumberNormalizer;
friend class PhoneNumberRegExpsAndMappings;
friend class PhoneNumberUtil;
@ -33,6 +35,7 @@ class Constants {
static constexpr char kRfc3966ExtnPrefix[] = ";ext=";
static constexpr char kRfc3966VisualSeparator[] = "[\\-\\.\\(\\)]?";
static constexpr char kRfc3966PhoneContext[] = ";phone-context=";
static constexpr char kDigits[] = "\\p{Nd}";
@ -53,6 +56,8 @@ class Constants {
// The minimum and maximum length of the national significant number.
static constexpr size_t kMinLengthForNsn = 2;
// The maximum length of the country calling code.
static constexpr size_t kMaxLengthCountryCode = 3;
static constexpr char kPlusChars[] = "+\xEF\xBC\x8B"; /* "+＋": ASCII plus sign and U+FF0B fullwidth plus sign */


+ 2
- 0
cpp/src/phonenumbers/normalize_utf8.h View File

@ -14,6 +14,8 @@
#include <string>
#include <unicode/uchar.h>
#include "phonenumbers/utf/unicodetext.h"
namespace i18n {


+ 125
- 0
cpp/src/phonenumbers/phonecontextparser.cc View File

@ -0,0 +1,125 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "phonenumbers/phonecontextparser.h"
#include <string>
#include "phonenumbers/constants.h"
namespace i18n {
namespace phonenumbers {
// Constructs a parser.
//   country_calling_codes: list of known country calling codes; ownership is
//                          transferred to the parser.
//   reg_exps:              regular expressions used to validate phone-context
//                          values (shared ownership).
//   normalizer:            digit normalizer used when parsing the context
//                          (shared ownership).
PhoneContextParser::PhoneContextParser(
    std::unique_ptr<std::vector<int>> country_calling_codes,
    std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps,
    std::shared_ptr<PhoneNumberNormalizer> normalizer)
    : country_calling_codes_(std::move(country_calling_codes)),
      // The shared_ptr arguments are already taken by value, so move them into
      // the members rather than paying for a second reference-count bump.
      reg_exps_(std::move(reg_exps)),
      normalizer_(std::move(normalizer)) {}
// Returns the value of the ";phone-context=" parameter of phone_number:
//   - std::nullopt if the parameter does not occur in the input,
//   - an empty view if nothing follows the parameter name,
//   - otherwise the text up to (not including) the next ';', or up to the end
//     of the input when no ';' follows.
std::optional<absl::string_view> PhoneContextParser::ExtractPhoneContext(
    const absl::string_view phone_number) {
  const size_t marker_pos = phone_number.find(Constants::kRfc3966PhoneContext);
  if (marker_pos == std::string::npos) {
    return std::nullopt;
  }

  const size_t value_begin =
      marker_pos + strlen(Constants::kRfc3966PhoneContext);
  if (value_begin >= phone_number.length()) {
    // The parameter name is the very last thing in the input.
    return "";
  }

  const size_t value_end = phone_number.find(';', value_begin);
  // Without a terminating ';' the value extends to the end of the input.
  const size_t value_length = (value_end == std::string::npos)
                                  ? std::string::npos
                                  : value_end - value_begin;
  return phone_number.substr(value_begin, value_length);
}
// Returns true if phone_context is non-empty and fully matches either the
// RFC3966 global-number-digits pattern or the RFC3966 domain name pattern.
bool PhoneContextParser::isValid(absl::string_view phone_context) {
  if (phone_context.empty()) {
    return false;
  }
  const std::string candidate{phone_context};
  if (reg_exps_->rfc3966_global_number_digits_pattern_->FullMatch(candidate)) {
    return true;
  }
  return reg_exps_->rfc3966_domainname_pattern_->FullMatch(candidate);
}
// Returns true if country_code is one of the country calling codes this parser
// was constructed with.
bool PhoneContextParser::isValidCountryCode(int country_code) {
  for (const int known_code : *country_calling_codes_) {
    if (known_code == country_code) {
      return true;
    }
  }
  return false;
}
// Builds a PhoneContext from an already extracted phone-context value.
//
// raw_context always receives the value unchanged. country_code is set only
// when the value starts with a plus sign and the digits that follow normalize
// to a non-empty string of at most Constants::kMaxLengthCountryCode digits
// that is one of the known country calling codes; otherwise it stays
// std::nullopt.
PhoneContextParser::PhoneContext PhoneContextParser::ParsePhoneContext(
    absl::string_view phone_context) {
  PhoneContextParser::PhoneContext phone_context_object;
  phone_context_object.raw_context = phone_context;
  phone_context_object.country_code = std::nullopt;

  // Ignore phone-context values that do not start with a plus sign. Could be a
  // domain name. The emptiness check also keeps at(0) and substr(1) below
  // within bounds.
  if (phone_context.empty() ||
      phone_context.at(0) != Constants::kPlusSign[0]) {
    return phone_context_object;
  }

  // Remove the plus sign from the phone context and normalize the digits.
  std::string normalized_phone_context = std::string(phone_context.substr(1));
  normalizer_->NormalizeDigitsOnly(&normalized_phone_context);

  if (normalized_phone_context.empty() ||
      normalized_phone_context.length() > Constants::kMaxLengthCountryCode) {
    return phone_context_object;
  }

  // Safe to convert: the string is non-empty, holds only normalized digits and
  // is at most kMaxLengthCountryCode characters long.
  int potential_country_code = std::stoi(normalized_phone_context, nullptr, 10);
  if (!isValidCountryCode(potential_country_code)) {
    return phone_context_object;
  }

  phone_context_object.country_code = potential_country_code;
  return phone_context_object;
}
// Extracts, validates and parses the phone-context parameter of phone_number.
// Returns std::nullopt when the parameter is absent, an InvalidArgument status
// when the value fails isValid(), and the parsed PhoneContext otherwise.
absl::StatusOr<std::optional<PhoneContextParser::PhoneContext>>
PhoneContextParser::Parse(absl::string_view phone_number) {
  const std::optional<absl::string_view> raw_context =
      ExtractPhoneContext(phone_number);
  if (!raw_context.has_value()) {
    return std::nullopt;
  }
  if (isValid(*raw_context)) {
    return ParsePhoneContext(*raw_context);
  }
  return absl::InvalidArgumentError("Phone context is invalid.");
}
} // namespace phonenumbers
} // namespace i18n

+ 73
- 0
cpp/src/phonenumbers/phonecontextparser.h View File

@ -0,0 +1,73 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_
#define I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_
#include <memory>
#include <optional>
#include <vector>
#include <string>
#include "absl/status/statusor.h"
#include "phonenumbers/phonenumbernormalizer.h"
#include "phonenumbers/regexpsandmappings.h"
namespace i18n {
namespace phonenumbers {
// Parses the RFC3966 ";phone-context=" parameter of a phone number string.
// Every member is private; PhoneNumberUtil and the PhoneContextParserTest
// fixture get access through the friend declarations below.
class PhoneContextParser {
friend class PhoneNumberUtil;
friend class PhoneContextParserTest;
private:
// Result of parsing a phone-context value.
struct PhoneContext {
// The raw value of the phone-context parameter.
std::string raw_context;
// The country code of the phone-context parameter if the phone-context is
// exactly and only a + followed by a valid country code.
std::optional<int> country_code;
};
// Takes ownership of the list of known country calling codes and shared
// ownership of the regular expressions and the digit normalizer.
PhoneContextParser(std::unique_ptr<std::vector<int>> country_calling_codes,
std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps,
std::shared_ptr<PhoneNumberNormalizer> normalizer);
// Extracts, validates and parses the phone-context parameter of phone_number.
// Returns std::nullopt if the parameter is absent, an InvalidArgument status if
// its value is not valid, and the parsed PhoneContext otherwise.
absl::StatusOr<std::optional<PhoneContextParser::PhoneContext>> Parse(
absl::string_view phone_number);
// Country calling codes accepted by isValidCountryCode().
std::unique_ptr<std::vector<int>> country_calling_codes_;
// Source of the RFC3966 global-number-digits and domain name patterns.
std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
// Used to normalize the digits of a phone-context value.
std::shared_ptr<PhoneNumberNormalizer> normalizer_;
// Extracts the value of the phone-context parameter, following the
// specification of RFC3966.
static std::optional<absl::string_view> ExtractPhoneContext(
absl::string_view phone_number);
// Checks whether the phone context value follows the specification of
// RFC3966.
bool isValid(absl::string_view phone_context);
// Checks whether country_code is contained in country_calling_codes_.
bool isValidCountryCode(int country_code);
// Parses the phone context value into a PhoneContext object.
PhoneContext ParsePhoneContext(absl::string_view phone_context);
};
} // namespace phonenumbers
} // namespace i18n
#endif // I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_

+ 46
- 43
cpp/src/phonenumbers/phonenumbermatcher.cc View File

@ -79,43 +79,6 @@ bool IsInvalidPunctuationSymbol(char32 character) {
return character == '%' || u_charType(character) == U_CURRENCY_SYMBOL;
}
// Checks every ASCII 'x'/'X' in candidate that is followed by at least one
// more character. Returns false as soon as one of them is inconsistent with
// number, and true otherwise:
//   - a doubled 'x' must be followed by text for which
//     util.IsNumberMatchWithOneString(number, ...) yields NSN_MATCH;
//   - a single 'x' must be followed by text whose normalized digits equal
//     number.extension().
bool ContainsOnlyValidXChars(const PhoneNumber& number, const string& candidate,
const PhoneNumberUtil& util) {
// The characters 'x' and 'X' can be (1) a carrier code, in which case they
// always precede the national significant number or (2) an extension sign,
// in which case they always precede the extension number. We assume a
// carrier code is more than 1 digit, so the first case has to have more than
// 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1
// 'x' or 'X'.
size_t found;
found = candidate.find_first_of("xX");
// We ignore the character if 'x' or 'X' appears as the last character of
// the string.
while (found != string::npos && found < candidate.length() - 1) {
// We only look for 'x' or 'X' in ASCII form.
char next_char = candidate[found + 1];
if (next_char == 'x' || next_char == 'X') {
// This is the carrier code case, in which the 'X's always precede the
// national significant number.
// Advance to the second 'x' so that the compared substring starts there.
++found;
if (util.IsNumberMatchWithOneString(
number, candidate.substr(found, candidate.length() - found))
!= PhoneNumberUtil::NSN_MATCH) {
return false;
}
} else {
// Extension case: compare the digits after the 'x' with the extension.
string normalized_extension(candidate.substr(found,
candidate.length() - found));
util.NormalizeDigitsOnly(&normalized_extension);
if (normalized_extension != number.extension()) {
return false;
}
}
// Continue with the next 'x'/'X' after the current position.
found = candidate.find_first_of("xX", found + 1);
}
return true;
}
bool AllNumberGroupsRemainGrouped(
const PhoneNumberUtil& util,
const PhoneNumber& number,
@ -283,7 +246,7 @@ class PhoneNumberMatcherRegExps : public Singleton<PhoneNumberMatcherRegExps> {
lead_limit_(Limit(0, 2)),
punctuation_limit_(Limit(0, 4)),
digit_block_limit_(PhoneNumberUtil::kMaxLengthForNsn +
PhoneNumberUtil::kMaxLengthCountryCode),
Constants::kMaxLengthCountryCode),
block_limit_(Limit(0, digit_block_limit_)),
punctuation_(StrCat("[", Constants::kValidPunctuation, "]",
punctuation_limit_)),
@ -395,6 +358,46 @@ class AlternateFormats : public Singleton<AlternateFormats> {
DISALLOW_COPY_AND_ASSIGN(AlternateFormats);
};
class XCharValidator {
public:
static bool ContainsOnlyValidXChars(const PhoneNumber& number, const string& candidate,
const PhoneNumberUtil& util) {
// The characters 'x' and 'X' can be (1) a carrier code, in which case they
// always precede the national significant number or (2) an extension sign,
// in which case they always precede the extension number. We assume a
// carrier code is more than 1 digit, so the first case has to have more than
// 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1
// 'x' or 'X'.
size_t found;
found = candidate.find_first_of("xX");
// We ignore the character if 'x' or 'X' appears as the last character of
// the string.
while (found != string::npos && found < candidate.length() - 1) {
// We only look for 'x' or 'X' in ASCII form.
char next_char = candidate[found + 1];
if (next_char == 'x' || next_char == 'X') {
// This is the carrier code case, in which the 'X's always precede the
// national significant number.
++found;
if (util.IsNumberMatchWithOneString(
number, candidate.substr(found, candidate.length() - found))
!= PhoneNumberUtil::NSN_MATCH) {
return false;
}
} else {
string normalized_extension(candidate.substr(found,
candidate.length() - found));
util.phone_number_normalizer_->NormalizeDigitsOnly(&normalized_extension);
if (normalized_extension != number.extension()) {
return false;
}
}
found = candidate.find_first_of("xX", found + 1);
}
return true;
}
};
PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util,
const string& text,
const string& region_code,
@ -531,13 +534,13 @@ bool PhoneNumberMatcher::VerifyAccordingToLeniency(
return phone_util_.IsPossibleNumber(number);
case PhoneNumberMatcher::VALID:
if (!phone_util_.IsValidNumber(number) ||
!ContainsOnlyValidXChars(number, candidate, phone_util_)) {
!XCharValidator::ContainsOnlyValidXChars(number, candidate, phone_util_)) {
return false;
}
return IsNationalPrefixPresentIfRequired(number);
case PhoneNumberMatcher::STRICT_GROUPING: {
if (!phone_util_.IsValidNumber(number) ||
!ContainsOnlyValidXChars(number, candidate, phone_util_) ||
!XCharValidator::ContainsOnlyValidXChars(number, candidate, phone_util_) ||
ContainsMoreThanOneSlashInNationalNumber(
number, candidate, phone_util_) ||
!IsNationalPrefixPresentIfRequired(number)) {
@ -552,7 +555,7 @@ bool PhoneNumberMatcher::VerifyAccordingToLeniency(
}
case PhoneNumberMatcher::EXACT_GROUPING: {
if (!phone_util_.IsValidNumber(number) ||
!ContainsOnlyValidXChars(number, candidate, phone_util_) ||
!XCharValidator::ContainsOnlyValidXChars(number, candidate, phone_util_) ||
ContainsMoreThanOneSlashInNationalNumber(
number, candidate, phone_util_) ||
!IsNationalPrefixPresentIfRequired(number)) {
@ -815,7 +818,7 @@ bool PhoneNumberMatcher::IsNationalPrefixPresentIfRequired(
string raw_input_copy(number.raw_input());
// Check if we found a national prefix and/or carrier code at the start of
// the raw input, and return the result.
phone_util_.NormalizeDigitsOnly(&raw_input_copy);
phone_util_.phone_number_normalizer_->NormalizeDigitsOnly(&raw_input_copy);
return phone_util_.MaybeStripNationalPrefixAndCarrierCode(
*metadata,
&raw_input_copy,
@ -898,7 +901,7 @@ bool PhoneNumberMatcher::ContainsMoreThanOneSlashInNationalNumber(
PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN) {
string normalized_country_code =
candidate.substr(0, first_slash_in_body);
util.NormalizeDigitsOnly(&normalized_country_code);
util.phone_number_normalizer_->NormalizeDigitsOnly(&normalized_country_code);
if (normalized_country_code == SimpleItoa(number.country_code())) {
// Any more slashes and this is illegal.
return candidate.find('/', second_slash_in_body + 1) != string::npos;


+ 39
- 0
cpp/src/phonenumbers/phonenumbernormalizer.cc View File

@ -0,0 +1,39 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "phonenumbers/phonenumbernormalizer.h"
#include "phonenumbers/base/logging.h"
#include "phonenumbers/constants.h"
#include "phonenumbers/normalize_utf8.h"
namespace i18n {
namespace phonenumbers {
PhoneNumberNormalizer::PhoneNumberNormalizer(
std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps)
: reg_exps_(reg_exps) {}
void PhoneNumberNormalizer::NormalizeDigitsOnly(std::string* number) const {
DCHECK(number);
const RegExp& non_digits_pattern = reg_exps_->regexp_cache_->GetRegExp(
absl::StrCat("[^", Constants::kDigits, "]"));
// Delete everything that isn't valid digits.
non_digits_pattern.GlobalReplace(number, "");
// Normalize all decimal digits to ASCII digits.
number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
}
} // namespace phonenumbers
} // namespace i18n

+ 50
- 0
cpp/src/phonenumbers/phonenumbernormalizer.h View File

@ -0,0 +1,50 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef I18N_PHONENUMBERS_PHONENUMBERNORMALIZER_H_
#define I18N_PHONENUMBERS_PHONENUMBERNORMALIZER_H_
#include <memory>
#include <string>
#include "phonenumbers/regexpsandmappings.h"
namespace i18n {
namespace phonenumbers {
// Normalizes strings that represent phone numbers. Every member is private;
// the classes listed as friends below are the only ones that can construct or
// use a normalizer.
class PhoneNumberNormalizer {
friend class AsYouTypeFormatter;
friend class PhoneContextParser;
friend class PhoneNumberMatcher;
friend class PhoneNumberUtil;
friend class XCharValidator;
friend class PhoneContextParserTest;
friend class PhoneNumberNormalizerTest;
private:
// Regular expressions and mappings; NormalizeDigitsOnly takes its regexp cache
// from here.
std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
// Stores reg_exps for later use by NormalizeDigitsOnly.
explicit PhoneNumberNormalizer(
std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps);
// Normalizes a string of characters representing a phone number. This
// converts wide-ascii and arabic-indic numerals to European numerals, and
// strips punctuation and alpha characters.
// The string is modified in place; number must not be null.
void NormalizeDigitsOnly(std::string* number) const;
};
} // namespace phonenumbers
} // namespace i18n
#endif // I18N_PHONENUMBERS_PHONENUMBERNORMALIZER_H_

+ 41
- 83
cpp/src/phonenumbers/phonenumberutil.cc View File

@ -22,6 +22,7 @@
#include <cstring>
#include <iterator>
#include <map>
#include <optional>
#include <utility>
#include <vector>
@ -33,10 +34,10 @@
#include "phonenumbers/default_logger.h"
#include "phonenumbers/matcher_api.h"
#include "phonenumbers/metadata.h"
#include "phonenumbers/normalize_utf8.h"
#include "phonenumbers/phonemetadata.pb.h"
#include "phonenumbers/phonenumber.h"
#include "phonenumbers/phonenumber.pb.h"
#include "phonenumbers/phonenumbernormalizer.h"
#include "phonenumbers/regex_based_matcher.h"
#include "phonenumbers/regexp_adapter.h"
#include "phonenumbers/regexp_cache.h"
@ -56,7 +57,6 @@ using gtl::OrderByFirst;
// static constants
const size_t PhoneNumberUtil::kMaxLengthForNsn;
const size_t PhoneNumberUtil::kMaxLengthCountryCode;
const int PhoneNumberUtil::kNanpaCountryCode;
// static
@ -65,7 +65,6 @@ const char PhoneNumberUtil::kRegionCodeForNonGeoEntity[] = "001";
namespace {
const char kRfc3966Prefix[] = "tel:";
const char kRfc3966PhoneContext[] = ";phone-context=";
const char kRfc3966IsdnSubaddress[] = ";isub=";
// Default extension prefix to use when formatting. This will be put in front of
@ -346,7 +345,8 @@ PhoneNumberUtil::PhoneNumberUtil()
nanpa_regions_(new absl::node_hash_set<string>()),
region_to_metadata_map_(new absl::node_hash_map<string, PhoneMetadata>()),
country_code_to_non_geographical_metadata_map_(
new absl::node_hash_map<int, PhoneMetadata>) {
new absl::node_hash_map<int, PhoneMetadata>),
phone_number_normalizer_(new PhoneNumberNormalizer(reg_exps_)) {
Logger::set_logger_impl(logger_.get());
// TODO: Update the java version to put the contents of the init
// method inside the constructor as well to keep both in sync.
@ -392,6 +392,19 @@ PhoneNumberUtil::PhoneNumberUtil()
if (country_calling_code == kNanpaCountryCode) {
nanpa_regions_->insert(region_code);
}
// Create a vector of country calling codes to be used by the phone context
// parser.
auto country_calling_codes_ = std::make_unique<std::vector<int>>();
for (std::vector<IntRegionsPair>::const_iterator it =
country_calling_code_to_region_code_map_->begin();
it != country_calling_code_to_region_code_map_->end(); ++it) {
country_calling_codes_->push_back(it->first);
}
phone_context_parser_ = std::unique_ptr<PhoneContextParser>(
new PhoneContextParser(std::move(country_calling_codes_), reg_exps_,
phone_number_normalizer_));
}
country_calling_code_to_region_code_map_->insert(
@ -1049,7 +1062,7 @@ void PhoneNumberUtil::FormatInOriginalFormat(const PhoneNumber& number,
break;
}
candidate_national_prefix_rule.erase(index_of_first_group);
NormalizeDigitsOnly(&candidate_national_prefix_rule);
phone_number_normalizer_->NormalizeDigitsOnly(&candidate_national_prefix_rule);
}
if (candidate_national_prefix_rule.empty()) {
// National prefix not used when formatting this number.
@ -1085,7 +1098,7 @@ bool PhoneNumberUtil::RawInputContainsNationalPrefix(
const string& national_prefix,
const string& region_code) const {
string normalized_national_number(raw_input);
NormalizeDigitsOnly(&normalized_national_number);
phone_number_normalizer_->NormalizeDigitsOnly(&normalized_national_number);
if (HasPrefixString(normalized_national_number, national_prefix)) {
// Some Japanese numbers (e.g. 00777123) might be mistaken to contain
// the national prefix when written without it (e.g. 0777123) if we just
@ -1671,78 +1684,29 @@ bool PhoneNumberUtil::CheckRegionForParsing(
return true;
}
// Extracts the value of the phone-context parameter of number_to_extract_from
// where the index of ";phone-context=" is parameter index_of_phone_context,
// following the syntax defined in RFC3966.
// Returns the extracted string (possibly empty), or absl::nullopt if
// index_of_phone_context is std::string::npos, i.e. no phone-context parameter
// was found by the caller.
absl::optional<string> PhoneNumberUtil::ExtractPhoneContext(
const string& number_to_extract_from,
const size_t index_of_phone_context) const {
// If no phone-context parameter is present
if (index_of_phone_context == std::string::npos) {
return absl::nullopt;
}
// The value starts right after the ";phone-context=" marker.
size_t phone_context_start =
index_of_phone_context + strlen(kRfc3966PhoneContext);
// If phone-context parameter is empty
if (phone_context_start >= number_to_extract_from.length()) {
return "";
}
// The value ends at the next ';', if there is one.
size_t phone_context_end =
number_to_extract_from.find(';', phone_context_start);
// If phone-context is not the last parameter
if (phone_context_end != std::string::npos) {
return number_to_extract_from.substr(
phone_context_start, phone_context_end - phone_context_start);
} else {
return number_to_extract_from.substr(phone_context_start);
}
}
// Returns whether the value of phoneContext follows the syntax defined in
// RFC3966.
// An absent phone-context (nullopt) counts as valid, whereas a present but
// empty value is invalid.
bool PhoneNumberUtil::IsPhoneContextValid(
const absl::optional<string> phone_context) const {
// Nothing to validate when there is no phone-context parameter at all.
if (!phone_context.has_value()) {
return true;
}
if (phone_context.value().empty()) {
return false;
}
// Does phone-context value match pattern of global-number-digits or
// domainname
return reg_exps_->rfc3966_global_number_digits_pattern_->FullMatch(
std::string{phone_context.value()}) ||
reg_exps_->rfc3966_domainname_pattern_->FullMatch(
std::string{phone_context.value()});
}
// Converts number_to_parse to a form that we can parse and write it to
// national_number if it is written in RFC3966; otherwise extract a possible
// number out of it and write to national_number.
// output_number if it is written in RFC3966; otherwise extract a possible
// number out of it and write to output_number.
PhoneNumberUtil::ErrorType PhoneNumberUtil::BuildNationalNumberForParsing(
const string& number_to_parse, string* national_number) const {
size_t index_of_phone_context = number_to_parse.find(kRfc3966PhoneContext);
const string& number_to_parse, string* output_number) const {
size_t index_of_phone_context = number_to_parse.find(Constants::kRfc3966PhoneContext);
absl::optional<string> phone_context =
ExtractPhoneContext(number_to_parse, index_of_phone_context);
if (!IsPhoneContextValid(phone_context)) {
absl::StatusOr<std::optional<PhoneContextParser::PhoneContext>>
phone_context = phone_context_parser_->Parse(number_to_parse);
if (!phone_context.ok()) {
VLOG(2) << "The phone-context value is invalid.";
return NOT_A_NUMBER;
}
if (phone_context.has_value()) {
if (phone_context->has_value()) {
// If the phone context contains a phone number prefix, we need to capture
// it, whereas domains will be ignored.
if (phone_context.value().at(0) == Constants::kPlusSign[0]) {
if (phone_context->value().raw_context.at(0) == Constants::kPlusSign[0]) {
// Additional parameters might follow the phone context. If so, we will
// remove them here because the parameters after phone context are not
// important for parsing the phone number.
StrAppend(national_number, phone_context.value());
StrAppend(output_number, phone_context->value().raw_context);
}
// Now append everything between the "tel:" prefix and the phone-context.
@ -1751,25 +1715,25 @@ PhoneNumberUtil::ErrorType PhoneNumberUtil::BuildNationalNumberForParsing(
// missing, as we have seen in some of the phone number inputs. In that
// case, we append everything from the beginning.
size_t index_of_rfc_prefix = number_to_parse.find(kRfc3966Prefix);
int index_of_national_number = (index_of_rfc_prefix != string::npos) ?
int index_of_number = (index_of_rfc_prefix != string::npos) ?
static_cast<int>(index_of_rfc_prefix + strlen(kRfc3966Prefix)) : 0;
StrAppend(
national_number,
output_number,
number_to_parse.substr(
index_of_national_number,
index_of_phone_context - index_of_national_number));
index_of_number,
index_of_phone_context - index_of_number));
} else {
// Extract a possible number from the string passed in (this strips leading
// characters that could not be the start of a phone number.)
ExtractPossibleNumber(number_to_parse, national_number);
ExtractPossibleNumber(number_to_parse, output_number);
}
// Delete the isdn-subaddress and everything after it if it is present. Note
// extension won't appear at the same time with isdn-subaddress according to
// paragraph 5.3 of the RFC3966 spec.
size_t index_of_isdn = national_number->find(kRfc3966IsdnSubaddress);
size_t index_of_isdn = output_number->find(kRfc3966IsdnSubaddress);
if (index_of_isdn != string::npos) {
national_number->erase(index_of_isdn);
output_number->erase(index_of_isdn);
}
// If both phone context and isdn-subaddress are absent but other parameters
// are present, the parameters are left in nationalNumber. This is because
@ -2285,13 +2249,7 @@ void PhoneNumberUtil::GetCountryMobileToken(int country_calling_code,
}
void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const {
DCHECK(number);
const RegExp& non_digits_pattern = reg_exps_->regexp_cache_->GetRegExp(
StrCat("[^", Constants::kDigits, "]"));
// Delete everything that isn't valid digits.
non_digits_pattern.GlobalReplace(number, "");
// Normalize all decimal digits to ASCII digits.
number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
phone_number_normalizer_->NormalizeDigitsOnly(number);
}
void PhoneNumberUtil::NormalizeDiallableCharsOnly(string* number) const {
@ -2334,7 +2292,7 @@ void PhoneNumberUtil::Normalize(string* number) const {
if (reg_exps_->valid_alpha_phone_pattern_->PartialMatch(*number)) {
NormalizeHelper(reg_exps_->alpha_phone_mappings_, true, number);
}
NormalizeDigitsOnly(number);
phone_number_normalizer_->NormalizeDigitsOnly(number);
}
// Checks to see if the string of characters could possibly be a phone number at
@ -2365,7 +2323,7 @@ bool PhoneNumberUtil::ParsePrefixAsIdd(const RegExp& idd_pattern,
string extracted_digit;
if (reg_exps_->capturing_digit_pattern_->PartialMatch(
number_copy->ToString(), &extracted_digit)) {
NormalizeDigitsOnly(&extracted_digit);
phone_number_normalizer_->NormalizeDigitsOnly(&extracted_digit);
if (extracted_digit == "0") {
return false;
}
@ -2553,7 +2511,7 @@ int PhoneNumberUtil::ExtractCountryCode(string* national_number) const {
// Country codes do not begin with a '0'.
return 0;
}
for (size_t i = 1; i <= kMaxLengthCountryCode; ++i) {
for (size_t i = 1; i <= Constants::kMaxLengthCountryCode; ++i) {
safe_strto32(national_number->substr(0, i), &potential_country_code);
string region_code;
GetRegionCodeForCountryCode(potential_country_code, &region_code);


+ 11
- 10
cpp/src/phonenumbers/phonenumberutil.h View File

@ -29,7 +29,9 @@
#include "phonenumbers/base/basictypes.h"
#include "phonenumbers/base/memory/scoped_ptr.h"
#include "phonenumbers/base/memory/singleton.h"
#include "phonenumbers/phonecontextparser.h"
#include "phonenumbers/phonenumber.pb.h"
#include "phonenumbers/phonenumbernormalizer.h"
#include "phonenumbers/regexpsandmappings.h"
class TelephoneNumber;
@ -64,6 +66,7 @@ class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
friend class ShortNumberInfo;
friend class ShortNumberInfoTest;
friend class Singleton<PhoneNumberUtil>;
friend class XCharValidator;
public:
// This type is neither copyable nor movable.
@ -789,8 +792,6 @@ class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
// The ITU says the maximum length should be 15, but we have found longer
// numbers in Germany.
static const size_t kMaxLengthForNsn = 17;
// The maximum length of the country calling code.
static const size_t kMaxLengthCountryCode = 3;
// Regular expression of characters typically used to start a second phone
// number for the purposes of parsing. This allows us to strip off parts of
@ -806,7 +807,7 @@ class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
scoped_ptr<MatcherApi> matcher_api_;
// Helper class holding useful regular expressions and character mappings.
scoped_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
// A mapping from a country calling code to a RegionCode object which denotes
// the region represented by that country calling code. Note regions under
@ -831,6 +832,12 @@ class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
scoped_ptr<absl::node_hash_map<int, PhoneMetadata> >
country_code_to_non_geographical_metadata_map_;
// An instance of PhoneContextParser.
std::unique_ptr<PhoneContextParser> phone_context_parser_;
// An instance of PhoneNumberNormalizer.
std::shared_ptr<PhoneNumberNormalizer> phone_number_normalizer_;
PhoneNumberUtil();
// Returns a regular expression for the possible extensions that may be found
@ -968,14 +975,8 @@ class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
bool check_region,
PhoneNumber* phone_number) const;
absl::optional<string> ExtractPhoneContext(
const string& number_to_extract_from,
size_t index_of_phone_context) const;
bool IsPhoneContextValid(absl::optional<string> phone_context) const;
ErrorType BuildNationalNumberForParsing(const string& number_to_parse,
string* national_number) const;
string* output_number) const;
bool IsShorterThanPossibleNormalNumber(const PhoneMetadata* country_metadata,
const string& number) const;


+ 4
- 0
cpp/src/phonenumbers/regexpsandmappings.h View File

@ -30,7 +30,11 @@ namespace i18n {
namespace phonenumbers {
class PhoneNumberRegExpsAndMappings {
friend class PhoneContextParser;
friend class PhoneNumberNormalizer;
friend class PhoneNumberUtil;
friend class PhoneContextParserTest;
friend class PhoneNumberNormalizerTest;
private:
void InitializeMapsAndSets();


+ 113
- 0
cpp/test/phonenumbers/phonecontextparser_test.cc View File

@ -0,0 +1,113 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "phonenumbers/phonecontextparser.h"
#include <gtest/gtest.h>
#include "phonenumbers/phonenumbernormalizer.h"
#include "phonenumbers/regexpsandmappings.h"
namespace i18n {
namespace phonenumbers {
using testing::Eq;
// Test fixture that owns a PhoneContextParser together with the collaborators
// it is constructed from, and exposes a thin Parse() forwarder for test bodies.
class PhoneContextParserTest : public testing::Test {
 public:
  // The fixture is neither copyable nor movable.
  PhoneContextParserTest(const PhoneContextParserTest&) = delete;
  PhoneContextParserTest& operator=(const PhoneContextParserTest&) = delete;

 protected:
  // Builds the parser with 64 as the only entry in the country calling code
  // list. Initialisation follows the member declaration order below, so the
  // regexps and normalizer exist before the parser that receives them.
  PhoneContextParserTest()
      : country_calling_codes_(
            std::make_unique<std::vector<int>>(std::vector<int>{64})),
        reg_exps_(new PhoneNumberRegExpsAndMappings()),
        normalizer_(new PhoneNumberNormalizer(reg_exps_)),
        context_parser_(new PhoneContextParser(
            std::move(country_calling_codes_), reg_exps_, normalizer_)) {}

  // Forwards `number_to_parse` to the parser under test and hands back its
  // result unchanged.
  absl::StatusOr<std::optional<PhoneContextParser::PhoneContext>> Parse(
      absl::string_view number_to_parse) {
    return context_parser_->Parse(number_to_parse);
  }

  // Moved into `context_parser_` by the constructor; do not read afterwards.
  std::unique_ptr<std::vector<int>> country_calling_codes_;
  // Shared between the normalizer and the parser.
  std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
  std::shared_ptr<PhoneNumberNormalizer> normalizer_;
  // The object under test.
  std::unique_ptr<PhoneContextParser> context_parser_;
};
// Checks Parse() on inputs whose phone-context parameter is either absent or
// accepted by the parser. Each case lives in its own scope so `parse_result`
// can be declared afresh without redefining a name in the same block.
TEST_F(PhoneContextParserTest, ParsePhoneContext) {
  // Numeric context on a "tel:"-prefixed number.
  {
    const auto parse_result = Parse("tel:03-331-6005;phone-context=+64");
    ASSERT_TRUE(parse_result.ok());
    ASSERT_TRUE(parse_result->has_value());
    EXPECT_EQ("+64", parse_result.value()->raw_context);
    EXPECT_EQ(64, parse_result.value()->country_code);
  }

  // Domain-name context: the raw text is kept, no country code is derived.
  {
    const auto parse_result =
        Parse("tel:03-331-6005;phone-context=example.com");
    ASSERT_TRUE(parse_result.ok());
    ASSERT_TRUE(parse_result->has_value());
    EXPECT_EQ("example.com", parse_result.value()->raw_context);
    EXPECT_EQ(std::nullopt, parse_result.value()->country_code);
  }

  // No "tel:" prefix and a trailing ';' after the context.
  {
    const auto parse_result = Parse("03-331-6005;phone-context=+64;");
    ASSERT_TRUE(parse_result.ok());
    ASSERT_TRUE(parse_result->has_value());
    EXPECT_EQ("+64", parse_result.value()->raw_context);
    EXPECT_EQ(64, parse_result.value()->country_code);
  }

  // The number itself already starts with '+'.
  {
    const auto parse_result = Parse("+64-3-331-6005;phone-context=+64;");
    ASSERT_TRUE(parse_result.ok());
    ASSERT_TRUE(parse_result->has_value());
    EXPECT_EQ("+64", parse_result.value()->raw_context);
    EXPECT_EQ(64, parse_result.value()->country_code);
  }

  // Other parameters before and after the phone-context are ignored.
  {
    const auto parse_result =
        Parse("tel:03-331-6005;foo=bar;phone-context=+64;baz=qux");
    ASSERT_TRUE(parse_result.ok());
    ASSERT_TRUE(parse_result->has_value());
    EXPECT_EQ("+64", parse_result.value()->raw_context);
    EXPECT_EQ(64, parse_result.value()->country_code);
  }

  // No phone-context parameter at all: success with an empty optional.
  // The contained optional is inspected, not the StatusOr wrapper itself.
  {
    const auto parse_result = Parse("tel:03-331-6005");
    ASSERT_TRUE(parse_result.ok());
    EXPECT_FALSE(parse_result->has_value());
  }

  // Numeric contexts for which no country code is reported.
  // NOTE(review): presumably because the fixture registers 64 as the only
  // country calling code; confirm against PhoneContextParser.
  {
    const auto parse_result = Parse("tel:03-331-6005;phone-context=+0");
    ASSERT_TRUE(parse_result.ok());
    ASSERT_TRUE(parse_result->has_value());
    EXPECT_EQ("+0", parse_result.value()->raw_context);
    EXPECT_EQ(std::nullopt, parse_result.value()->country_code);
  }
  {
    const auto parse_result = Parse("tel:03-331-6005;phone-context=+1234");
    ASSERT_TRUE(parse_result.ok());
    ASSERT_TRUE(parse_result->has_value());
    EXPECT_EQ("+1234", parse_result.value()->raw_context);
    EXPECT_EQ(std::nullopt, parse_result.value()->country_code);
  }
}
// Checks that Parse() reports kInvalidArgument for malformed phone-context
// values. The inputs are table-driven so `parse_result` is declared once per
// iteration instead of being redefined repeatedly in one scope.
TEST_F(PhoneContextParserTest, ParsePhoneContextInvalid) {
  static const char* const kInvalidInputs[] = {
      // Parameter present but empty.
      "tel:03-331-6005;phone-context=",
      // Empty value followed by a parameter separator.
      "tel:03-331-6005;phone-context=;",
      // A bare digit with no leading '+'.
      "tel:03-331-6005;phone-context=0",
  };

  for (const char* input : kInvalidInputs) {
    // Names the offending input in the failure output.
    SCOPED_TRACE(input);
    const auto parse_result = Parse(input);
    EXPECT_EQ(absl::StatusCode::kInvalidArgument,
              parse_result.status().code());
  }
}
} // namespace phonenumbers
} // namespace i18n

+ 54
- 0
cpp/test/phonenumbers/phonenumbernormalizer_test.cc View File

@ -0,0 +1,54 @@
// Copyright (C) 2025 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "phonenumbers/phonenumbernormalizer.h"
#include <gtest/gtest.h>
#include "phonenumbers/regexpsandmappings.h"
namespace i18n {
namespace phonenumbers {
using testing::Eq;
class PhoneNumberNormalizerTest : public testing::Test {
public:
// This type is neither copyable nor movable.
PhoneNumberNormalizerTest(const PhoneNumberNormalizerTest&) = delete;
PhoneNumberNormalizerTest& operator=(const PhoneNumberNormalizerTest&) =
delete;
protected:
PhoneNumberNormalizerTest()
: reg_exps_(new PhoneNumberRegExpsAndMappings()),
normalizer_(new PhoneNumberNormalizer(reg_exps_)) {}
std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
std::shared_ptr<PhoneNumberNormalizer> normalizer_;
void NormalizeDigitsOnly(std::string* number) {
normalizer_->NormalizeDigitsOnly(number);
}
};
// NormalizeDigitsOnly must leave only the digits of a string that mixes
// digits with punctuation, symbols and a letter.
TEST_F(PhoneNumberNormalizerTest, NormaliseStripAlphaCharacters) {
  static const string kExpectedOutput("03456234");

  string number("034-56&+a#234");
  NormalizeDigitsOnly(&number);  // Rewrites `number` in place.

  EXPECT_EQ(kExpectedOutput, number)
      << "Conversion did not correctly remove alpha characters";
}
} // namespace phonenumbers
} // namespace i18n

+ 0
- 8
cpp/test/phonenumbers/phonenumberutil_test.cc View File

@ -2968,14 +2968,6 @@ TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) {
<< "Conversion did not correctly replace non-latin digits";
}
// Checks that NormalizeDigitsOnly reduces "034-56&+a#234" to its digits only,
// dropping the punctuation, symbols and letter in between.
TEST_F(PhoneNumberUtilTest, NormaliseStripAlphaCharacters) {
string input_number("034-56&+a#234");
// The number is rewritten in place through the pointer argument.
phone_util_.NormalizeDigitsOnly(&input_number);
static const string kExpectedOutput("03456234");
EXPECT_EQ(kExpectedOutput, input_number)
<< "Conversion did not correctly remove alpha characters";
}
TEST_F(PhoneNumberUtilTest, NormaliseStripNonDiallableCharacters) {
string input_number("03*4-56&+1a#234");
phone_util_.NormalizeDiallableCharsOnly(&input_number);


Loading…
Cancel
Save