diff --git a/cpp/src/default_logger.cc b/cpp/src/default_logger.cc new file mode 100644 index 000000000..a11679cb4 --- /dev/null +++ b/cpp/src/default_logger.cc @@ -0,0 +1,63 @@ +// Copyright (C) 2011 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Philippe Liard + +#include + +#include "default_logger.h" + +using std::cerr; +using std::cout; +using std::endl; + +namespace i18n { +namespace phonenumbers { + +DefaultLogger::DefaultLogger(LogLevel level) : level_(level) {} + +DefaultLogger::~DefaultLogger() {} + +void DefaultLogger::Fatal(const string& msg) const { + if (level_ >= FATAL) { + cerr << "FATAL libphonenumber " << msg << endl; + } +} + +void DefaultLogger::Error(const string& msg) const { + if (level_ >= ERROR) { + cerr << "ERROR libphonenumber " << msg << endl; + } +} + +void DefaultLogger::Warning(const string& msg) const { + if (level_ >= WARNING) { + cerr << "WARNING libphonenumber " << msg << endl; + } +} + +void DefaultLogger::Info(const string& msg) const { + if (level_ >= INFO) { + cout << "INFO libphonenumber " << msg << endl; + } +} + +void DefaultLogger::Debug(const string& msg) const { + if (level_ >= DEBUG) { + cout << "DEBUG libphonenumber " << msg << endl; + } +} + +} // namespace phonenumbers +} // namespace i18n diff --git a/cpp/src/default_logger.h b/cpp/src/default_logger.h new file mode 100644 index 000000000..917e6ba99 --- /dev/null +++ b/cpp/src/default_logger.h @@ -0,0 +1,56 @@ +// Copyright (C) 
2011 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Philippe Liard + +#ifndef I18N_PHONENUMBERS_DEFAULT_LOGGER_H_ +#define I18N_PHONENUMBERS_DEFAULT_LOGGER_H_ + +#include "logger_adapter.h" + +namespace i18n { +namespace phonenumbers { + +enum LogLevel { + FATAL, + ERROR, + WARNING, + INFO, + DEBUG, +}; + +class DefaultLogger : public LoggerAdapter { + public: + virtual ~DefaultLogger(); + + DefaultLogger(LogLevel level = WARNING); + + virtual void Fatal(const string& msg) const; + + virtual void Error(const string& msg) const; + + virtual void Warning(const string& msg) const; + + virtual void Info(const string& msg) const; + + virtual void Debug(const string& msg) const; + + private: + LogLevel level_; +}; + +} // namespace phonenumbers +} // namespace i18n + +# endif // I18N_PHONENUMBERS_DEFAULT_LOGGER_H_ diff --git a/cpp/src/logger_adapter.cc b/cpp/src/logger_adapter.cc new file mode 100644 index 000000000..edfaa0416 --- /dev/null +++ b/cpp/src/logger_adapter.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2011 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Philippe Liard + +# include "logger_adapter.h" + +namespace i18n { +namespace phonenumbers { + +LoggerAdapter::~LoggerAdapter() {} + +} // namespace phonenumbers +} // namespace i18n diff --git a/cpp/src/logger_adapter.h b/cpp/src/logger_adapter.h new file mode 100644 index 000000000..37122bc3b --- /dev/null +++ b/cpp/src/logger_adapter.h @@ -0,0 +1,48 @@ +// Copyright (C) 2011 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Philippe Liard + +#ifndef I18N_PHONENUMBERS_LOGGER_ADAPTER_H_ +#define I18N_PHONENUMBERS_LOGGER_ADAPTER_H_ + +#include + +using std::string; + +namespace i18n { +namespace phonenumbers { + +// Implement this 'interface' to override the way logging is handled +// in the library. 
+class LoggerAdapter { + public: + virtual ~LoggerAdapter(); + + // Logging methods + virtual void Fatal(const string& msg) const = 0; + + virtual void Error(const string& msg) const = 0; + + virtual void Warning(const string& msg) const = 0; + + virtual void Info(const string& msg) const = 0; + + virtual void Debug(const string& msg) const = 0; +}; + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_LOGGER_ADAPTER_H_ diff --git a/cpp/src/phonenumberutil.cc b/cpp/src/phonenumberutil.cc index 70b786d12..70816f7b7 100644 --- a/cpp/src/phonenumberutil.cc +++ b/cpp/src/phonenumberutil.cc @@ -1 +1,2152 @@ -// TODO +// Copyright (C) 2009 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Author: Shaopeng Jia +// Open-sourced by: Philippe Liard + +#include "phonenumberutil.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "base/logging.h" +#include "base/singleton.h" +#include "default_logger.h" +#include "logger_adapter.h" +#include "metadata.h" +#include "phonemetadata.pb.h" +#include "phonenumber.pb.h" +#include "re2_cache.h" +#include "stringutil.h" +#include "utf/unicodetext.h" +#include "utf/utf.h" + +namespace i18n { +namespace phonenumbers { + +using std::cerr; +using std::cout; +using std::endl; +using std::ifstream; +using std::make_pair; +using std::sort; +using std::stringstream; + +using google::protobuf::RepeatedPtrField; +using re2::StringPiece; + +namespace { + +scoped_ptr logger; + +scoped_ptr re2_cache; + +// These objects are created in the function InitializeStaticMapsAndSets. +scoped_ptr > leading_zero_countries; + +// These mappings map a character (key) to a specific digit that should replace +// it for normalization purposes. +scoped_ptr > alpha_mappings; +// For performance reasons, amalgamate both into one map. +scoped_ptr > all_normalization_mappings; +// Separate map of all symbols that we wish to retain when formatting alpha +// numbers. This includes digits, ascii letters and number grouping symbols such +// as "-" and " ". +scoped_ptr > all_plus_number_grouping_symbols; + +// The kPlusSign signifies the international prefix. +const char kPlusSign[] = "+"; + +const char kPlusChars[] = "++"; +scoped_ptr plus_chars_pattern; + +// Pattern that makes it easy to distinguish whether a country has a unique +// international dialing prefix or not. If a country has a unique international +// prefix (e.g. 011 in USA), it will be represented as a string that contains a +// sequence of ASCII digits. 
If there are multiple available international +// prefixes in a country, they will be represented as a regex string that always +// contains character(s) other than ASCII digits. +// Note this regex also includes tilde, which signals waiting for the tone. +scoped_ptr unique_international_prefix; + +// Digits accepted in phone numbers. +// Both Arabic-Indic and Eastern Arabic-Indic are supported. +const char kValidDigits[] = "0-90-9٠-٩۰-۹"; +// We accept alpha characters in phone numbers, ASCII only. We store lower-case +// here only since our regular expressions are case-insensitive. +const char kValidAlpha[] = "a-z"; +scoped_ptr capturing_digit_pattern; +scoped_ptr capturing_ascii_digits_pattern; + +// Regular expression of acceptable characters that may start a phone number +// for the purposes of parsing. This allows us to strip away meaningless +// prefixes to phone numbers that may be mistakenly given to us. This +// consists of digits, the plus symbol and arabic-indic digits. This does +// not contain alpha characters, although they may be used later in the +// number. It also does not include other punctuation, as this will be +// stripped later during parsing and is of no information value when parsing +// a number. The string starting with this valid character is captured. +// This corresponds to VALID_START_CHAR in the java version. +scoped_ptr valid_start_char; +scoped_ptr valid_start_char_pattern; + +// Regular expression of characters typically used to start a second phone +// number for the purposes of parsing. This allows us to strip off parts of +// the number that are actually the start of another number, such as for: +// (530) 583-6985 x302/x2303 -> the second extension here makes this actually +// two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove +// the second extension so that the first number is parsed correctly. The string +// preceding this is captured. +// This corresponds to SECOND_NUMBER_START in the java version. 
+const char kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x"; +scoped_ptr capture_up_to_second_number_start_pattern; + +// Regular expression of trailing characters that we want to remove. We remove +// all characters that are not alpha or numerical characters. The hash +// character is retained here, as it may signify the previous block was an +// extension. Note the capturing block at the start to capture the rest of the +// number if this was a match. +// This corresponds to UNWANTED_END_CHARS in the java version. +const char kUnwantedEndChar[] = "[^\\p{N}\\p{L}#]"; +scoped_ptr unwanted_end_char_pattern; + +// Regular expression of acceptable punctuation found in phone numbers. This +// excludes punctuation found as a leading character only. This consists of +// dash characters, white space characters, full stops, slashes, square +// brackets, parentheses and tildes. It also includes the letter 'x' as that is +// found as a placeholder for carrier information in some phone numbers. +// Full-width variants are also present. +// To find out the unicode code-point of the characters below in vim, highlight +// the character and type 'ga'. Note that the - is used to express ranges of +// full-width punctuation below, as well as being present in the expression +// itself. In emacs, you can use M-x unicode-what to query information about the +// unicode character. +const char kValidPunctuation[] = + "-x‐-―−ー--/  ​⁠ ()()[].\\[\\]/~⁓∼~"; + +// Regular expression of viable phone numbers. This is location independent. +// Checks we have at least three leading digits, and only valid punctuation, +// alpha characters and digits in the phone number. Does not include extension +// data. The symbol 'x' is allowed here as valid punctuation since it is often +// used as a placeholder for carrier codes, for example in Brazilian phone +// numbers. We also allow multiple plus-signs at the start. 
+// Corresponds to the following: +// plus_sign*([punctuation]*[digits]){3,}([punctuation]|[digits]|[alpha])* +scoped_ptr valid_phone_number; + +// Default extension prefix to use when formatting. This will be put in front of +// any extension component of the number, after the main national number is +// formatted. For example, if you wish the default extension formatting to be " +// extn: 3456", then you should specify " extn: " here as the default extension +// prefix. This can be overridden by country-specific preferences. +const char kDefaultExtnPrefix[] = " ext. "; + +// Regexp of all possible ways to write extensions, for use when parsing. This +// will be run as a case-insensitive regexp match. Wide character versions are +// also provided after each ascii version. There are two regular expressions +// here: the more generic one starts with optional white space and ends with an +// optional full stop (.), followed by zero or more spaces/tabs and then the +// numbers themselves. The other one covers the special case of American numbers +// where the extension is written with a hash at the end, such as "- 503#". +// Note that the only capturing groups should be around the digits that you want +// to capture as part of the extension, or else parsing will fail! +scoped_ptr known_extn_patterns; +// Regexp of all known extension prefixes used by different countries followed +// by 1 or more valid digits, for use when parsing. +scoped_ptr extn_pattern; + +// We append optionally the extension pattern to the end here, as a valid phone +// number may have an extension prefix appended, followed by 1 or more digits. +scoped_ptr valid_phone_number_pattern; + +// We use this pattern to check if the phone number has at least three letters +// in it - if so, then we treat it as a number where some phone-number digits +// are represented by letters. 
+scoped_ptr valid_alpha_phone_pattern; + +scoped_ptr first_group_capturing_pattern; + +scoped_ptr carrier_code_pattern; + +void TransformRegularExpressionToRE2Syntax(string* regex) { + DCHECK(regex != NULL); + string& r = *regex; + + // Replace '$' with '\\' + for (string::iterator it = r.begin(); it != r.end(); ++it) + if (*it == '$') + *it = '\\'; +} + +// Returns a pointer to the description inside the metadata of the appropriate +// type. +const PhoneNumberDesc* GetNumberDescByType( + const PhoneMetadata& metadata, + PhoneNumberUtil::PhoneNumberType type) { + switch (type) { + case PhoneNumberUtil::PREMIUM_RATE: + return &metadata.premium_rate(); + case PhoneNumberUtil::TOLL_FREE: + return &metadata.toll_free(); + case PhoneNumberUtil::MOBILE: + return &metadata.mobile(); + case PhoneNumberUtil::FIXED_LINE: + case PhoneNumberUtil::FIXED_LINE_OR_MOBILE: + return &metadata.fixed_line(); + case PhoneNumberUtil::SHARED_COST: + return &metadata.shared_cost(); + case PhoneNumberUtil::VOIP: + return &metadata.voip(); + case PhoneNumberUtil::PERSONAL_NUMBER: + return &metadata.personal_number(); + case PhoneNumberUtil::PAGER: + return &metadata.pager(); + case PhoneNumberUtil::UAN: + return &metadata.uan(); + default: + return &metadata.general_desc(); + } +} + +// A helper function that is used by Format and FormatByPattern. 
+void FormatNumberByFormat(int country_code, + PhoneNumberUtil::PhoneNumberFormat number_format, + const string& formatted_national_number, + const string& formatted_extension, + string* formatted_number) { + switch (number_format) { + case PhoneNumberUtil::E164: + formatted_number->assign(StrCat(kPlusSign, + SimpleItoa(country_code), + formatted_national_number, + formatted_extension)); + return; + case PhoneNumberUtil::INTERNATIONAL: + formatted_number->assign(StrCat(kPlusSign, + SimpleItoa(country_code), + " ", + formatted_national_number, + formatted_extension)); + return; + case PhoneNumberUtil::NATIONAL: + default: + formatted_number->assign(StrCat(formatted_national_number, + formatted_extension)); + } +} + +// The number_for_leading_digits_match is a separate parameter, because for +// alpha numbers we want to pass in the numeric version to select the right +// formatting rule, but then we actually apply the formatting pattern to the +// national_number (which in this case has alpha characters in it). +// +// Note that carrierCode is optional - if an empty string, no carrier code +// replacement will take place. +void FormatAccordingToFormatsWithCarrier( + const string& number_for_leading_digits_match, + const RepeatedPtrField& available_formats, + PhoneNumberUtil::PhoneNumberFormat number_format, + const string& national_number, + const string& carrier_code, + string* formatted_number) { + DCHECK(formatted_number); + for (RepeatedPtrField::const_iterator + it = available_formats.begin(); it != available_formats.end(); ++it) { + int size = it->leading_digits_pattern_size(); + if (size > 0) { + StringPiece number_copy(number_for_leading_digits_match); + // We always use the last leading_digits_pattern, as it is the most + // detailed. 
+ if (!RE2::Consume(&number_copy, + RE2Cache::ScopedAccess( + re2_cache.get(), + it->leading_digits_pattern(size - 1)))) { + continue; + } + } + RE2Cache::ScopedAccess pattern_to_match(re2_cache.get(), it->pattern()); + if (RE2::FullMatch(national_number, pattern_to_match)) { + string formatting_pattern(it->format()); + if (number_format == PhoneNumberUtil::NATIONAL && + carrier_code.length() > 0 && + it->domestic_carrier_code_formatting_rule().length() > 0) { + // Replace the $CC in the formatting rule with the desired carrier code. + string carrier_code_formatting_rule = + it->domestic_carrier_code_formatting_rule(); + RE2::Replace(&carrier_code_formatting_rule, *carrier_code_pattern, + carrier_code); + TransformRegularExpressionToRE2Syntax(&carrier_code_formatting_rule); + RE2::Replace(&formatting_pattern, *first_group_capturing_pattern, + carrier_code_formatting_rule); + } else { + // Use the national prefix formatting rule instead. + string national_prefix_formatting_rule = + it->national_prefix_formatting_rule(); + if (number_format == PhoneNumberUtil::NATIONAL && + national_prefix_formatting_rule.length() > 0) { + // Apply the national_prefix_formatting_rule as the formatting_pattern + // contains only information on how the national significant number + // should be formatted at this point. + TransformRegularExpressionToRE2Syntax( + &national_prefix_formatting_rule); + RE2::Replace(&formatting_pattern, *first_group_capturing_pattern, + national_prefix_formatting_rule); + } + } + TransformRegularExpressionToRE2Syntax(&formatting_pattern); + formatted_number->assign(national_number); + RE2::GlobalReplace(formatted_number, pattern_to_match, + formatting_pattern); + return; + } + } + // If no pattern above is matched, we format the number as a whole. + formatted_number->assign(national_number); +} + +// Simple wrapper of FormatAccordingToFormatsWithCarrier for the common case of +// no carrier code. 
+void FormatAccordingToFormats( + const string& number_for_leading_digits_match, + const RepeatedPtrField& available_formats, + PhoneNumberUtil::PhoneNumberFormat number_format, + const string& national_number, + string* formatted_number) { + DCHECK(formatted_number); + FormatAccordingToFormatsWithCarrier(number_for_leading_digits_match, + available_formats, number_format, + national_number, "", formatted_number); +} + +// Returns true when one national number is the suffix of the other or both are +// the same. +bool IsNationalNumberSuffixOfTheOther(const PhoneNumber& first_number, + const PhoneNumber& second_number) { + const string& first_number_national_number = + SimpleItoa(first_number.national_number()); + const string& second_number_national_number = + SimpleItoa(second_number.national_number()); + // Note that HasSuffixString returns true if the numbers are equal. + return HasSuffixString(first_number_national_number, + second_number_national_number) || + HasSuffixString(second_number_national_number, + first_number_national_number); +} + +bool IsNumberMatchingDesc(const string& national_number, + const PhoneNumberDesc& number_desc) { + return (RE2::FullMatch(national_number, + RE2Cache::ScopedAccess(re2_cache.get(), + number_desc.possible_number_pattern())) && + RE2::FullMatch(national_number, + RE2Cache::ScopedAccess(re2_cache.get(), + number_desc.national_number_pattern()))); +} + +PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper( + const string& national_number, const PhoneMetadata& metadata) { + const PhoneNumberDesc& general_desc = metadata.general_desc(); + if (!general_desc.has_national_number_pattern() || + !IsNumberMatchingDesc(national_number, general_desc)) { + logger->Debug("Number type unknown - " + "doesn't match general national number pattern."); + return PhoneNumberUtil::UNKNOWN; + } + if (IsNumberMatchingDesc(national_number, metadata.premium_rate())) { + logger->Debug("Number is a premium number."); + return 
PhoneNumberUtil::PREMIUM_RATE; + } + if (IsNumberMatchingDesc(national_number, metadata.toll_free())) { + logger->Debug("Number is a toll-free number."); + return PhoneNumberUtil::TOLL_FREE; + } + if (IsNumberMatchingDesc(national_number, metadata.shared_cost())) { + logger->Debug("Number is a shared cost number."); + return PhoneNumberUtil::SHARED_COST; + } + if (IsNumberMatchingDesc(national_number, metadata.voip())) { + logger->Debug("Number is a VOIP (Voice over IP) number."); + return PhoneNumberUtil::VOIP; + } + if (IsNumberMatchingDesc(national_number, metadata.personal_number())) { + logger->Debug("Number is a personal number."); + return PhoneNumberUtil::PERSONAL_NUMBER; + } + if (IsNumberMatchingDesc(national_number, metadata.pager())) { + logger->Debug("Number is a pager number."); + return PhoneNumberUtil::PAGER; + } + if (IsNumberMatchingDesc(national_number, metadata.uan())) { + logger->Debug("Number is a UAN."); + return PhoneNumberUtil::UAN; + } + + bool is_fixed_line = + IsNumberMatchingDesc(national_number, metadata.fixed_line()); + if (is_fixed_line) { + if (metadata.same_mobile_and_fixed_line_pattern()) { + logger->Debug("Fixed-line and mobile patterns equal, " + "number is fixed-line or mobile"); + return PhoneNumberUtil::FIXED_LINE_OR_MOBILE; + } else if (IsNumberMatchingDesc(national_number, metadata.mobile())) { + logger->Debug("Fixed-line and mobile patterns differ, but number is " + "still fixed-line or mobile"); + return PhoneNumberUtil::FIXED_LINE_OR_MOBILE; + } + logger->Debug("Number is a fixed line number."); + return PhoneNumberUtil::FIXED_LINE; + } + // Otherwise, test to see if the number is mobile. Only do this if certain + // that the patterns for mobile and fixed line aren't the same. 
+ if (!metadata.same_mobile_and_fixed_line_pattern() && + IsNumberMatchingDesc(national_number, metadata.mobile())) { + logger->Debug("Number is a mobile number."); + return PhoneNumberUtil::MOBILE; + } + logger->Debug("Number type unknown - doesn't match any specific number type" + " pattern."); + return PhoneNumberUtil::UNKNOWN; +} + +int DecodeUTF8Char(const char* in, char32* out) { + Rune r; + int len = chartorune(&r, in); + *out = r; + + return len; +} + +char32 ToUnicodeCodepoint(const char* unicode_char) { + char32 codepoint; + DecodeUTF8Char(unicode_char, &codepoint); + + return codepoint; +} + +// Initialisation helper function used to populate the regular expressions in a +// defined order. +void CreateRegularExpressions() { + unique_international_prefix.reset(new RE2("[\\d]+(?:[~⁓∼~][\\d]+)?")); + first_group_capturing_pattern.reset(new RE2("(\\$1)")); + carrier_code_pattern.reset(new RE2("\\$CC")); + capturing_digit_pattern.reset(new RE2(StrCat("([", kValidDigits, "])"))); + capturing_ascii_digits_pattern.reset(new RE2("(\\d+)")); + valid_start_char.reset(new string(StrCat( + "[", kPlusChars, kValidDigits, "]"))); + valid_start_char_pattern.reset(new RE2(*valid_start_char)); + capture_up_to_second_number_start_pattern.reset(new RE2( + kCaptureUpToSecondNumberStart)); + unwanted_end_char_pattern.reset(new RE2( + kUnwantedEndChar)); + valid_phone_number.reset(new string( + StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits, + "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*"))); + // Canonical-equivalence doesn't seem to be an option with RE2, so we allow + // two options for representing the ó - the character itself, and one in the + // unicode decomposed form with the combining acute accent. 
+ known_extn_patterns.reset(new string( + StrCat("[  \\t,]*(?:ext(?:ensi(?:o\u0301?|ó))?n?|extn?|[,xx##~~]|" + "int|int|anexo)" + "[:\\..]?[  \\t,-]*([", + kValidDigits, "]{1,7})#?|[- ]+([", kValidDigits, "]{1,5})#"))); + extn_pattern.reset(new RE2(StrCat("(?i)(?:", *known_extn_patterns, ")$"))); + valid_phone_number_pattern.reset(new RE2( + StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, ")?"))); + valid_alpha_phone_pattern.reset(new RE2( + StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))); + plus_chars_pattern.reset(new RE2(StrCat("[", kPlusChars, "]+"))); +} + +void InitializeStaticMapsAndSets() { + // Create global objects. + re2_cache.reset(new RE2Cache(64)); + leading_zero_countries.reset(new set); + all_plus_number_grouping_symbols.reset(new map); + alpha_mappings.reset(new map); + all_normalization_mappings.reset(new map); + + leading_zero_countries->insert(39); // Italy + leading_zero_countries->insert(47); // Norway + leading_zero_countries->insert(225); // Cote d'Ivoire + leading_zero_countries->insert(227); // Niger + leading_zero_countries->insert(228); // Togo + leading_zero_countries->insert(241); // Gabon + leading_zero_countries->insert(242); // Congo (Rep. of the) + leading_zero_countries->insert(268); // Swaziland + leading_zero_countries->insert(378); // San Marino + leading_zero_countries->insert(379); // Vatican City + leading_zero_countries->insert(501); // Belize + // Punctuation that we wish to respect in alpha numbers, as they show number + // groupings are mapped here. 
+ all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("-"), '-')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("‐"), '-')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("-"), '-')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("―"), '-')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("−"), '-')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("-"), '-')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("/"), '/')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("/"), '/')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint(" "), ' ')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("⁠"), ' ')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint(" "), ' ')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("."), '.')); + all_plus_number_grouping_symbols->insert( + make_pair(ToUnicodeCodepoint("."), '.')); + // Only the upper-case letters are added here - the lower-case versions are + // added programmatically. 
+ alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("B"), '2')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("C"), '2')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("D"), '3')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("E"), '3')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("F"), '3')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("G"), '4')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("H"), '4')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("I"), '4')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("J"), '5')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("K"), '5')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("L"), '5')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("M"), '6')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("N"), '6')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("O"), '6')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("P"), '7')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Q"), '7')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("R"), '7')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("S"), '7')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("T"), '8')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("U"), '8')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("V"), '8')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("W"), '9')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("X"), '9')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Y"), '9')); + alpha_mappings->insert(make_pair(ToUnicodeCodepoint("Z"), '9')); + map lower_case_mappings; + map alpha_letters; + for (map::const_iterator it = alpha_mappings->begin(); + it != alpha_mappings->end(); + ++it) { + // Convert all the upper-case ASCII letters to lower-case. 
+ if (it->first < 128) { + char letter_as_upper = static_cast(it->first); + char32 letter_as_lower = static_cast(tolower(letter_as_upper)); + lower_case_mappings.insert(make_pair(letter_as_lower, it->second)); + // Add the letters in both variants to the alpha_letters map. This just + // pairs each letter with its upper-case representation so that it can be + // retained when normalising alpha numbers. + alpha_letters.insert(make_pair(letter_as_lower, letter_as_upper)); + alpha_letters.insert(make_pair(it->first, letter_as_upper)); + } + } + // In the Java version we don't insert the lower-case mappings in the map, + // because we convert to upper case on the fly. Doing this here would involve + // pulling in all of ICU, which we don't want to do if we don't have to. + alpha_mappings->insert(lower_case_mappings.begin(), + lower_case_mappings.end()); + all_normalization_mappings->insert(alpha_mappings->begin(), + alpha_mappings->end()); + all_plus_number_grouping_symbols->insert(alpha_letters.begin(), + alpha_letters.end()); + // Add the ASCII digits so that they don't get deleted by NormalizeHelper(). + for (char c = '0'; c <= '9'; ++c) { + all_normalization_mappings->insert(make_pair(c, c)); + all_plus_number_grouping_symbols->insert(make_pair(c, c)); + } + CreateRegularExpressions(); +} + +// Normalizes a string of characters representing a phone number by replacing +// all characters found in the accompanying map with the values therein, and +// stripping all other characters if remove_non_matches is true. +// Parameters: +// number - a pointer to a string of characters representing a phone number to +// be normalized. +// normalization_replacements - a mapping of characters to what they should be +// replaced by in the normalized version of the phone number +// remove_non_matches - indicates whether characters that are not able to be +// replaced should be stripped from the number. If this is false, they will be +// left unchanged in the number. 
+void NormalizeHelper(const map& normalization_replacements, + bool remove_non_matches, + string* number) { + DCHECK(number); + UnicodeText number_as_unicode; + number_as_unicode.PointToUTF8(number->data(), number->size()); + string normalized_number; + for (UnicodeText::const_iterator it = number_as_unicode.begin(); + it != number_as_unicode.end(); + ++it) { + map::const_iterator found_glyph_pair = + normalization_replacements.find(*it); + if (found_glyph_pair != normalization_replacements.end()) { + normalized_number.push_back(found_glyph_pair->second); + } else if (!remove_non_matches) { + normalized_number.append(it.utf8_data()); + } + // If neither of the above are true, we remove this character. + } + number->assign(normalized_number); +} + +// Strips the IDD from the start of the number if present. Helper function used +// by MaybeStripInternationalPrefixAndNormalize. +bool ParsePrefixAsIdd(const RE2& idd_pattern, string* number) { + DCHECK(number); + StringPiece number_copy(*number); + // First attempt to strip the idd_pattern at the start, if present. We make a + // copy so that we can revert to the original string if necessary. + if (RE2::Consume(&number_copy, idd_pattern)) { + // Only strip this if the first digit after the match is not a 0, since + // country codes cannot begin with 0. + string extracted_digit; + if (RE2::PartialMatch(number_copy, + *capturing_digit_pattern, + &extracted_digit)) { + PhoneNumberUtil::NormalizeDigitsOnly(&extracted_digit); + if (extracted_digit == "0") { + return false; + } + } + number->assign(number_copy.ToString()); + return true; + } + return false; +} + +} // namespace + +// Fetch the metadata which are actually already available in the address space +// (embedded). 
+class DefaultMetadataProvider : public PhoneNumberUtil::MetadataProvider { + public: + virtual ~DefaultMetadataProvider() {} + + virtual pair operator()() { + return make_pair(metadata_get(), metadata_size()); + } +}; + +bool PhoneNumberUtil::LoadMetadata(PhoneMetadataCollection* metadata, + MetadataProvider& provider) { + + pair p = provider(); + const void* metadata_start = p.first; + unsigned size = p.second; + + if (!metadata->ParseFromArray(metadata_start, size)) { + cerr << "Could not parse binary data." << endl; + return false; + } + return true; +} + +void PhoneNumberUtil::SetLoggerAdapter(LoggerAdapter* logger_adapter) { + logger.reset(logger_adapter); +} + +PhoneNumberUtil::PhoneNumberUtil(MetadataProvider* provider) + : country_code_to_region_code_map_(new vector()), + nanpa_countries_(new set()), + country_to_metadata_map_(new map()) { + + if (logger == NULL) { + SetLoggerAdapter(new DefaultLogger()); + } + PhoneMetadataCollection metadata_collection; + DefaultMetadataProvider default_provider; + + if (!LoadMetadata(&metadata_collection, provider ? *provider + : default_provider)) { + logger->Fatal("Could not load metadata"); + return; + } + // Storing data in a temporary map to make it easier to find other countries + // that share a country code when inserting data. 
+ map* > country_code_to_region_map; + for (RepeatedPtrField::const_iterator it = + metadata_collection.metadata().begin(); + it != metadata_collection.metadata().end(); + ++it) { + const PhoneMetadata& phone_metadata = *it; + const string& region_code = phone_metadata.id(); + country_to_metadata_map_->insert(make_pair(region_code, *it)); + int country_code = it->country_code(); + map*>::iterator country_code_in_map = + country_code_to_region_map.find(country_code); + if (country_code_in_map != country_code_to_region_map.end()) { + if (it->main_country_for_code()) { + country_code_in_map->second->push_front(region_code); + } else { + country_code_in_map->second->push_back(region_code); + } + } else { + // For most countries, there will be only one region code for the country + // code. + list* list_with_region_code = new list(); + list_with_region_code->push_back(region_code); + country_code_to_region_map.insert(make_pair(country_code, + list_with_region_code)); + } + if (country_code == kNanpaCountryCode) { + nanpa_countries_->insert(region_code); + } + } + + country_code_to_region_code_map_->insert( + country_code_to_region_code_map_->begin(), + country_code_to_region_map.begin(), + country_code_to_region_map.end()); + // Sort all the pairs in ascending order according to country calling code. + sort(country_code_to_region_code_map_->begin(), + country_code_to_region_code_map_->end(), + CompareFirst()); + + InitializeStaticMapsAndSets(); +} + +PhoneNumberUtil::~PhoneNumberUtil() { +} + +// Public wrapper function to get a PhoneNumberUtil instance with the default +// metadata file. 
+// static +PhoneNumberUtil* PhoneNumberUtil::GetInstance() { + return Singleton::get(); +} + +bool PhoneNumberUtil::IsValidRegionCode(const string& region_code) const { + return (country_to_metadata_map_->find(region_code) != + country_to_metadata_map_->end()); +} + +bool PhoneNumberUtil::HasValidRegionCode(const string& region_code, + int country_code, + const string& number) const { + if (!IsValidRegionCode(region_code)) { + logger->Info(string("Number ") + number + + " has invalid or missing country code (" + country_code + ")"); + return false; + } + return true; +} + +// Returns a pointer to the phone metadata for the appropriate region. +const PhoneMetadata* PhoneNumberUtil::GetMetadataForRegion( + const string& region_code) const { + map::const_iterator it = + country_to_metadata_map_->find(region_code); + if (it != country_to_metadata_map_->end()) { + return &it->second; + } + return NULL; +} + +void PhoneNumberUtil::Format(const PhoneNumber& number, + PhoneNumberFormat number_format, + string* formatted_number) const { + DCHECK(formatted_number); + int country_code = number.country_code(); + string national_significant_number; + GetNationalSignificantNumber(number, &national_significant_number); + if (number_format == E164) { + // Early exit for E164 case since no formatting of the national number needs + // to be applied. Extensions are not formatted. + FormatNumberByFormat(country_code, E164, national_significant_number, "", + formatted_number); + return; + } + // Note here that all NANPA formatting rules are contained by US, so we use + // that to format NANPA numbers. The same applies to Russian Fed countries - + // rules are contained by Russia. French Indian Ocean country rules are + // contained by Réunion. 
+ string region_code; + GetRegionCodeForCountryCode(country_code, ®ion_code); + if (!HasValidRegionCode(region_code, country_code, + national_significant_number)) { + formatted_number->assign(national_significant_number); + return; + } + string formatted_extension; + MaybeGetFormattedExtension(number, region_code, &formatted_extension); + string formatted_national_number; + FormatNationalNumber(national_significant_number, region_code, number_format, + &formatted_national_number); + FormatNumberByFormat(country_code, number_format, + formatted_national_number, + formatted_extension, formatted_number); +} + +void PhoneNumberUtil::FormatByPattern( + const PhoneNumber& number, + PhoneNumberFormat number_format, + const RepeatedPtrField& user_defined_formats, + string* formatted_number) const { + static const RE2 national_prefix_pattern("\\$NP"); + static const RE2 first_group_pattern("\\$FG"); + DCHECK(formatted_number); + int country_code = number.country_code(); + // Note GetRegionCodeForCountryCode() is used because formatting information + // for countries which share a country code is contained by only one country + // for performance reasons. For example, for NANPA countries it will be + // contained in the metadata for US. 
+ string region_code; + GetRegionCodeForCountryCode(country_code, ®ion_code); + string national_significant_number; + GetNationalSignificantNumber(number, &national_significant_number); + if (!HasValidRegionCode(region_code, country_code, + national_significant_number)) { + formatted_number->assign(national_significant_number); + return; + } + RepeatedPtrField user_defined_formats_copy; + for (RepeatedPtrField::const_iterator it = + user_defined_formats.begin(); + it != user_defined_formats.end(); + ++it) { + string national_prefix_formatting_rule( + it->national_prefix_formatting_rule()); + if (!national_prefix_formatting_rule.empty()) { + const string& national_prefix = + GetMetadataForRegion(region_code)->national_prefix(); + NumberFormat* num_format_copy = user_defined_formats_copy.Add(); + num_format_copy->MergeFrom(*it); + if (!national_prefix.empty()) { + // Replace $NP with national prefix and $FG with the first group ($1). + RE2::Replace(&national_prefix_formatting_rule, national_prefix_pattern, + national_prefix); + RE2::Replace(&national_prefix_formatting_rule, first_group_pattern, + "$1"); + num_format_copy->set_national_prefix_formatting_rule( + national_prefix_formatting_rule); + } else { + // We don't want to have a rule for how to format the national prefix if + // there isn't one. 
+ num_format_copy->clear_national_prefix_formatting_rule(); + } + } else { + user_defined_formats_copy.Add()->MergeFrom(*it); + } + } + + string formatted_number_without_extension; + FormatAccordingToFormats(national_significant_number, + user_defined_formats_copy, + number_format, national_significant_number, + &formatted_number_without_extension); + string formatted_extension; + MaybeGetFormattedExtension(number, region_code, &formatted_extension); + FormatNumberByFormat(country_code, number_format, + formatted_number_without_extension, formatted_extension, + formatted_number); +} + +void PhoneNumberUtil::FormatNationalNumberWithCarrierCode( + const PhoneNumber& number, + const string& carrier_code, + string* formatted_number) const { + int country_code = number.country_code(); + string national_significant_number; + GetNationalSignificantNumber(number, &national_significant_number); + // Note GetRegionCodeForCountryCode() is used because formatting information + // for countries which share a country code is contained by only one country + // for performance reasons. For example, for NANPA countries it will be + // contained in the metadata for US. 
+ string region_code; + GetRegionCodeForCountryCode(country_code, ®ion_code); + if (!HasValidRegionCode(region_code, country_code, + national_significant_number)) { + formatted_number->assign(national_significant_number); + } + string formatted_extension; + MaybeGetFormattedExtension(number, region_code, &formatted_extension); + string formatted_national_number; + FormatNationalNumberWithCarrier(national_significant_number, region_code, + NATIONAL, carrier_code, + &formatted_national_number); + FormatNumberByFormat(country_code, NATIONAL, formatted_national_number, + formatted_extension, formatted_number); +} + +void PhoneNumberUtil::FormatNationalNumberWithPreferredCarrierCode( + const PhoneNumber& number, + const string& fallback_carrier_code, + string* formatted_number) const { + FormatNationalNumberWithCarrierCode( + number, + number.has_preferred_domestic_carrier_code() + ? number.preferred_domestic_carrier_code() + : fallback_carrier_code, + formatted_number); +} + +void PhoneNumberUtil::FormatOutOfCountryCallingNumber( + const PhoneNumber& number, + const string& calling_from, + string* formatted_number) const { + DCHECK(formatted_number); + + if (!IsValidRegionCode(calling_from)) { + logger->Info("Trying to format number from invalid region. International" + " formatting applied."); + Format(number, INTERNATIONAL, formatted_number); + return; + } + int country_code = number.country_code(); + string region_code; + GetRegionCodeForCountryCode(country_code, ®ion_code); + string national_significant_number; + GetNationalSignificantNumber(number, &national_significant_number); + if (!HasValidRegionCode(region_code, country_code, + national_significant_number)) { + formatted_number->assign(national_significant_number); + return; + } + if (country_code == kNanpaCountryCode) { + if (IsNANPACountry(calling_from)) { + // For NANPA countries, return the national format for these countries but + // prefix it with the country code. 
+ string national_number; + Format(number, NATIONAL, &national_number); + formatted_number->assign(StrCat(SimpleItoa(country_code), " ", + national_number)); + return; + } + } else if (country_code == GetCountryCodeForRegion(calling_from)) { + // If neither country is a NANPA country, then we check to see if the + // country code of the number and the country code of the country we are + // calling from are the same. + // For countries that share a country calling code, the country code need + // not be dialled. This also applies when dialling within a country, so + // this if clause covers both these cases. Technically this is the case + // for dialling from la Réunion to other overseas departments of France + // (French Guiana, Martinique, Guadeloupe), but not vice versa - so we + // don't cover this edge case for now and for those cases return the + // version including country code. + // Details here: + // http://www.petitfute.com/voyage/225-info-pratiques-reunion + Format(number, NATIONAL, formatted_number); + return; + } + string formatted_national_number; + FormatNationalNumber(national_significant_number, region_code, INTERNATIONAL, + &formatted_national_number); + const PhoneMetadata* metadata = GetMetadataForRegion(calling_from); + const string& international_prefix = metadata->international_prefix(); + string formatted_extension; + MaybeGetFormattedExtension(number, region_code, &formatted_extension); + // For countries that have multiple international prefixes, the international + // format of the number is returned, unless there is a preferred international + // prefix. + string international_prefix_for_formatting( + RE2::FullMatch(international_prefix, *unique_international_prefix) + ? 
international_prefix + : metadata->preferred_international_prefix()); + if (!international_prefix_for_formatting.empty()) { + formatted_number->assign( + StrCat(international_prefix_for_formatting, " ", + SimpleItoa(country_code), " ", formatted_national_number, + formatted_extension)); + } else { + FormatNumberByFormat(country_code, INTERNATIONAL, formatted_national_number, + formatted_extension, formatted_number); + } +} + +void PhoneNumberUtil::FormatOutOfCountryKeepingAlphaChars( + const PhoneNumber& number, + const string& calling_from, + string* formatted_number) const { + // If there is no raw input, then we can't keep alpha characters because there + // aren't any. In this case, we return FormatOutOfCountryCallingNumber. + if (number.raw_input().empty()) { + FormatOutOfCountryCallingNumber(number, calling_from, formatted_number); + return; + } + string region_code; + GetRegionCodeForCountryCode(number.country_code(), ®ion_code); + if (!HasValidRegionCode(region_code, number.country_code(), + number.raw_input())) { + formatted_number->assign(number.raw_input()); + return; + } + // Strip any prefix such as country code, IDD, that was present. We do this by + // comparing the number in raw_input with the parsed number. + string raw_input_copy(number.raw_input()); + // Normalize punctuation. We retain number grouping symbols such as " " only. + NormalizeHelper(*all_plus_number_grouping_symbols, true, &raw_input_copy); + // Now we trim everything before the first three digits in the parsed number. + // We choose three because all valid alpha numbers have 3 digits at the start + // - if it does not, then we don't trim anything at all. Similarly, if the + // national number was less than three digits, we don't trim anything at all. 
+ string national_number; + GetNationalSignificantNumber(number, &national_number); + if (national_number.length() > 3) { + size_t first_national_number_digit = + raw_input_copy.find(national_number.substr(0, 3)); + if (first_national_number_digit != string::npos) { + raw_input_copy = raw_input_copy.substr(first_national_number_digit); + } + } + const PhoneMetadata* metadata = GetMetadataForRegion(calling_from); + if (number.country_code() == kNanpaCountryCode) { + if (IsNANPACountry(calling_from)) { + formatted_number->assign(StrCat(SimpleItoa(number.country_code()), " ", + raw_input_copy)); + return; + } + } else if (number.country_code() == GetCountryCodeForRegion(calling_from)) { + // Here we copy the formatting rules so we can modify the pattern we expect + // to match against. + RepeatedPtrField available_formats = metadata->number_format(); + for (RepeatedPtrField::iterator + it = available_formats.begin(); it != available_formats.end(); ++it) { + // The first group is the first group of digits that the user determined. + it->set_pattern("(\\d+)(.*)"); + // Here we just concatenate them back together after the national prefix + // has been fixed. + it->set_format("$1$2"); + } + // Now we format using these patterns instead of the default pattern, but + // with the national prefix prefixed if necessary, by choosing the format + // rule based on the leading digits present in the unformatted national + // number. + // This will not work in the cases where the pattern (and not the + // leading digits) decide whether a national prefix needs to be used, since + // we have overridden the pattern to match anything, but that is not the + // case in the metadata to date. 
+ FormatAccordingToFormats(national_number, available_formats, + NATIONAL, raw_input_copy, formatted_number); + return; + } + + const string& international_prefix = metadata->international_prefix(); + // For countries that have multiple international prefixes, the international + // format of the number is returned, unless there is a preferred international + // prefix. + string international_prefix_for_formatting( + RE2::FullMatch(international_prefix, *unique_international_prefix) + ? international_prefix + : metadata->preferred_international_prefix()); + if (!international_prefix_for_formatting.empty()) { + formatted_number->assign( + StrCat(international_prefix_for_formatting, " ", + SimpleItoa(number.country_code()), " ", raw_input_copy)); + } else { + FormatNumberByFormat(number.country_code(), INTERNATIONAL, raw_input_copy, + "", formatted_number); + } +} + +void PhoneNumberUtil::FormatNationalNumber( + const string& number, + const string& region_code, + PhoneNumberFormat number_format, + string* formatted_number) const { + DCHECK(formatted_number); + FormatNationalNumberWithCarrier(number, region_code, number_format, "", + formatted_number); +} + +// Note in some countries, the national number can be written in two completely +// different ways depending on whether it forms part of the NATIONAL format or +// INTERNATIONAL format. The number_format parameter here is used to specify +// which format to use for those cases. If a carrier_code is specified, this +// will be inserted into the formatted string to replace $CC. 
+void PhoneNumberUtil::FormatNationalNumberWithCarrier( + const string& number, + const string& region_code, + PhoneNumberFormat number_format, + const string& carrier_code, + string* formatted_number) const { + DCHECK(formatted_number); + const PhoneMetadata* metadata = GetMetadataForRegion(region_code); + // When the intl_number_formats exists, we use that to format national number + // for the INTERNATIONAL format instead of using the number_formats. + const RepeatedPtrField available_formats = + (metadata->intl_number_format_size() == 0 || number_format == NATIONAL) + ? metadata->number_format() + : metadata->intl_number_format(); + FormatAccordingToFormatsWithCarrier(number, available_formats, number_format, + number, carrier_code, formatted_number); +} + +// Gets the formatted extension of a phone number, if the phone number had an +// extension specified. If not, it returns an empty string. +void PhoneNumberUtil::MaybeGetFormattedExtension(const PhoneNumber& number, + const string& region_code, + string* extension) const { + DCHECK(extension); + if (!number.has_extension()) { + extension->assign(""); + } else { + FormatExtension(number.extension(), region_code, extension); + } +} + +// Formats the extension part of the phone number by prefixing it with the +// appropriate extension prefix. This will be the default extension prefix, +// unless overridden by a preferred extension prefix for this country. 
+void PhoneNumberUtil::FormatExtension(const string& extension_digits, + const string& region_code, + string* extension) const { + DCHECK(extension); + const PhoneMetadata* metadata = GetMetadataForRegion(region_code); + if (metadata->has_preferred_extn_prefix()) { + extension->assign(StrCat(metadata->preferred_extn_prefix(), + extension_digits)); + } else { + extension->assign(StrCat(kDefaultExtnPrefix, extension_digits)); + } +} + +bool PhoneNumberUtil::IsNANPACountry(const string& region_code) const { + return nanpa_countries_->find(region_code) != nanpa_countries_->end(); +} + +// Returns the region codes that matches the specific country code. In the case +// of no region code being found, region_codes will be left empty. +void PhoneNumberUtil::GetRegionCodesForCountryCode( + int country_code, + list* region_codes) const { + DCHECK(region_codes); + // Create a IntRegionsPair with the country_code passed in, and use it to + // locate the pair with the same country_code in the sorted vector. + IntRegionsPair target_pair; + target_pair.first = country_code; + + typedef vector::const_iterator ConstIterator; + pair range = + equal_range(country_code_to_region_code_map_->begin(), + country_code_to_region_code_map_->end(), + target_pair, CompareFirst()); + + if (range.first != range.second) { + region_codes->insert(region_codes->begin(), + range.first->second->begin(), + range.first->second->end()); + } +} + +// Returns the region code that matches the specific country code. In the case +// of no region code being found, ZZ will be returned. +void PhoneNumberUtil::GetRegionCodeForCountryCode(int country_code, + string* region_code) const { + DCHECK(region_code); + list region_codes; + + GetRegionCodesForCountryCode(country_code, ®ion_codes); + *region_code = region_codes.size() != 0 ? 
region_codes.front() : "ZZ"; +} + +void PhoneNumberUtil::GetRegionCodeForNumber(const PhoneNumber& number, + string* region_code) const { + DCHECK(region_code); + int country_code = number.country_code(); + list region_codes; + GetRegionCodesForCountryCode(country_code, ®ion_codes); + if (region_codes.size() == 0) { + string number_string; + GetNationalSignificantNumber(number, &number_string); + logger->Warning(string("Missing/invalid country code (") + + SimpleItoa(country_code) + ") for number " + number_string); + *region_code = "ZZ"; + return; + } + if (region_codes.size() == 1) { + *region_code = region_codes.front(); + } else { + GetRegionCodeForNumberFromRegionList(number, region_codes, region_code); + } +} + +void PhoneNumberUtil::GetRegionCodeForNumberFromRegionList( + const PhoneNumber& number, const list& region_codes, + string* region_code) const { + DCHECK(region_code); + string national_number; + GetNationalSignificantNumber(number, &national_number); + for (list::const_iterator it = region_codes.begin(); + it != region_codes.end(); ++it) { + const PhoneMetadata* metadata = GetMetadataForRegion(*it); + if (metadata->has_leading_digits()) { + StringPiece number(national_number); + if (RE2::Consume(&number, + RE2Cache::ScopedAccess(re2_cache.get(), + metadata->leading_digits()))) { + *region_code = *it; + return; + } + } else if (GetNumberTypeHelper(national_number, *metadata) != UNKNOWN) { + *region_code = *it; + return; + } + } + *region_code = "ZZ"; +} + +int PhoneNumberUtil::GetCountryCodeForRegion(const string& region_code) const { + if (!IsValidRegionCode(region_code)) { + logger->Info("Invalid or unknown country code provided."); + return 0; + } + const PhoneMetadata* metadata = GetMetadataForRegion(region_code); + if (!metadata) { + logger->Error("Unsupported country code provided."); + return 0; + } + return metadata->country_code(); +} + +// Gets a valid fixed-line number for the specified region_code. 
Returns false +// if the country was unknown or if no number exists. +bool PhoneNumberUtil::GetExampleNumber(const string& region_code, + PhoneNumber* number) const { + DCHECK(number); + return GetExampleNumberForType(region_code, + FIXED_LINE, + number); +} + +// Gets a valid number for the specified region_code and type. Returns false if +// the country was unknown or if no number exists. +bool PhoneNumberUtil::GetExampleNumberForType( + const string& region_code, + PhoneNumberUtil::PhoneNumberType type, + PhoneNumber* number) const { + DCHECK(number); + const PhoneMetadata* region_metadata = GetMetadataForRegion(region_code); + const PhoneNumberDesc* description = + GetNumberDescByType(*region_metadata, type); + if (description && description->has_example_number()) { + return (Parse(description->example_number(), + region_code, + number) == NO_ERROR); + } + return false; +} + +PhoneNumberUtil::ErrorType PhoneNumberUtil::Parse(const string& number_to_parse, + const string& default_country, + PhoneNumber* number) const { + DCHECK(number); + return ParseHelper(number_to_parse, default_country, false, true, number); +} + +PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseAndKeepRawInput( + const string& number_to_parse, + const string& default_country, + PhoneNumber* number) const { + DCHECK(number); + return ParseHelper(number_to_parse, default_country, true, true, number); +} + +// Checks to see that the region code used is valid, or if it is not valid, that +// the number to parse starts with a + symbol so that we can attempt to infer +// the country from the number. Returns false if it cannot use the region +// provided and the region cannot be inferred. 
+bool PhoneNumberUtil::CheckRegionForParsing( + const string& number_to_parse, + const string& default_country) const { + + if (!IsValidRegionCode(default_country) && !number_to_parse.empty()) { + StringPiece number_as_string_piece(number_to_parse); + if (!RE2::Consume(&number_as_string_piece, *plus_chars_pattern)) { + return false; + } + } + return true; +} + +PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseHelper( + const string& number_to_parse, + const string& default_country, + bool keep_raw_input, + bool check_region, + PhoneNumber* phone_number) const { + DCHECK(phone_number); + // Extract a possible number from the string passed in (this strips leading + // characters that could not be the start of a phone number.) + string national_number; + ExtractPossibleNumber(number_to_parse, &national_number); + if (!IsViablePhoneNumber(national_number)) { + logger->Debug("The string supplied did not seem to be a phone number."); + return NOT_A_NUMBER; + } + + if (check_region && + !CheckRegionForParsing(national_number, default_country)) { + logger->Info("Missing or invalid default country."); + return INVALID_COUNTRY_CODE_ERROR; + } + PhoneNumber temp_number; + if (keep_raw_input) { + temp_number.set_raw_input(number_to_parse); + } + // Attempt to parse extension first, since it doesn't require country-specific + // data and we want to have the non-normalised number here. + string extension; + MaybeStripExtension(&national_number, &extension); + if (!extension.empty()) { + temp_number.set_extension(extension); + } + const PhoneMetadata* country_metadata = GetMetadataForRegion(default_country); + // Check to see if the number is given in international format so we know + // whether this number is from the default country or not. 
+ string normalized_national_number(national_number); + ErrorType country_code_error = + MaybeExtractCountryCode(country_metadata, keep_raw_input, + &normalized_national_number, &temp_number); + int country_code = temp_number.country_code(); + if (country_code_error != NO_ERROR) { + return country_code_error; + } + if (country_code != 0) { + string phone_number_region; + GetRegionCodeForCountryCode(country_code, &phone_number_region); + if (phone_number_region != default_country) { + country_metadata = GetMetadataForRegion(phone_number_region); + } + } else if (country_metadata) { + // If no extracted country code, use the region supplied instead. + // Note that the national number was already normalized by + // MaybeExtractCountryCode. + country_code = country_metadata->country_code(); + } + if (normalized_national_number.length() < kMinLengthForNsn) { + logger->Debug("The string supplied is too short to be a phone number."); + return TOO_SHORT_NSN; + } + if (country_metadata) { + RE2Cache::ScopedAccess valid_number_pattern(re2_cache.get(), + country_metadata->general_desc().national_number_pattern()); + string* carrier_code = keep_raw_input ? 
+ temp_number.mutable_preferred_domestic_carrier_code() : NULL; + MaybeStripNationalPrefixAndCarrierCode(*country_metadata, + &normalized_national_number, + carrier_code); + } + unsigned int normalized_national_number_length = + normalized_national_number.length(); + if (normalized_national_number_length < kMinLengthForNsn) { + logger->Debug("The string supplied is too short to be a phone number."); + return TOO_SHORT_NSN; + } + if (normalized_national_number_length > kMaxLengthForNsn) { + logger->Debug("The string supplied is too long to be a phone number."); + return TOO_LONG_NSN; + } + temp_number.set_country_code(country_code); + if (IsLeadingZeroCountry(country_code) && + normalized_national_number[0] == '0') { + temp_number.set_italian_leading_zero(true); + } + uint64 number_as_int; + stringstream ss; + ss << normalized_national_number; + ss >> number_as_int; + temp_number.set_national_number(number_as_int); + phone_number->MergeFrom(temp_number); + return NO_ERROR; +} + +// Attempts to extract a possible number from the string passed in. This +// currently strips all leading characters that could not be used to start a +// phone number. Characters that can be used to start a phone number are +// defined in the valid_start_char_pattern. If none of these characters are +// found in the number passed in, an empty string is returned. This function +// also attempts to strip off any alternative extensions or endings if two or +// more are present, such as in the case of: (530) 583-6985 x302/x2303. The +// second extension here makes this actually two phone numbers, (530) 583-6985 +// x302 and (530) 583-6985 x2303. We remove the second extension so that the +// first number is parsed correctly. 
+// static +void PhoneNumberUtil::ExtractPossibleNumber(const string& number, + string* extracted_number) { + DCHECK(extracted_number); + + UnicodeText number_as_unicode; + number_as_unicode.PointToUTF8(number.data(), number.size()); + char current_char[5]; + int len; + UnicodeText::const_iterator it; + for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) { + len = it.get_utf8(current_char); + current_char[len] = '\0'; + if (RE2::FullMatch(current_char, *valid_start_char_pattern)) { + break; + } + } + + if (it == number_as_unicode.end()) { + // No valid start character was found. extracted_number should be set to + // empty string. + extracted_number->assign(""); + return; + } + + UnicodeText::const_reverse_iterator reverse_it(number_as_unicode.end()); + for (; reverse_it.base() != it; ++reverse_it) { + len = reverse_it.get_utf8(current_char); + current_char[len] = '\0'; + if (!RE2::FullMatch(current_char, *unwanted_end_char_pattern)) { + break; + } + } + + if (reverse_it.base() == it) { + extracted_number->assign(""); + return; + } + + extracted_number->assign(UnicodeText::UTF8Substring(it, reverse_it.base())); + + logger->Debug("After stripping starting and trailing characters," + " left with: " + *extracted_number); + + // Now remove any extra numbers at the end. 
+ RE2::PartialMatch(*extracted_number, + *capture_up_to_second_number_start_pattern, + extracted_number); +} + +bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const { + return IsPossibleNumberWithReason(number) == IS_POSSIBLE; +} + +bool PhoneNumberUtil::IsPossibleNumberForString( + const string& number, + const string& country_dialing_from) const { + PhoneNumber number_proto; + if (Parse(number, country_dialing_from, &number_proto) == NO_ERROR) { + return IsPossibleNumber(number_proto); + } else { + return false; + } +} + +PhoneNumberUtil::ValidationResult PhoneNumberUtil::IsPossibleNumberWithReason( + const PhoneNumber& number) const { + string national_number; + GetNationalSignificantNumber(number, &national_number); + int country_code = number.country_code(); + // Note: For Russian Fed and NANPA numbers, we just use the rules from the + // default region (US or Russia) since the GetRegionCodeForNumber will not + // work if the number is possible but not valid. This would need to be + // revisited if the possible number pattern ever differed between various + // countries within those plans. + string region_code; + GetRegionCodeForCountryCode(country_code, ®ion_code); + if (!HasValidRegionCode(region_code, country_code, national_number)) { + return INVALID_COUNTRY_CODE; + } + const PhoneNumberDesc& general_num_desc = + GetMetadataForRegion(region_code)->general_desc(); + // Handling case of numbers with no metadata. 
+ if (!general_num_desc.has_national_number_pattern()) { + unsigned int number_length = national_number.length(); + if (number_length < kMinLengthForNsn) { + return TOO_SHORT; + } else if (number_length > kMaxLengthForNsn) { + return TOO_LONG; + } else { + return IS_POSSIBLE; + } + } + RE2Cache::ScopedAccess possible_number_pattern(re2_cache.get(), + StrCat("(", general_num_desc.possible_number_pattern(), ")")); + string extracted_number; + if (RE2::PartialMatch(national_number, + possible_number_pattern, + &extracted_number)) { + return (national_number.compare(extracted_number) == 0) + ? IS_POSSIBLE + : TOO_LONG; + } else { + return TOO_SHORT; + } +} + +bool PhoneNumberUtil::TruncateTooLongNumber(PhoneNumber* number) const { + if (IsValidNumber(*number)) { + return true; + } + PhoneNumber number_copy(*number); + uint64 national_number = number->national_number(); + do { + national_number /= 10; + number_copy.set_national_number(national_number); + if (IsPossibleNumberWithReason(number_copy) == TOO_SHORT || + national_number == 0) { + return false; + } + } while (!IsValidNumber(number_copy)); + number->set_national_number(national_number); + return true; +} + +PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberType( + const PhoneNumber& number) const { + string region_code; + GetRegionCodeForNumber(number, ®ion_code); + if (!IsValidRegionCode(region_code)) { + return UNKNOWN; + } + string national_significant_number; + GetNationalSignificantNumber(number, &national_significant_number); + return GetNumberTypeHelper(national_significant_number, + *GetMetadataForRegion(region_code)); +} + +bool PhoneNumberUtil::IsValidNumber(const PhoneNumber& number) const { + string region_code; + GetRegionCodeForNumber(number, ®ion_code); + return IsValidRegionCode(region_code) && + IsValidNumberForRegion(number, region_code); +} + +bool PhoneNumberUtil::IsValidNumberForRegion(const PhoneNumber& number, + const string& region_code) const { + if (number.country_code() != 
GetCountryCodeForRegion(region_code)) { + return false; + } + const PhoneMetadata* metadata = GetMetadataForRegion(region_code); + const PhoneNumberDesc& general_desc = metadata->general_desc(); + string national_number; + GetNationalSignificantNumber(number, &national_number); + + // For countries where we don't have metadata for PhoneNumberDesc, we treat + // any number passed in as a valid number if its national significant number + // is between the minimum and maximum lengths defined by ITU for a national + // significant number. + // NOTE(review): the lower bound below is exclusive (a length equal to + // kMinLengthForNsn is rejected) whereas IsPossibleNumberWithReason() accepts + // that length; confirm this asymmetry is intended. + if (!general_desc.has_national_number_pattern()) { + logger->Info("Validating number with incomplete metadata."); + unsigned int number_length = national_number.length(); + return number_length > kMinLengthForNsn && + number_length <= kMaxLengthForNsn; + } + return GetNumberTypeHelper(national_number, *metadata) != UNKNOWN; +} + +// Returns true if |country_code| is present in leading_zero_countries. +bool PhoneNumberUtil::IsLeadingZeroCountry(int country_code) { + return leading_zero_countries->find(country_code) != + leading_zero_countries->end(); +} + +// static +void PhoneNumberUtil::GetNationalSignificantNumber(const PhoneNumber& number, + string* national_number) { + // The leading zero in the national (significant) number of an Italian phone + // number has a special meaning. Unlike the rest of the world, it indicates + // the number is a landline number. There have been plans to migrate landline + // numbers to start with the digit two since December 2000, but it has not yet + // happened. + // See http://en.wikipedia.org/wiki/%2B39 for more details. + // Other countries such as Cote d'Ivoire and Gabon use this for their mobile + // numbers. + // Note: both pieces are appended with +=, so |national_number| should be + // empty on entry. 
+ DCHECK(national_number); + *national_number += + (IsLeadingZeroCountry(number.country_code()) && + number.has_italian_leading_zero() && + number.italian_leading_zero()) + ?
"0" + : ""; + stringstream ss; + ss << number.national_number(); + string national_number_string; + ss >> national_number_string; + + *national_number += national_number_string; +} + +// Returns 0 when the region of |number| is invalid, when that region has no +// national prefix, or when the number is neither FIXED_LINE nor +// FIXED_LINE_OR_MOBILE. Otherwise returns the length of the national +// destination code. +int PhoneNumberUtil::GetLengthOfGeographicalAreaCode( + const PhoneNumber& number) const { + string region_code; + GetRegionCodeForNumber(number, &region_code); + if (!IsValidRegionCode(region_code)) { + return 0; + } + const PhoneMetadata* metadata = GetMetadataForRegion(region_code); + DCHECK(metadata); + if (!metadata->has_national_prefix()) { + return 0; + } + + string national_significant_number; + GetNationalSignificantNumber(number, &national_significant_number); + PhoneNumberType type = GetNumberTypeHelper(national_significant_number, + *metadata); + // Most numbers other than the two types below have to be dialled in full. + if (type != FIXED_LINE && type != FIXED_LINE_OR_MOBILE) { + return 0; + } + + return GetLengthOfNationalDestinationCode(number); +} + +// Formats |number| (without its extension) in INTERNATIONAL format and returns +// the length of the second group of ASCII digits, or 0 if fewer than three +// digit groups are found. Argentinian mobile numbers are special-cased below. +int PhoneNumberUtil::GetLengthOfNationalDestinationCode( + const PhoneNumber& number) const { + PhoneNumber copied_proto(number); + if (number.has_extension()) { + // Clear the extension so it's not included when formatting. 
+ copied_proto.clear_extension(); + } + + string formatted_number; + Format(copied_proto, INTERNATIONAL, &formatted_number); + StringPiece i18n_number(formatted_number); + string digit_group; + string ndc; + string third_group; + for (int i = 0; i < 3; ++i) { + if (!RE2::FindAndConsume(&i18n_number, *capturing_ascii_digits_pattern, + &digit_group)) { + // We should find at least three groups. + return 0; + } + if (i == 1) { + ndc = digit_group; + } else if (i == 2) { + third_group = digit_group; + } + } + string region_code; + GetRegionCodeForNumber(number, &region_code); + + if (region_code == "AR" && + GetNumberType(number) == MOBILE) { + // Argentinian mobile numbers, when formatted in the international format, + // are in the form of +54 9 NDC XXXX....
As a result, we take the length of + // the third group (NDC) and add 1 for the digit 9, which also forms part of + // the national significant number. + return third_group.size() + 1; + } + return ndc.size(); +} + +// static +void PhoneNumberUtil::NormalizeDigitsOnly(string* number) { + DCHECK(number); + // Delete everything that isn't valid digits. + static const RE2 invalid_digits_pattern(StrCat("[^", kValidDigits, "]")); + static const StringPiece empty; + RE2::GlobalReplace(number, invalid_digits_pattern, empty); + // Normalize all decimal digits to ASCII digits. + UParseError error; + icu::ErrorCode status; + + scoped_ptr transliterator( + icu::Transliterator::createFromRules( + "NormalizeDecimalDigits", + "[[:nv=0:]-[0]-[:^nt=de:]]>0;" + "[[:nv=1:]-[1]-[:^nt=de:]]>1;" + "[[:nv=2:]-[2]-[:^nt=de:]]>2;" + "[[:nv=3:]-[3]-[:^nt=de:]]>3;" + "[[:nv=4:]-[4]-[:^nt=de:]]>4;" + "[[:nv=5:]-[5]-[:^nt=de:]]>5;" + "[[:nv=6:]-[6]-[:^nt=de:]]>6;" + "[[:nv=7:]-[7]-[:^nt=de:]]>7;" + "[[:nv=8:]-[8]-[:^nt=de:]]>8;" + "[[:nv=9:]-[9]-[:^nt=de:]]>9;", + UTRANS_FORWARD, + error, + status + ) + ); + if (!status.isSuccess()) { + logger->Error("Error creating ICU Transliterator"); + return; + } + icu::UnicodeString utf16(number->c_str()); + transliterator->transliterate(utf16); + number->clear(); + utf16.toUTF8String(*number); +} + +bool PhoneNumberUtil::IsAlphaNumber(const string& number) const { + if (!IsViablePhoneNumber(number)) { + // Number is too short, or doesn't match the basic phone number pattern. + return false; + } + // Copy the number, since we are going to try and strip the extension from it. 
+ string number_copy(number); + string extension; + MaybeStripExtension(&number_copy, &extension); + return RE2::FullMatch(number_copy, *valid_alpha_phone_pattern); +} + +void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const { + DCHECK(number); + NormalizeHelper(*all_normalization_mappings, false, number); +} + +// Normalizes a string of characters representing a phone number. This performs +// the following conversions: +// - Wide-ascii digits are converted to normal ASCII (European) digits. +// - Letters are converted to their numeric representation on a telephone +// keypad. The keypad used here is the one defined in ITU Recommendation +// E.161. This is only done if there are 3 or more letters in the number, to +// lessen the risk that such letters are typos - otherwise alpha characters +// are stripped. +// - Punctuation is stripped. +// - Arabic-Indic numerals are converted to European numerals. +void PhoneNumberUtil::Normalize(string* number) const { + DCHECK(number); + if (RE2::PartialMatch(*number, *valid_alpha_phone_pattern)) { + NormalizeHelper(*all_normalization_mappings, true, number); + } + NormalizeDigitsOnly(number); +} + +// Checks to see if the string of characters could possibly be a phone number at +// all. At the moment, checks to see that the string begins with at least 3 +// digits, ignoring any punctuation commonly found in phone numbers. This +// method does not require the number to be normalized in advance - but does +// assume that leading non-number symbols have been removed, such as by the +// method ExtractPossibleNumber. 
+// static +bool PhoneNumberUtil::IsViablePhoneNumber(const string& number) { + if (number.length() < kMinLengthForNsn) { + logger->Debug("Number too short to be viable:" + number); + return false; + } + return RE2::FullMatch(number, *valid_phone_number_pattern); +} + +// Strips any international prefix (such as +, 00, 011) present in the number +// provided, normalizes the resulting number, and indicates if an international +// prefix was present. +// +// possible_idd_prefix represents the international direct dialing prefix from +// the country we think this number may be dialed in. +// Returns FROM_NUMBER_WITH_PLUS_SIGN if a leading plus sign was consumed, +// FROM_NUMBER_WITH_IDD if ParsePrefixAsIdd() succeeded with +// possible_idd_prefix (before or after normalization), and +// FROM_DEFAULT_COUNTRY if the number is empty or did not seem to be in +// international format. +PhoneNumber::CountryCodeSource +PhoneNumberUtil::MaybeStripInternationalPrefixAndNormalize( + const string& possible_idd_prefix, + string* number) const { + DCHECK(number); + if (number->empty()) { + return PhoneNumber::FROM_DEFAULT_COUNTRY; + } + StringPiece number_string_piece(*number); + if (RE2::Consume(&number_string_piece, *plus_chars_pattern)) { + number->assign(number_string_piece.ToString()); + // Can now normalize the rest of the number since we've consumed the "+" + // sign at the start. + Normalize(number); + return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN; + } + // Attempt to parse the first digits as an international prefix. + RE2Cache::ScopedAccess idd_pattern(re2_cache.get(), possible_idd_prefix); + if (ParsePrefixAsIdd(idd_pattern, number)) { + Normalize(number); + return PhoneNumber::FROM_NUMBER_WITH_IDD; + } + // If still not found, then try and normalize the number and then try again. + // This shouldn't be done before, since non-numeric characters (+ and ~) may + // legally be in the international prefix. + Normalize(number); + return ParsePrefixAsIdd(idd_pattern, number) + ?
PhoneNumber::FROM_NUMBER_WITH_IDD + : PhoneNumber::FROM_DEFAULT_COUNTRY; +} + +// Strips any national prefix (such as 0, 1) present in the number provided. +// The number passed in should be the normalized telephone number that we wish +// to strip any national dialing prefix from. The metadata should be for the +// country that we think this number is from. +// static +void PhoneNumberUtil::MaybeStripNationalPrefixAndCarrierCode( + const PhoneMetadata& metadata, + string* number, + string* carrier_code) { + DCHECK(number); + string carrier_code_temp; + const string& possible_national_prefix = + metadata.national_prefix_for_parsing(); + if (number->empty() || possible_national_prefix.empty()) { + // Early return for numbers of zero length or with no national prefix + // possible. + return; + } + // We use two copies here since Consume modifies the phone number, and if the + // first if-clause fails the number will already be changed. + StringPiece number_copy(*number); + StringPiece number_copy_without_transform(*number); + string number_string_copy(*number); + string captured_part_of_prefix; + RE2Cache::ScopedAccess national_number_rule( + re2_cache.get(), + metadata.general_desc().national_number_pattern()); + // Attempt to parse the first digits as a national prefix. We make a + // copy so that we can revert to the original string if necessary. 
+ const string& transform_rule = metadata.national_prefix_transform_rule(); + if (!transform_rule.empty() && + (RE2::Consume(&number_copy, + RE2Cache::ScopedAccess(re2_cache.get(), + possible_national_prefix), + &carrier_code_temp, &captured_part_of_prefix) || + RE2::Consume(&number_copy, + RE2Cache::ScopedAccess(re2_cache.get(), + possible_national_prefix), + &captured_part_of_prefix)) && + !captured_part_of_prefix.empty()) { + string re2_transform_rule(transform_rule); + TransformRegularExpressionToRE2Syntax(&re2_transform_rule); + // If this succeeded, then we must have had a transform rule and there must + // have been some part of the prefix that we captured. + // We make the transformation and check that the resultant number is viable. + // If so, replace the number and return. + RE2::Replace(&number_string_copy, + RE2Cache::ScopedAccess(re2_cache.get(), + possible_national_prefix), + re2_transform_rule); + if (RE2::FullMatch(number_string_copy, national_number_rule)) { + number->assign(number_string_copy); + if (carrier_code) { + carrier_code->assign(carrier_code_temp); + } + } + } else if (RE2::Consume(&number_copy_without_transform, + RE2Cache::ScopedAccess(re2_cache.get(), + possible_national_prefix), + &carrier_code_temp) || + RE2::Consume(&number_copy_without_transform, + RE2Cache::ScopedAccess(re2_cache.get(), + possible_national_prefix))) { + logger->Debug("Parsed the first digits as a national prefix."); + // If captured_part_of_prefix is empty, this implies nothing was captured by + // the capturing groups in possible_national_prefix; therefore, no + // transformation is necessary, and we just remove the national prefix. 
+ if (RE2::FullMatch(number_copy_without_transform, national_number_rule)) { + number->assign(number_copy_without_transform.ToString()); + if (carrier_code) { + carrier_code->assign(carrier_code_temp); + } + } + } else { + logger->Debug("The first digits did not match the national prefix."); + } +} + +// Strips any extension (as in, the part of the number dialled after the call is +// connected, usually indicated with extn, ext, x or similar) from the end of +// the number, and returns it. The number passed in should be non-normalized. +// static +bool PhoneNumberUtil::MaybeStripExtension(string* number, string* extension) { + DCHECK(number); + DCHECK(extension); + // There are two extension capturing groups in the regular expression. + string possible_extension_one; + string possible_extension_two; + string number_copy(*number); + if (RE2::PartialMatch(number_copy, *extn_pattern, + &possible_extension_one, &possible_extension_two)) { + // Replace the extensions in the original string here. + RE2::Replace(&number_copy, *extn_pattern, ""); + logger->Debug("Found an extension. Possible extension one: " + + possible_extension_one + + ". Possible extension two: " + possible_extension_two + + ". Remaining number: " + number_copy); + // If we find a potential extension, and the number preceding this is a + // viable number, we assume it is an extension. + if ((!possible_extension_one.empty() || !possible_extension_two.empty()) && + IsViablePhoneNumber(number_copy)) { + number->assign(number_copy); + extension->assign(possible_extension_one.empty() + ? possible_extension_two + : possible_extension_one); + return true; + } + } + return false; +} + +// Extracts country code from national_number, and returns it. It assumes that +// the leading plus sign or IDD has already been removed. Returns 0 if +// national_number doesn't start with a valid country code, and leaves +// national_number unmodified. Assumes the national_number is at least 3 +// characters long. 
+int PhoneNumberUtil::ExtractCountryCode(string* national_number) const { + int potential_country_code; + for (int i = 1; i <= 3; ++i) { + stringstream ss; + ss << national_number->substr(0, i); + ss >> potential_country_code; + string region_code; + GetRegionCodeForCountryCode(potential_country_code, ®ion_code); + + if (region_code != "ZZ") { + national_number->erase(0, i); + return potential_country_code; + } + } + return 0; +} + +// Tries to extract a country code from a number. Country codes are extracted +// in the following ways: +// - by stripping the international dialing prefix of the country the person +// is dialing from, if this is present in the number, and looking at the next +// digits +// - by stripping the '+' sign if present and then looking at the next digits +// - by comparing the start of the number and the country code of the default +// region. If the number is not considered possible for the numbering plan of +// the default region initially, but starts with the country code of this +// region, validation will be reattempted after stripping this country code. +// If this number is considered a possible number, then the first digits will +// be considered the country code and removed as such. +// +// Returns NO_ERROR if a country code was successfully extracted or none was +// present, or the appropriate error otherwise, such as if a + was present but +// it was not followed by a valid country code. If NO_ERROR is returned, the +// national_number without the country code is populated, and the country_code +// passed in is set to the country code if found, otherwise to 0. +PhoneNumberUtil::ErrorType PhoneNumberUtil::MaybeExtractCountryCode( + const PhoneMetadata* default_region_metadata, + bool keep_raw_input, + string* national_number, + PhoneNumber* phone_number) const { + DCHECK(national_number); + DCHECK(phone_number); + // Set the default prefix to be something that will never match if there is no + // default region. 
+ string possible_country_idd_prefix = default_region_metadata + ? default_region_metadata->international_prefix() + : "NonMatch"; + PhoneNumber::CountryCodeSource country_code_source = + MaybeStripInternationalPrefixAndNormalize(possible_country_idd_prefix, + national_number); + if (keep_raw_input) { + phone_number->set_country_code_source(country_code_source); + } + if (country_code_source != PhoneNumber::FROM_DEFAULT_COUNTRY) { + if (national_number->length() < kMinLengthForNsn) { + logger->Debug("Phone number had an IDD, but after this was not " + "long enough to be a viable phone number."); + return TOO_SHORT_AFTER_IDD; + } + int potential_country_code = ExtractCountryCode(national_number); + if (potential_country_code != 0) { + phone_number->set_country_code(potential_country_code); + return NO_ERROR; + } + // If this fails, they must be using a strange country code that we don't + // recognize, or that doesn't exist. + return INVALID_COUNTRY_CODE_ERROR; + } else if (default_region_metadata) { + // Check to see if the number starts with the country code for the default + // region. If so, we remove the country code, and do some checks on the + // validity of the number before and after. 
+ int default_country_code = default_region_metadata->country_code(); + string default_country_code_string(SimpleItoa(default_country_code)); + logger->Debug("Possible country code: " + default_country_code_string); + string potential_national_number; + if (TryStripPrefixString(*national_number, + default_country_code_string, + &potential_national_number)) { + const PhoneNumberDesc& general_num_desc = + default_region_metadata->general_desc(); + RE2Cache::ScopedAccess valid_number_pattern( + re2_cache.get(), + general_num_desc.national_number_pattern()); + MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata, + &potential_national_number, + NULL); + logger->Debug("Number without country code prefix: " + + potential_national_number); + string extracted_number; + RE2Cache::ScopedAccess possible_number_pattern( + re2_cache.get(), + StrCat("(", general_num_desc.possible_number_pattern(), ")")); + // If the number was invalid before and is valid now, or if it is still + // too long even with the country code stripped, we consider this a better + // result and keep the potential national number. + if ((RE2::FullMatch(potential_national_number, valid_number_pattern) && + !RE2::FullMatch(*national_number, valid_number_pattern)) || + (RE2::PartialMatch(potential_national_number, possible_number_pattern, + &extracted_number) && + potential_national_number.length() > extracted_number.length())) { + national_number->assign(potential_national_number); + if (keep_raw_input) { + phone_number->set_country_code_source( + PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN); + } + phone_number->set_country_code(default_country_code); + return NO_ERROR; + } + } + } + // No country code present. Set the country_code to 0. 
+ phone_number->set_country_code(0); + return NO_ERROR; +} + +PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatch( + const PhoneNumber& first_number_in, + const PhoneNumber& second_number_in) const { + // Make copies of the phone number so that the numbers passed in are not + // edited. + PhoneNumber first_number(first_number_in); + PhoneNumber second_number(second_number_in); + // First clear raw_input and country_code_source and + // preferred_domestic_carrier_code fields and any empty-string extensions so + // that we can use the proto-buffer equality method. + first_number.clear_raw_input(); + first_number.clear_country_code_source(); + first_number.clear_preferred_domestic_carrier_code(); + second_number.clear_raw_input(); + second_number.clear_country_code_source(); + second_number.clear_preferred_domestic_carrier_code(); + if (first_number.extension().empty()) { + first_number.clear_extension(); + } + if (second_number.extension().empty()) { + second_number.clear_extension(); + } + // Early exit if both had extensions and these are different. + if (first_number.has_extension() && second_number.has_extension() && + first_number.extension() != second_number.extension()) { + return NO_MATCH; + } + int first_number_country_code = first_number.country_code(); + int second_number_country_code = second_number.country_code(); + // Both had country code specified. + if (first_number_country_code != 0 && second_number_country_code != 0) { + if (first_number.DebugString() == second_number.DebugString()) { + return EXACT_MATCH; + } else if (first_number_country_code == second_number_country_code && + IsNationalNumberSuffixOfTheOther(first_number, second_number)) { + // A SHORT_NSN_MATCH occurs if there is a difference because of the + // presence or absence of an 'Italian leading zero', the presence or + // absence of an extension, or one NSN being a shorter variant of the + // other. + return SHORT_NSN_MATCH; + } + // This is not a match. 
+ return NO_MATCH; + } + // Checks cases where one or both country codes were not specified. To make + // equality checks easier, we first set the country codes to be equal. + first_number.set_country_code(second_number_country_code); + // If all else was the same, then this is an NSN_MATCH. + if (first_number.DebugString() == second_number.DebugString()) { + return NSN_MATCH; + } + if (IsNationalNumberSuffixOfTheOther(first_number, second_number)) { + return SHORT_NSN_MATCH; + } + return NO_MATCH; +} + +PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatchWithTwoStrings( + const string& first_number, + const string& second_number) const { + PhoneNumber first_number_as_proto; + ErrorType error_type = + Parse(first_number, "ZZ", &first_number_as_proto); + if (error_type == NO_ERROR) { + return IsNumberMatchWithOneString(first_number_as_proto, second_number); + } + if (error_type == INVALID_COUNTRY_CODE_ERROR) { + PhoneNumber second_number_as_proto; + ErrorType error_type = Parse(second_number, "ZZ", + &second_number_as_proto); + if (error_type == NO_ERROR) { + return IsNumberMatchWithOneString(second_number_as_proto, first_number); + } + if (error_type == INVALID_COUNTRY_CODE_ERROR) { + error_type = ParseHelper(first_number, "ZZ", false, false, + &first_number_as_proto); + if (error_type == NO_ERROR) { + error_type = ParseHelper(second_number, "ZZ", false, false, + &second_number_as_proto); + if (error_type == NO_ERROR) { + return IsNumberMatch(first_number_as_proto, second_number_as_proto); + } + } + } + } + // One or more of the phone numbers we are trying to match is not a viable + // phone number. + return INVALID_NUMBER; +} + +PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatchWithOneString( + const PhoneNumber& first_number, + const string& second_number) const { + // First see if the second number has an implicit country code, by attempting + // to parse it. 
+ PhoneNumber second_number_as_proto; + ErrorType error_type = + Parse(second_number, "ZZ", &second_number_as_proto); + if (error_type == NO_ERROR) { + return IsNumberMatch(first_number, second_number_as_proto); + } + if (error_type == INVALID_COUNTRY_CODE_ERROR) { + // The second number has no country code. EXACT_MATCH is no longer possible. + // We parse it as if the region was the same as that for the first number, + // and if EXACT_MATCH is returned, we replace this with NSN_MATCH. + string first_number_region; + GetRegionCodeForCountryCode(first_number.country_code(), + &first_number_region); + if (first_number_region != "ZZ") { + PhoneNumber second_number_with_first_number_region; + Parse(second_number, first_number_region, + &second_number_with_first_number_region); + MatchType match = IsNumberMatch(first_number, + second_number_with_first_number_region); + if (match == EXACT_MATCH) { + return NSN_MATCH; + } + return match; + } else { + // If the first number didn't have a valid country code, then we parse the + // second number without one as well. + error_type = ParseHelper(second_number, "ZZ", false, false, + &second_number_as_proto); + if (error_type == NO_ERROR) { + return IsNumberMatch(first_number, second_number_as_proto); + } + } + } + // One or more of the phone numbers we are trying to match is not a viable + // phone number. + return INVALID_NUMBER; +} + +} // namespace phonenumbers +} // namespace i18n diff --git a/cpp/src/phonenumberutil.h b/cpp/src/phonenumberutil.h index 70b786d12..805bf7deb 100644 --- a/cpp/src/phonenumberutil.h +++ b/cpp/src/phonenumberutil.h @@ -1 +1,651 @@ -// TODO +// Copyright (C) 2009 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Utility for international phone numbers. +// +// Author: Shaopeng Jia +// Open-sourced by: Philippe Liard + +#ifndef I18N_PHONENUMBERS_PHONENUMBERUTIL_H_ +#define I18N_PHONENUMBERS_PHONENUMBERUTIL_H_ + +#include <list> +#include <map> +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include "base/scoped_ptr.h" +#include "base/singleton.h" +#include "phonenumber.pb.h" + +class TelephoneNumber; + +namespace i18n { +namespace phonenumbers { + +using std::list; +using std::map; +using std::pair; +using std::set; +using std::string; +using std::vector; + +using google::protobuf::RepeatedPtrField; + +class LoggerAdapter; +class NumberFormat; +class PhoneMetadata; +class PhoneMetadataCollection; +class PhoneNumber; + +class PhoneNumberUtil { + friend struct DefaultSingletonTraits<PhoneNumberUtil>; + friend class PhoneNumberUtilTest; + public: + // INTERNATIONAL and NATIONAL formats are consistent with the definition + // in ITU-T Recommendation E.123. For example, the number of the Google + // Zürich office will be written as "+41 44 668 1800" in INTERNATIONAL + // format, and as "044 668 1800" in NATIONAL format. E164 format is as per + // INTERNATIONAL format but with no formatting applied e.g. +41446681800. + enum PhoneNumberFormat { + E164, + INTERNATIONAL, + NATIONAL + }; + + // Type of phone numbers. + enum PhoneNumberType { + FIXED_LINE, + MOBILE, + // In some countries (e.g. the USA), it is impossible to distinguish between + // fixed-line and mobile numbers by looking at the phone number itself.
+ FIXED_LINE_OR_MOBILE, + // Freephone lines + TOLL_FREE, + PREMIUM_RATE, + // The cost of this call is shared between the caller and the recipient, and + // is hence typically less than PREMIUM_RATE calls. See + // http://en.wikipedia.org/wiki/Shared_Cost_Service for more information. + SHARED_COST, + // Voice over IP numbers. This includes TSoIP (Telephony Service over IP). + VOIP, + // A personal number is associated with a particular person, and may be + // routed to either a MOBILE or FIXED_LINE number. Some more information can + // be found here: http://en.wikipedia.org/wiki/Personal_Numbers + PERSONAL_NUMBER, + PAGER, + // Used for "Universal Access Numbers" or "Company Numbers". They may be + // further routed to specific offices, but allow one number to be used for a + // company. + UAN, + // A phone number is of type UNKNOWN when it does not fit any of the known + // patterns for a specific country. + UNKNOWN + }; + + // Types of phone number matches. See detailed description beside the + // IsNumberMatch() method. + enum MatchType { + INVALID_NUMBER, // NOT_A_NUMBER in the java version. + NO_MATCH, + SHORT_NSN_MATCH, + NSN_MATCH, + EXACT_MATCH, + }; + + enum ErrorType { + NO_ERROR, + INVALID_COUNTRY_CODE_ERROR, // INVALID_COUNTRY_CODE in the java version. + NOT_A_NUMBER, + TOO_SHORT_AFTER_IDD, + TOO_SHORT_NSN, + TOO_LONG_NSN, // TOO_LONG in the java version. + }; + + // Possible outcomes when testing if a PhoneNumber is possible. + enum ValidationResult { + IS_POSSIBLE, + INVALID_COUNTRY_CODE, + TOO_SHORT, + TOO_LONG, + }; + + // Gets a PhoneNumberUtil instance to carry out international phone number + // formatting, parsing, or validation. The instance is loaded with phone + // number metadata for a number of most commonly used countries/regions, + // as specified by DEFAULT_REGIONS_. + // + // The PhoneNumberUtil is implemented as a singleton. Therefore, calling + // getInstance multiple times will only result in one instance being created. 
+ static PhoneNumberUtil* GetInstance(); + + // Check whether country_code represents the country calling code from a + // country whose national significant number could contain a leading zero. An + // example of such a country is Italy. + static bool IsLeadingZeroCountry(int country_code); + + // Returns true if the number is a valid vanity (alpha) number such as 800 + // MICROSOFT. A valid vanity number will start with at least 3 digits and will + // have three or more alpha characters. This does not do region-specific + // checks - to work out if this number is actually valid for a region, it + // should be parsed and methods such as IsPossibleNumberWithReason or + // IsValidNumber should be used. + bool IsAlphaNumber(const string& number) const; + + // Converts all alpha characters in a number to their respective digits on + // a keypad, but retains existing formatting. + void ConvertAlphaCharactersInNumber(string* number) const; + + // Normalizes a string of characters representing a phone number. This + // converts wide-ascii and arabic-indic numerals to European numerals, and + // strips punctuation and alpha characters. + static void NormalizeDigitsOnly(string* number); + + // Gets the national significant number of a phone number. Note a national + // significant number doesn't contain a national prefix or any formatting. + // The result is appended to national_significant_num, so pass an empty + // string. + static void GetNationalSignificantNumber(const PhoneNumber& number, + string* national_significant_num); + + // Gets the length of the geographical area code from the PhoneNumber object + // passed in, so that clients could use it to split a national significant + // number into geographical area code and subscriber number. It works in such + // a way that the resultant subscriber number should be diallable, at least on + // some devices.
An example of how this could be used: + // + // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance()); + // PhoneNumber number; + // phone_util.Parse("16502530000", "US", &number); + // string national_significant_number; + // phone_util.GetNationalSignificantNumber(number, + // &national_significant_number); + // string area_code; + // string subscriber_number; + // + // int area_code_length = phone_util.GetLengthOfGeographicalAreaCode(number); + // if (area_code_length > 0) { + // area_code = national_significant_number.substr(0, area_code_length); + // subscriber_number = national_significant_number.substr( + // area_code_length, string::npos); + // } else { + // area_code = ""; + // subscriber_number = national_significant_number; + // } + // + // N.B.: area code is a very ambiguous concept, so the I18N team generally + // recommends against using it for most purposes, but recommends using the + // more general national_number instead. Read the following carefully before + // deciding to use this method: + // + // - geographical area codes change over time, and this method honors those + // changes; therefore, it doesn't guarantee the stability of the result it + // produces. + // - subscriber numbers may not be diallable from all devices (notably mobile + // devices, which typically require the full national_number to be dialled + // in most countries). + // - most non-geographical numbers have no area codes. + // - some geographical numbers have no area codes. + int GetLengthOfGeographicalAreaCode(const PhoneNumber& number) const; + + // Gets the length of the national destination code (NDC) from the PhoneNumber + // object passed in, so that clients could use it to split a national + // significant number into NDC and subscriber number.
The NDC of a phone + // number is normally the first group of digit(s) right after the country code + // when the number is formatted in the international format, if there is a + // subscriber number part that follows. An example of how this could be used: + // + // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance()); + // PhoneNumber number; + // phone_util.Parse("16502530000", "US", &number); + // string national_significant_number; + // phone_util.GetNationalSignificantNumber(number, + // &national_significant_number); + // string national_destination_code; + // string subscriber_number; + // + // int national_destination_code_length = + // phone_util.GetLengthOfNationalDestinationCode(number); + // if (national_destination_code_length > 0) { + // national_destination_code = national_significant_number.substr( + // 0, national_destination_code_length); + // subscriber_number = national_significant_number.substr( + // national_destination_code_length, string::npos); + // } else { + // national_destination_code = ""; + // subscriber_number = national_significant_number; + // } + // + // Refer to the unittests to see the difference between this function and + // GetLengthOfGeographicalAreaCode(). + int GetLengthOfNationalDestinationCode(const PhoneNumber& number) const; + + // Formats a phone number in the specified format using default rules. Note + // that this does not promise to produce a phone number that the user can + // dial from where they are - although we do format in either NATIONAL or + // INTERNATIONAL format depending on what the client asks for, we do not + // currently support a more abbreviated format, such as for users in the + // same area who could potentially dial the number without area code. + void Format(const PhoneNumber& number, + PhoneNumberFormat number_format, + string* formatted_number) const; + + // Formats a phone number in the specified format using client-defined + // formatting rules. 
+ void FormatByPattern( + const PhoneNumber& number, + PhoneNumberFormat number_format, + const RepeatedPtrField& user_defined_formats, + string* formatted_number) const; + + // Formats a phone number in national format for dialing using the carrier as + // specified in the carrier_code. The carrier_code will always be used + // regardless of whether the phone number already has a preferred domestic + // carrier code stored. If carrier_code contains an empty string, return the + // number in national format without any carrier code. + void FormatNationalNumberWithCarrierCode(const PhoneNumber& number, + const string& carrier_code, + string* formatted_number) const; + + // Formats a phone number in national format for dialing using the carrier as + // specified in the preferred_domestic_carrier_code field of the PhoneNumber + // object passed in. If that is missing, use the fallback_carrier_code passed + // in instead. If there is no preferred_domestic_carrier_code, and the + // fallback_carrier_code contains an empty string, return the number in + // national format without any carrier code. + // + // Use FormatNationalNumberWithCarrierCode instead if the carrier code passed + // in should take precedence over the number's preferred_domestic_carrier_code + // when formatting. + void FormatNationalNumberWithPreferredCarrierCode( + const PhoneNumber& number, + const string& fallback_carrier_code, + string* formatted_number) const; + + // Formats a phone number for out-of-country dialing purpose. + // The calling_from parameter is an ISO 3166-1 two-letter country code string. + // + // Note this function takes care of the case for calling inside of NANPA + // and between Russia and Kazakhstan (who share the same country code). + // In those cases, no international prefix is used. For countries which + // have multiple international prefixes, the number in its INTERNATIONAL + // format will be returned instead. 
+ void FormatOutOfCountryCallingNumber( + const PhoneNumber& number, + const string& calling_from, + string* formatted_number) const; + + // Formats a phone number for out-of-country dialing purpose. + // The calling_from parameter is an ISO 3166-1 two-letter country code string. + // + // Note that in this version, if the number was entered originally using alpha + // characters and this version of the number is stored in raw_input, this + // representation of the number will be used rather than the digit + // representation. Grouping information as specified characters such as "-" + // and " " will be retained. + // + // Caveats: + // 1) This will not produce good results if the country code is both + // present in the raw input _and_ is the start of the national number. This + // is not a problem in the countries which typically use alpha numbers. + // 2) This will also not produce good results if the raw input has any + // grouping information within the first three digits of the national number, + // and if the function needs to strip preceding digits/words in the raw input + // before these digits. Normally people group the first three digits together + // so this is not a huge problem - and will be fixed if it proves to be so. + void FormatOutOfCountryKeepingAlphaChars( + const PhoneNumber& number, + const string& calling_from, + string* formatted_number) const; + + // Attempts to extract a valid number from a phone number that is too long to + // be valid, and resets the PhoneNumber object passed in to that valid + // version. If no valid number could be extracted, the PhoneNumber object + // passed in will not be modified. It returns true if a valid phone number can + // be successfully extracted. + bool TruncateTooLongNumber(PhoneNumber* number) const; + + // Gets the type of a phone number. + PhoneNumberType GetNumberType(const PhoneNumber& number) const; + + // Tests whether a phone number matches a valid pattern. 
Note this doesn't + // verify the number is actually in use, which is impossible to tell by just + // looking at a number itself. + bool IsValidNumber(const PhoneNumber& number) const; + + // Tests whether a phone number is valid for a certain region. Note this + // doesn't verify the number is actually in use, which is impossible to tell + // by just looking at a number itself. If the country calling code is not the + // same as the country code for the region, this immediately exits with false. + // After this, the specific number pattern rules for the region are examined. + // This is useful for determining for example whether a particular number is + // valid for Canada, rather than just a valid NANPA number. + // + // The region_code parameter is an ISO 3166-1 two-letter country code string. + bool IsValidNumberForRegion( + const PhoneNumber& number, + const string& region_code) const; + + // Returns the country/region where a phone number is from. This could be + // used for geo-coding in the country/region level. + // + // The country/region is returned as an ISO 3166-1 two-letter country code + // string. + void GetRegionCodeForNumber(const PhoneNumber& number, + string* region_code) const; + + // Returns the country calling code for a specific region. For example, + // this would be 1 for the United States, and 64 for New Zealand. + // + // The region_code parameter is an ISO 3166-1 two-letter country code string. + int GetCountryCodeForRegion(const string& region_code) const; + + // Returns the region code that matches the specific country code. Note that + // it is possible that several regions share the same country code (e.g. US + // and Canada), and in that case, only one of the regions (normally the one + // with the largest population) is returned. + // + // The region code is returned as an ISO 3166-1 two-letter country code + // string. 
+ void GetRegionCodeForCountryCode(int country_code, string* region_code) const; + + // Check if a country is one of the countries under the North American + // Numbering Plan Administration (NANPA). + // + // The region_code parameter is an ISO 3166-1 two-letter country code string. + bool IsNANPACountry(const string& region_code) const; + + // Check whether a phone number is a possible number. It provides a more + // lenient check than IsValidNumber() in the following sense: + // 1. It only checks the length of phone numbers. In particular, it doesn't + // check starting digits of the number. + // 2. It doesn't attempt to figure out the type of the number, but uses + // general rules which apply to all types of phone numbers in a + // country. Therefore, it is much faster than IsValidNumber(). + // 3. For fixed line numbers, many countries have the concept of area code, + // which together with subscriber number constitute the national + // significant number. It is sometimes okay to dial the subscriber + // number only when dialing in the same area. This function will return + // true if the subscriber-number-only version is passed in. On the other + // hand, because IsValidNumber() validates using information on both + // starting digits (for fixed line numbers, that would most likely be + // area codes) and length (obviously includes the length of area codes + // for fixed line numbers), it will return false for the + // subscriber-number-only version. + ValidationResult IsPossibleNumberWithReason(const PhoneNumber& number) const; + + // Convenience wrapper around IsPossibleNumberWithReason. Instead of returning + // the reason for failure, this method returns a boolean value. + bool IsPossibleNumber(const PhoneNumber& number) const; + + // Check whether a phone number is a possible number given a number in the + // form of a string, and the country where the number could be dialed from. + // It provides a more lenient check than IsValidNumber(). 
See + // IsPossibleNumber(const PhoneNumber& number) for details. + // + // This method first parses the number, then invokes + // IsPossibleNumber(const PhoneNumber& number) with the resultant PhoneNumber + // object. + // + // country_dialing_from represents the country that we are expecting the number + // to be dialed from. Note this is different from the country where the number + // belongs. For example, the number +1 650 253 0000 is a number that belongs + // to the US. When written in this form, it could be dialed from any country. When + // it is written as 00 1 650 253 0000, it could be dialed from any country + // which uses an international dialling prefix of 00. When it is written as + // 650 253 0000, it could only be dialed from within the US, and when written + // as 253 0000, it could only be dialed from within a smaller area in the US + // (Mountain View, CA, to be more specific). + // + // The country_dialing_from parameter is an ISO 3166-1 two-letter country code + // string. + bool IsPossibleNumberForString( + const string& number, + const string& country_dialing_from) const; + + // Gets a valid fixed-line number for the specified country. Returns false if + // the country was unknown. + // + // The region_code parameter is an ISO 3166-1 two-letter country code string. + bool GetExampleNumber(const string& region_code, + PhoneNumber* number) const; + + // Gets a valid number of the specified type for the specified country. + // Returns false if the country was unknown or if no example number of that + // type could be found. + // + // The region_code parameter is an ISO 3166-1 two-letter country code string. + bool GetExampleNumberForType(const string& region_code, + PhoneNumberType type, + PhoneNumber* number) const; + + // Parses a string and returns it in proto buffer format. This method will + // return an error like INVALID_COUNTRY_CODE if the number is not considered + // to be a possible number, and NO_ERROR if it parsed correctly. 
Note that + // validation of whether the number is actually a valid number for a + // particular country/region is not performed. This can be done separately + // with IsValidNumber(). + // + // default_country represents the country that we are expecting the number to + // be from. This is only used if the number being parsed is not written in + // international format. The country code for the number in this case would + // be stored as that of the default country supplied. If the number is + // guaranteed to start with a '+' followed by the country code, then + // "ZZ" can be supplied. + // + // The default_country parameter is an ISO 3166-1 two-letter country code + // string. + ErrorType Parse(const string& number_to_parse, + const string& default_country, + PhoneNumber* number) const; + // Parses a string and returns it in proto buffer format. This method differs + // from Parse() in that it always populates the raw_input field of the + // protocol buffer with number_to_parse as well as the country_code_source + // field. + // + // The default_country parameter is an ISO 3166-1 two-letter country code + // string. + ErrorType ParseAndKeepRawInput(const string& number_to_parse, + const string& default_country, + PhoneNumber* number) const; + + // Takes two phone numbers and compares them for equality. + // + // Returns EXACT_MATCH if the country code, NSN, presence of a leading zero + // for Italian numbers and any extension present are the same. + // Returns NSN_MATCH if either or both has no country specified, and the NSNs + // and extensions are the same. + // Returns SHORT_NSN_MATCH if either or both has no country specified, or the + // country specified is the same, and one NSN could be a shorter version of + // the other number. This includes the case where one has an extension + // specified, and the other does not. Returns NO_MATCH otherwise. + // For example, the numbers +1 345 657 1234 and 657 1234 are a + // SHORT_NSN_MATCH. 
The numbers +1 345 657 1234 and 345 657 are a NO_MATCH. + MatchType IsNumberMatch(const PhoneNumber& first_number, + const PhoneNumber& second_number) const; + + // Takes two phone numbers as strings and compares them for equality. This + // is a convenience wrapper for IsNumberMatch(PhoneNumber firstNumber, + // PhoneNumber secondNumber). No default region is known. + // Returns INVALID_NUMBER if either number cannot be parsed into a phone + // number. + MatchType IsNumberMatchWithTwoStrings(const string& first_number, + const string& second_number) const; + + // Takes two phone numbers and compares them for equality. This is a + // convenience wrapper for IsNumberMatch(PhoneNumber firstNumber, + // PhoneNumber secondNumber). No default region is known. + // Returns INVALID_NUMBER if second_number cannot be parsed into a phone + // number. + MatchType IsNumberMatchWithOneString(const PhoneNumber& first_number, + const string& second_number) const; + + // Implement this 'interface' to override the way metadatas are fetched. + // Useful for testing injecting stable metadatas. + class MetadataProvider { + public: + virtual ~MetadataProvider() {} + + // Returns a pair containing a pointer to the data and its size + virtual pair operator()() = 0; + }; + + // Override the default logging system. The provided adapter destruction is + // handled by this class (don't delete it). + static void SetLoggerAdapter(LoggerAdapter* logger_adapter); + + friend bool ConvertFromTelephoneNumberProto( + const TelephoneNumber& proto_to_convert, + PhoneNumber* new_proto); + friend bool ConvertToTelephoneNumberProto(const PhoneNumber& proto_to_convert, + TelephoneNumber* resulting_proto); + private: + typedef pair*> IntRegionsPair; + + // The minimum and maximum length of the national significant number. 
+ static const unsigned int kMinLengthForNsn = 3; + static const unsigned int kMaxLengthForNsn = 15; + + // A mapping from a country code to a region code which denotes the + // country/region represented by that country code. Note countries under + // NANPA share the country code 1 and Russia and Kazakhstan share the country + // code 7. Under this map, 1 is mapped to region code "US" and 7 is mapped to + // region code "RU". This is implemented as a sorted vector to achieve better + // performance. + // + // Region codes are ISO 3166-1 two-letter country code strings. + scoped_ptr > country_code_to_region_code_map_; + + struct CompareFirst { + bool operator()(const IntRegionsPair& p1, + const IntRegionsPair& p2) const { + return p1.first < p2.first; + } + }; + + // The set of countries that share country code 1. + scoped_ptr > nanpa_countries_; + static const int kNanpaCountryCode = 1; + + // A mapping from a region code to a PhoneMetadata for that region. The map + // contains PhoneMetadata for the DEFAULT_REGIONS and countries/regions that + // have been recently used. This is like a cache for performance reasons. + // + // Region codes are ISO 3166-1 two-letter country code strings. + scoped_ptr > country_to_metadata_map_; + + bool LoadMetadata(PhoneMetadataCollection* metadata, + MetadataProvider& provider); + + explicit PhoneNumberUtil(MetadataProvider* provider = 0); + ~PhoneNumberUtil(); + + // Helper function to check region code is not unknown or null. + // + // The region_code parameter is an ISO 3166-1 two-letter country code string. + bool IsValidRegionCode(const string& region_code) const; + + // Helper function to check region code is not unknown. The country_code and + // number supplied is used only for the resultant log message. + // + // The region_code parameter is an ISO 3166-1 two-letter country code string. 
+ bool HasValidRegionCode(const string& region_code, + int country_code, + const string& number) const; + + // The region_code parameter is an ISO 3166-1 two-letter country code string. + const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegion( + const string& region_code) const; + + void GetRegionCodesForCountryCode( + int country_code, + list* region_codes) const; + + // Simple wrapper of FormatNationalNumberWithCarrier for the common case of + // no carrier code. + // + // The region_code parameter is an ISO 3166-1 two-letter country code string. + void FormatNationalNumber(const string& number, + const string& region_code, + PhoneNumberFormat number_format, + string* formatted_number) const; + + // The region_code parameter is an ISO 3166-1 two-letter country code string. + void FormatNationalNumberWithCarrier(const string& number, + const string& region_code, + PhoneNumberFormat number_format, + const string& carrier_code, + string* formatted_number) const; + + // The region_code parameter is an ISO 3166-1 two-letter country code string. + void MaybeGetFormattedExtension( + const PhoneNumber& number, + const string& region_code, + string* extension) const; + + // The region_code parameter is an ISO 3166-1 two-letter country code string. 
+ void FormatExtension(const string& extension_digits, + const string& region_code, + string* extension) const; + + void GetRegionCodeForNumberFromRegionList( + const PhoneNumber& number, + const list& region_codes, + string* region_code) const; + + void Normalize(string* number) const; + PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize( + const string& possible_idd_prefix, + string* number) const; + + static void MaybeStripNationalPrefixAndCarrierCode( + const PhoneMetadata& metadata, + string* number, + string* carrier_code); + + static void ExtractPossibleNumber(const string& number, + string* extracted_number); + + static bool IsViablePhoneNumber(const string& number); + + static bool MaybeStripExtension(string* number, string* extension); + + int ExtractCountryCode(string* national_number) const; + ErrorType MaybeExtractCountryCode( + const PhoneMetadata* default_region_metadata, + bool keepRawInput, + string* national_number, + PhoneNumber* phone_number) const; + + // The default_country parameter is an ISO 3166-1 two-letter country code + // string. + bool CheckRegionForParsing( + const string& number_to_parse, + const string& default_country) const; + + // The default_country parameter is an ISO 3166-1 two-letter country code + // string. + ErrorType ParseHelper(const string& number_to_parse, + const string& default_country, + bool keep_raw_input, + bool check_region, + PhoneNumber* phone_number) const; + + DISALLOW_COPY_AND_ASSIGN(PhoneNumberUtil); +}; + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_PHONENUMBERUTIL_H_ diff --git a/cpp/src/phonenumberutil_test.cc b/cpp/src/phonenumberutil_test.cc index 70b786d12..ff01f8cfe 100644 --- a/cpp/src/phonenumberutil_test.cc +++ b/cpp/src/phonenumberutil_test.cc @@ -1 +1,2598 @@ -// TODO +// Copyright (C) 2009 Google Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Shaopeng Jia +// Open-sourced by: Philippe Liard + +#include +#include + +#include "phonenumberutil.h" +#include "test_metadata.h" +#include "phonemetadata.pb.h" +#include "phonenumber.pb.h" + +namespace i18n { +namespace phonenumbers { + +using std::make_pair; + +using google::protobuf::RepeatedPtrField; + +class TestMetadataProvider : public PhoneNumberUtil::MetadataProvider { + public: + virtual ~TestMetadataProvider () {} + + pair operator()() { + return make_pair(test_metadata_get(), test_metadata_size()); + } +}; + +class PhoneNumberUtilTest : public testing::Test { + protected: + PhoneNumberUtilTest() : phone_util_(&provider_) {} + + // Wrapper functions for private functions that we want to test. 
+ const PhoneMetadata* GetPhoneMetadata(const string& region_code) const { + return phone_util_.GetMetadataForRegion(region_code); + } + + void ExtractPossibleNumber(const string& number, + string* extracted_number) const { + PhoneNumberUtil::ExtractPossibleNumber(number, extracted_number); + } + + bool IsViablePhoneNumber(const string& number) const { + return PhoneNumberUtil::IsViablePhoneNumber(number); + } + + void Normalize(string* number) const { + phone_util_.Normalize(number); + } + + PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize( + const string& possible_idd_prefix, + string* number) const { + return phone_util_.MaybeStripInternationalPrefixAndNormalize( + possible_idd_prefix, + number); + } + + void MaybeStripNationalPrefixAndCarrierCode(const PhoneMetadata& metadata, + string* number, + string* carrier_code) const { + PhoneNumberUtil::MaybeStripNationalPrefixAndCarrierCode(metadata, number, + carrier_code); + } + + bool MaybeStripExtension(string* number, string* extension) const { + return PhoneNumberUtil::MaybeStripExtension(number, extension); + } + + PhoneNumberUtil::ErrorType MaybeExtractCountryCode( + const PhoneMetadata* default_region_metadata, + bool keep_raw_input, + string* national_number, + PhoneNumber* phone_number) const { + return phone_util_.MaybeExtractCountryCode(default_region_metadata, + keep_raw_input, + national_number, + phone_number); + } + + TestMetadataProvider provider_; + PhoneNumberUtil phone_util_; +}; + +TEST_F(PhoneNumberUtilTest, GetInstanceLoadUSMetadata) { + const PhoneMetadata* metadata = GetPhoneMetadata("US"); + EXPECT_EQ("US", metadata->id()); + EXPECT_EQ(1, metadata->country_code()); + EXPECT_EQ("011", metadata->international_prefix()); + EXPECT_TRUE(metadata->has_national_prefix()); + ASSERT_EQ(2, metadata->number_format_size()); + EXPECT_EQ("(\\d{3})(\\d{3})(\\d{4})", + metadata->number_format(0).pattern()); + EXPECT_EQ("$1 $2 $3", metadata->number_format(0).format()); + 
EXPECT_EQ("[13-9]\\d{9}|2[0-35-9]\\d{8}", + metadata->general_desc().national_number_pattern()); + EXPECT_EQ("\\d{7,10}", metadata->general_desc().possible_number_pattern()); + EXPECT_EQ(metadata->general_desc().DebugString(), + metadata->fixed_line().DebugString()); + + EXPECT_EQ("\\d{10}", metadata->toll_free().possible_number_pattern()); + EXPECT_EQ("900\\d{7}", metadata->premium_rate().national_number_pattern()); + // No shared-cost data is available, so it should be initialised to "NA". + EXPECT_EQ("NA", metadata->shared_cost().national_number_pattern()); + EXPECT_EQ("NA", metadata->shared_cost().possible_number_pattern()); +} + +TEST_F(PhoneNumberUtilTest, GetInstanceLoadDEMetadata) { + const PhoneMetadata* metadata = GetPhoneMetadata("DE"); + EXPECT_EQ("DE", metadata->id()); + EXPECT_EQ(49, metadata->country_code()); + EXPECT_EQ("00", metadata->international_prefix()); + EXPECT_EQ("0", metadata->national_prefix()); + ASSERT_EQ(6, metadata->number_format_size()); + EXPECT_EQ(1, metadata->number_format(5).leading_digits_pattern_size()); + EXPECT_EQ("900", metadata->number_format(5).leading_digits_pattern(0)); + EXPECT_EQ("(\\d{3})(\\d{3,4})(\\d{4})", + metadata->number_format(5).pattern()); + EXPECT_EQ("$1 $2 $3", metadata->number_format(5).format()); + EXPECT_EQ("(?:[24-6]\\d{2}|3[03-9]\\d|[789](?:[1-9]\\d|0[2-9]))\\d{3,8}", + metadata->fixed_line().national_number_pattern()); + EXPECT_EQ("\\d{2,14}", metadata->fixed_line().possible_number_pattern()); + EXPECT_EQ("30123456", metadata->fixed_line().example_number()); + EXPECT_EQ("\\d{10}", metadata->toll_free().possible_number_pattern()); + EXPECT_EQ("900([135]\\d{6}|9\\d{7})", + metadata->premium_rate().national_number_pattern()); +} + +TEST_F(PhoneNumberUtilTest, GetInstanceLoadARMetadata) { + const PhoneMetadata* metadata = GetPhoneMetadata("AR"); + EXPECT_EQ("AR", metadata->id()); + EXPECT_EQ(54, metadata->country_code()); + EXPECT_EQ("00", metadata->international_prefix()); + EXPECT_EQ("0", 
metadata->national_prefix()); + EXPECT_EQ("0(?:(11|343|3715)15)?", metadata->national_prefix_for_parsing()); + EXPECT_EQ("9$1", metadata->national_prefix_transform_rule()); + ASSERT_EQ(5, metadata->number_format_size()); + EXPECT_EQ("$1 15 $2-$3", metadata->number_format(2).format()); + EXPECT_EQ("9(\\d{4})(\\d{2})(\\d{4})", metadata->number_format(3).pattern()); + EXPECT_EQ("(9)(\\d{4})(\\d{2})(\\d{4})", + metadata->intl_number_format(3).pattern()); + EXPECT_EQ("$1 $2 $3 $4", metadata->intl_number_format(3).format()); +} + +TEST_F(PhoneNumberUtilTest, GetNationalSignificantNumber) { + PhoneNumber number; + number.set_country_code(1); + number.set_national_number(6502530000ULL); + string national_significant_number; + phone_util_.GetNationalSignificantNumber(number, + &national_significant_number); + EXPECT_EQ("6502530000", national_significant_number); + + // An Italian mobile number. + national_significant_number.clear(); + number.set_country_code(39); + number.set_national_number(312345678ULL); + phone_util_.GetNationalSignificantNumber(number, + &national_significant_number); + EXPECT_EQ("312345678", national_significant_number); + + // An Italian fixed line number. 
+ national_significant_number.clear(); + number.set_country_code(39); + number.set_national_number(236618300ULL); + number.set_italian_leading_zero(true); + phone_util_.GetNationalSignificantNumber(number, + &national_significant_number); + EXPECT_EQ("0236618300", national_significant_number); +} + +TEST_F(PhoneNumberUtilTest, GetExampleNumber) { + PhoneNumber de_number; + de_number.set_country_code(49); + de_number.set_national_number(30123456ULL); + PhoneNumber test_number; + bool success = phone_util_.GetExampleNumber("DE", &test_number); + EXPECT_TRUE(success); + EXPECT_EQ(test_number.DebugString(), de_number.DebugString()); + success = phone_util_.GetExampleNumberForType("DE", + PhoneNumberUtil::FIXED_LINE, + &test_number); + EXPECT_TRUE(success); + EXPECT_EQ(test_number.DebugString(), de_number.DebugString()); + test_number.Clear(); + success = phone_util_.GetExampleNumberForType("DE", + PhoneNumberUtil::MOBILE, + &test_number); + // Here we test that an example number was not returned, and that the number + // passed in was not modified. + EXPECT_FALSE(success); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + // For the US, the example number is placed under general description, and + // hence should be used for both fixed line and mobile, so neither of these + // should return null. + test_number.Clear(); + success = phone_util_.GetExampleNumberForType("US", + PhoneNumberUtil::FIXED_LINE, + &test_number); + // Here we test that the call to get an example number succeeded, and that the + // number passed in was modified. 
+ EXPECT_TRUE(success); + EXPECT_NE(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + test_number.Clear(); + success = phone_util_.GetExampleNumberForType("US", + PhoneNumberUtil::MOBILE, + &test_number); + EXPECT_TRUE(success); + EXPECT_NE(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); +} + +TEST_F(PhoneNumberUtilTest, FormatUSNumber) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(1); + test_number.set_national_number(6502530000ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("650 253 0000", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+1 650 253 0000", formatted_number); + + test_number.set_national_number(8002530000ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("800 253 0000", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+1 800 253 0000", formatted_number); + + test_number.set_national_number(9002530000ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("900 253 0000", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+1 900 253 0000", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatBSNumber) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(1); + test_number.set_national_number(2421234567ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("242 123 4567", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+1 242 123 4567", formatted_number); + + test_number.set_national_number(8002530000ULL); + phone_util_.Format(test_number, 
PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("800 253 0000", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+1 800 253 0000", formatted_number); + + test_number.set_national_number(9002530000ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("900 253 0000", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+1 900 253 0000", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatGBNumber) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(44); + test_number.set_national_number(2087389353ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("(020) 8738 9353", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+44 20 8738 9353", formatted_number); + + test_number.set_national_number(7912345678ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("(07912) 345 678", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+44 7912 345 678", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatDENumber) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(49); + test_number.set_national_number(301234ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("030 1234", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+49 30 1234", formatted_number); + + test_number.set_national_number(291123ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("0291 123", formatted_number); + phone_util_.Format(test_number, 
PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+49 291 123", formatted_number); + + test_number.set_national_number(29112345678ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("0291 12345678", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+49 291 12345678", formatted_number); + + test_number.set_national_number(9123123ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("09123 123", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+49 9123 123", formatted_number); + + test_number.set_national_number(80212345ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("08021 2345", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+49 8021 2345", formatted_number); + + test_number.set_national_number(1234ULL); + // Note this number is correctly formatted without national prefix. Most of + // the numbers that are treated as invalid numbers by the library are short + // numbers, and they are usually not dialed with national prefix. 
+ phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("1234", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+49 1234", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatITNumber) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(39); + test_number.set_national_number(236618300ULL); + test_number.set_italian_leading_zero(true); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("02 3661 8300", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+39 02 3661 8300", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::E164, + &formatted_number); + EXPECT_EQ("+390236618300", formatted_number); + + test_number.set_national_number(345678901ULL); + test_number.set_italian_leading_zero(false); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("345 678 901", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+39 345 678 901", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::E164, + &formatted_number); + EXPECT_EQ("+39345678901", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatAUNumber) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(61); + test_number.set_national_number(236618300ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("02 3661 8300", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+61 2 3661 8300", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::E164, + &formatted_number); + EXPECT_EQ("+61236618300", formatted_number); + + 
test_number.set_national_number(1800123456ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("1800 123 456", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+61 1800 123 456", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::E164, + &formatted_number); + EXPECT_EQ("+611800123456", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatARNumber) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(54); + test_number.set_national_number(1187654321ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("011 8765-4321", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+54 11 8765-4321", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::E164, + &formatted_number); + EXPECT_EQ("+541187654321", formatted_number); + + test_number.set_national_number(91187654321ULL); + phone_util_.Format(test_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("011 15 8765-4321", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+54 9 11 8765 4321", formatted_number); + phone_util_.Format(test_number, PhoneNumberUtil::E164, + &formatted_number); + EXPECT_EQ("+5491187654321", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatOutOfCountryCallingNumber) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(1); + test_number.set_national_number(9002530000ULL); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "DE", + &formatted_number); + EXPECT_EQ("00 1 900 253 0000", formatted_number); + + test_number.set_national_number(6502530000ULL); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "BS", + &formatted_number); + EXPECT_EQ("1 650 253 0000", 
formatted_number); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "PL", + &formatted_number); + EXPECT_EQ("0~0 1 650 253 0000", formatted_number); + + test_number.set_country_code(44); + test_number.set_national_number(7912345678ULL); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "US", + &formatted_number); + EXPECT_EQ("011 44 7912 345 678", formatted_number); + + test_number.set_country_code(49); + test_number.set_national_number(1234ULL); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "GB", + &formatted_number); + EXPECT_EQ("00 49 1234", formatted_number); + // Note this number is correctly formatted without national prefix. Most of + // the numbers that are treated as invalid numbers by the library are short + // numbers, and they are usually not dialed with national prefix. + phone_util_.FormatOutOfCountryCallingNumber(test_number, "DE", + &formatted_number); + EXPECT_EQ("1234", formatted_number); + + test_number.set_country_code(39); + test_number.set_national_number(236618300ULL); + test_number.set_italian_leading_zero(true); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "US", + &formatted_number); + EXPECT_EQ("011 39 02 3661 8300", formatted_number); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "IT", + &formatted_number); + EXPECT_EQ("02 3661 8300", formatted_number); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "SG", + &formatted_number); + EXPECT_EQ("+39 02 3661 8300", formatted_number); + + test_number.set_country_code(65); + test_number.set_national_number(94777892ULL); + test_number.set_italian_leading_zero(false); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "SG", + &formatted_number); + EXPECT_EQ("9477 7892", formatted_number); + + test_number.set_country_code(54); + test_number.set_national_number(91187654321ULL); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "US", + &formatted_number); + EXPECT_EQ("011 54 9 11 8765 4321", formatted_number); + + 
test_number.set_extension("1234"); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "US", + &formatted_number); + EXPECT_EQ("011 54 9 11 8765 4321 ext. 1234", formatted_number); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "AU", + &formatted_number); + EXPECT_EQ("0011 54 9 11 8765 4321 ext. 1234", formatted_number); + phone_util_.FormatOutOfCountryCallingNumber(test_number, "AR", + &formatted_number); + EXPECT_EQ("011 15 8765-4321 ext. 1234", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatOutOfCountryWithPreferredIntlPrefix) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(39); + test_number.set_national_number(236618300ULL); + test_number.set_italian_leading_zero(true); + // This should use 0011, since that is the preferred international prefix + // (both 0011 and 0012 are accepted as possible international prefixes in our + // test metadata.) + phone_util_.FormatOutOfCountryCallingNumber(test_number, "AU", + &formatted_number); + EXPECT_EQ("0011 39 02 3661 8300", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatOutOfCountryKeepingAlphaChars) { + PhoneNumber alpha_numeric_number; + string formatted_number; + alpha_numeric_number.set_country_code(1); + alpha_numeric_number.set_national_number(8007493524ULL); + alpha_numeric_number.set_raw_input("1800 six-flag"); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "AU", + &formatted_number); + EXPECT_EQ("0011 1 800 SIX-FLAG", formatted_number); + + formatted_number.clear(); + alpha_numeric_number.set_raw_input("1-800-SIX-flag"); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "AU", + &formatted_number); + EXPECT_EQ("0011 1 800-SIX-FLAG", formatted_number); + + formatted_number.clear(); + alpha_numeric_number.set_raw_input("Call us from UK: 00 1 800 SIX-flag"); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "AU", + &formatted_number); + EXPECT_EQ("0011 1 800
SIX-FLAG", formatted_number); + + formatted_number.clear(); + alpha_numeric_number.set_raw_input("800 SIX-flag"); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "AU", + &formatted_number); + EXPECT_EQ("0011 1 800 SIX-FLAG", formatted_number); + + // Formatting from within the NANPA region. + formatted_number.clear(); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "US", + &formatted_number); + EXPECT_EQ("1 800 SIX-FLAG", formatted_number); + formatted_number.clear(); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "BS", + &formatted_number); + EXPECT_EQ("1 800 SIX-FLAG", formatted_number); + + // Testing that if the raw input doesn't exist, it is formatted using + // FormatOutOfCountryCallingNumber. + alpha_numeric_number.clear_raw_input(); + formatted_number.clear(); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "DE", + &formatted_number); + EXPECT_EQ("00 1 800 749 3524", formatted_number); + + // Testing AU alpha number formatted from Australia. + alpha_numeric_number.set_country_code(61); + alpha_numeric_number.set_national_number(827493524ULL); + alpha_numeric_number.set_raw_input("+61 82749-FLAG"); + formatted_number.clear(); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "AU", + &formatted_number); + // This number should have the national prefix prefixed. 
+ EXPECT_EQ("082749-FLAG", formatted_number); + + alpha_numeric_number.set_raw_input("082749-FLAG"); + formatted_number.clear(); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "AU", + &formatted_number); + EXPECT_EQ("082749-FLAG", formatted_number); + + alpha_numeric_number.set_national_number(18007493524ULL); + alpha_numeric_number.set_raw_input("1-800-SIX-flag"); + formatted_number.clear(); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "AU", + &formatted_number); + // This number should not have the national prefix prefixed, in accordance + // with the override for this specific formatting rule. + EXPECT_EQ("1-800-SIX-FLAG", formatted_number); + // The metadata should not be permanently changed, since we copied it before + // modifying patterns. Here we check this. + formatted_number.clear(); + alpha_numeric_number.set_national_number(1800749352ULL); + phone_util_.FormatOutOfCountryCallingNumber(alpha_numeric_number, + "AU", + &formatted_number); + EXPECT_EQ("1800 749 352", formatted_number); + + // Testing a country with multiple international prefixes. + formatted_number.clear(); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "SG", + &formatted_number); + EXPECT_EQ("+61 1-800-SIX-FLAG", formatted_number); + + // Testing the case with an invalid country code. + formatted_number.clear(); + alpha_numeric_number.set_country_code(0); + alpha_numeric_number.set_national_number(18007493524ULL); + alpha_numeric_number.set_raw_input("1-800-SIX-flag"); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "DE", + &formatted_number); + // Uses the raw input only. + EXPECT_EQ("1-800-SIX-flag", formatted_number); + + // Testing the case of an invalid alpha number. 
+ formatted_number.clear(); + alpha_numeric_number.set_country_code(1); + alpha_numeric_number.set_national_number(80749ULL); + alpha_numeric_number.set_raw_input("180-SIX"); + phone_util_.FormatOutOfCountryKeepingAlphaChars(alpha_numeric_number, + "DE", + &formatted_number); + // No country-code stripping can be done. + EXPECT_EQ("00 1 180-SIX", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatWithCarrierCode) { + // We only support this for AR in our test metadata. + PhoneNumber ar_number; + string formatted_number; + ar_number.set_country_code(54); + ar_number.set_national_number(91234125678ULL); + phone_util_.Format(ar_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("01234 12-5678", formatted_number); + // Test formatting with a carrier code. + phone_util_.FormatNationalNumberWithCarrierCode(ar_number, "15", + &formatted_number); + EXPECT_EQ("01234 15 12-5678", formatted_number); + phone_util_.FormatNationalNumberWithCarrierCode(ar_number, "", + &formatted_number); + EXPECT_EQ("01234 12-5678", formatted_number); + // Here the international rule is used, so no carrier code should be present. + phone_util_.Format(ar_number, PhoneNumberUtil::E164, &formatted_number); + EXPECT_EQ("+5491234125678", formatted_number); + // We don't support this for the US so there should be no change. + PhoneNumber us_number; + us_number.set_country_code(1); + us_number.set_national_number(4241231234ULL); + phone_util_.Format(us_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("424 123 1234", formatted_number); + phone_util_.FormatNationalNumberWithCarrierCode(us_number, "15", + &formatted_number); + EXPECT_EQ("424 123 1234", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatWithPreferredCarrierCode) { + // We only support this for AR in our test metadata. 
+ PhoneNumber ar_number; + string formatted_number; + ar_number.set_country_code(54); + ar_number.set_national_number(91234125678ULL); + // Test formatting with no preferred carrier code stored in the number itself. + phone_util_.FormatNationalNumberWithPreferredCarrierCode(ar_number, "15", + &formatted_number); + EXPECT_EQ("01234 15 12-5678", formatted_number); + phone_util_.FormatNationalNumberWithPreferredCarrierCode(ar_number, "", + &formatted_number); + EXPECT_EQ("01234 12-5678", formatted_number); + // Test formatting with preferred carrier code present. + ar_number.set_preferred_domestic_carrier_code("19"); + phone_util_.Format(ar_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("01234 12-5678", formatted_number); + phone_util_.FormatNationalNumberWithPreferredCarrierCode(ar_number, "15", + &formatted_number); + EXPECT_EQ("01234 19 12-5678", formatted_number); + phone_util_.FormatNationalNumberWithPreferredCarrierCode(ar_number, "", + &formatted_number); + EXPECT_EQ("01234 19 12-5678", formatted_number); + // When the preferred_domestic_carrier_code is present (even when it contains + // an empty string), use it instead of the default carrier code passed in. + ar_number.set_preferred_domestic_carrier_code(""); + phone_util_.FormatNationalNumberWithPreferredCarrierCode(ar_number, "15", + &formatted_number); + EXPECT_EQ("01234 12-5678", formatted_number); + // We don't support this for the US so there should be no change. 
+ PhoneNumber us_number; + us_number.set_country_code(1); + us_number.set_national_number(4241231234ULL); + us_number.set_preferred_domestic_carrier_code("99"); + phone_util_.Format(us_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("424 123 1234", formatted_number); + phone_util_.FormatNationalNumberWithPreferredCarrierCode(us_number, "15", + &formatted_number); + EXPECT_EQ("424 123 1234", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatByPattern) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(1); + test_number.set_national_number(6502530000ULL); + + RepeatedPtrField<NumberFormat> number_formats; + NumberFormat* number_format = number_formats.Add(); + number_format->set_pattern("(\\d{3})(\\d{3})(\\d{4})"); + number_format->set_format("($1) $2-$3"); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::NATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("(650) 253-0000", formatted_number); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::INTERNATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("+1 (650) 253-0000", formatted_number); + + // $NP is set to '1' for the US. Here we check that for other NANPA countries + // the US rules are followed.
+ number_format->set_national_prefix_formatting_rule("$NP ($FG)"); + number_format->set_format("$1 $2-$3"); + test_number.set_country_code(1); + test_number.set_national_number(4168819999ULL); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::NATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("1 (416) 881-9999", formatted_number); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::INTERNATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("+1 416 881-9999", formatted_number); + + test_number.set_country_code(39); + test_number.set_national_number(236618300ULL); + test_number.set_italian_leading_zero(true); + number_format->set_pattern("(\\d{2})(\\d{5})(\\d{3})"); + number_format->set_format("$1-$2 $3"); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::NATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("02-36618 300", formatted_number); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::INTERNATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("+39 02-36618 300", formatted_number); + + test_number.set_country_code(44); + test_number.set_national_number(2012345678ULL); + test_number.set_italian_leading_zero(false); + number_format->set_national_prefix_formatting_rule("$NP$FG"); + number_format->set_pattern("(\\d{2})(\\d{4})(\\d{4})"); + number_format->set_format("$1 $2 $3"); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::NATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("020 1234 5678", formatted_number); + + number_format->set_national_prefix_formatting_rule("($NP$FG)"); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::NATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("(020) 1234 5678", formatted_number); + number_format->set_national_prefix_formatting_rule(""); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::NATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("20 1234 5678", formatted_number); + 
number_format->set_national_prefix_formatting_rule(""); + phone_util_.FormatByPattern(test_number, PhoneNumberUtil::INTERNATIONAL, + number_formats, + &formatted_number); + EXPECT_EQ("+44 20 1234 5678", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, FormatE164Number) { + PhoneNumber test_number; + string formatted_number; + test_number.set_country_code(1); + test_number.set_national_number(6502530000ULL); + phone_util_.Format(test_number, PhoneNumberUtil::E164, &formatted_number); + EXPECT_EQ("+16502530000", formatted_number); + + test_number.set_country_code(49); + test_number.set_national_number(301234ULL); + phone_util_.Format(test_number, PhoneNumberUtil::E164, &formatted_number); + EXPECT_EQ("+49301234", formatted_number); +} + +TEST_F(PhoneNumberUtilTest, GetLengthOfGeographicalAreaCode) { + PhoneNumber number; + // Google MTV, which has area code "650". + number.set_country_code(1); + number.set_national_number(6502530000ULL); + EXPECT_EQ(3, phone_util_.GetLengthOfGeographicalAreaCode(number)); + + // A North America toll-free number, which has no area code. + number.set_country_code(1); + number.set_national_number(8002530000ULL); + EXPECT_EQ(0, phone_util_.GetLengthOfGeographicalAreaCode(number)); + + // An invalid US number (1 digit shorter), which has no area code. + number.set_country_code(1); + number.set_national_number(650253000ULL); + EXPECT_EQ(0, phone_util_.GetLengthOfGeographicalAreaCode(number)); + + // Google London, which has area code "20". + number.set_country_code(44); + number.set_national_number(2070313000ULL); + EXPECT_EQ(2, phone_util_.GetLengthOfGeographicalAreaCode(number)); + + // A UK mobile phone, which has no area code. + number.set_country_code(44); + number.set_national_number(7123456789ULL); + EXPECT_EQ(0, phone_util_.GetLengthOfGeographicalAreaCode(number)); + + // Google Buenos Aires, which has area code "11". 
+ number.set_country_code(54); + number.set_national_number(1155303000ULL); + EXPECT_EQ(2, phone_util_.GetLengthOfGeographicalAreaCode(number)); + + // Google Sydney, which has area code "2". + number.set_country_code(61); + number.set_national_number(293744000ULL); + EXPECT_EQ(1, phone_util_.GetLengthOfGeographicalAreaCode(number)); + + // Google Singapore. Singapore has no area code and no national prefix. + number.set_country_code(65); + number.set_national_number(65218000ULL); + EXPECT_EQ(0, phone_util_.GetLengthOfGeographicalAreaCode(number)); +} + +TEST_F(PhoneNumberUtilTest, GetLengthOfNationalDestinationCode) { + PhoneNumber number; + // Google MTV, which has national destination code (NDC) "650". + number.set_country_code(1); + number.set_national_number(6502530000ULL); + EXPECT_EQ(3, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // A North America toll-free number, which has NDC "800". + number.set_country_code(1); + number.set_national_number(8002530000ULL); + EXPECT_EQ(3, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // Google London, which has NDC "20". + number.set_country_code(44); + number.set_national_number(2070313000ULL); + EXPECT_EQ(2, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // A UK mobile phone, which has NDC "7123" + number.set_country_code(44); + number.set_national_number(7123456789ULL); + EXPECT_EQ(4, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // Google Buenos Aires, which has NDC "11". + number.set_country_code(54); + number.set_national_number(1155303000ULL); + EXPECT_EQ(2, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // Google Sydney, which has NDC "2". + number.set_country_code(61); + number.set_national_number(293744000ULL); + EXPECT_EQ(1, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // Google Singapore. Singapore has NDC "6521". 
+ number.set_country_code(65); + number.set_national_number(65218000ULL); + EXPECT_EQ(4, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // An invalid US number (1 digit shorter), which has no NDC. + number.set_country_code(1); + number.set_national_number(650253000ULL); + EXPECT_EQ(0, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // A number containing an invalid country code, which shouldn't have any NDC. + number.set_country_code(123); + number.set_national_number(650253000ULL); + EXPECT_EQ(0, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // A number that has only one group of digits after country code when + // formatted in the international format. + number.set_country_code(376); + number.set_national_number(12345ULL); + EXPECT_EQ(0, phone_util_.GetLengthOfNationalDestinationCode(number)); + + // The same number above, but with an extension. + number.set_country_code(376); + number.set_national_number(12345ULL); + number.set_extension("321"); + EXPECT_EQ(0, phone_util_.GetLengthOfNationalDestinationCode(number)); +} + +TEST_F(PhoneNumberUtilTest, ExtractPossibleNumber) { + // Removes preceding funky punctuation and letters but leaves the rest + // untouched. + string extracted_number; + ExtractPossibleNumber("Tel:0800-345-600", &extracted_number); + EXPECT_EQ("0800-345-600", extracted_number); + ExtractPossibleNumber("Tel:0800 FOR PIZZA", &extracted_number); + EXPECT_EQ("0800 FOR PIZZA", extracted_number); + + // Should not remove plus sign. + ExtractPossibleNumber("Tel:+800-345-600", &extracted_number); + EXPECT_EQ("+800-345-600", extracted_number); + // Should recognise wide digits as possible start values. + ExtractPossibleNumber("023", &extracted_number); + EXPECT_EQ("023", extracted_number); + // Dashes are not possible start values and should be removed. + ExtractPossibleNumber("Num-123", &extracted_number); + EXPECT_EQ("123", extracted_number); + // If not possible number present, return empty string. 
+ ExtractPossibleNumber("Num-....", &extracted_number); + EXPECT_EQ("", extracted_number); + // Leading brackets are stripped - these are not used when parsing. + ExtractPossibleNumber("(650) 253-0000", &extracted_number); + EXPECT_EQ("650) 253-0000", extracted_number); + + // Trailing non-alpha-numeric characters should be removed. + ExtractPossibleNumber("(650) 253-0000..- ..", &extracted_number); + EXPECT_EQ("650) 253-0000", extracted_number); + ExtractPossibleNumber("(650) 253-0000.", &extracted_number); + EXPECT_EQ("650) 253-0000", extracted_number); + // This case has a trailing RTL char. + ExtractPossibleNumber("(650) 253-0000‏", &extracted_number); + EXPECT_EQ("650) 253-0000", extracted_number); +} + +TEST_F(PhoneNumberUtilTest, IsNANPACountry) { + EXPECT_TRUE(phone_util_.IsNANPACountry("US")); + EXPECT_TRUE(phone_util_.IsNANPACountry("BS")); +} + +TEST_F(PhoneNumberUtilTest, IsValidNumber) { + PhoneNumber us_number; + us_number.set_country_code(1); + us_number.set_national_number(6502530000ULL); + EXPECT_TRUE(phone_util_.IsValidNumber(us_number)); + + PhoneNumber it_number; + it_number.set_country_code(39); + it_number.set_national_number(236618300ULL); + it_number.set_italian_leading_zero(true); + EXPECT_TRUE(phone_util_.IsValidNumber(it_number)); + + PhoneNumber gb_number; + gb_number.set_country_code(44); + gb_number.set_national_number(7912345678ULL); + EXPECT_TRUE(phone_util_.IsValidNumber(gb_number)); + + PhoneNumber nz_number; + nz_number.set_country_code(64); + nz_number.set_national_number(21387835ULL); + EXPECT_TRUE(phone_util_.IsValidNumber(nz_number)); +} + +TEST_F(PhoneNumberUtilTest, IsValidForRegion) { + // This number is valid for the Bahamas, but is not a valid US number. 
+ PhoneNumber bs_number; + bs_number.set_country_code(1); + bs_number.set_national_number(2423232345ULL); + EXPECT_TRUE(phone_util_.IsValidNumber(bs_number)); + EXPECT_TRUE(phone_util_.IsValidNumberForRegion(bs_number, "BS")); + EXPECT_FALSE(phone_util_.IsValidNumberForRegion(bs_number, "US")); + bs_number.set_national_number(2421232345ULL); + // This number is no longer valid. + EXPECT_FALSE(phone_util_.IsValidNumber(bs_number)); + + // La Mayotte and Réunion use 'leadingDigits' to differentiate them. + PhoneNumber re_number; + re_number.set_country_code(262); + re_number.set_national_number(262123456ULL); + EXPECT_TRUE(phone_util_.IsValidNumber(re_number)); + EXPECT_TRUE(phone_util_.IsValidNumberForRegion(re_number, "RE")); + EXPECT_FALSE(phone_util_.IsValidNumberForRegion(re_number, "YT")); + // Now change the number to be a number for La Mayotte. + re_number.set_national_number(269601234ULL); + EXPECT_TRUE(phone_util_.IsValidNumberForRegion(re_number, "YT")); + EXPECT_FALSE(phone_util_.IsValidNumberForRegion(re_number, "RE")); + // This number is no longer valid. + re_number.set_national_number(269123456ULL); + EXPECT_FALSE(phone_util_.IsValidNumberForRegion(re_number, "YT")); + EXPECT_FALSE(phone_util_.IsValidNumberForRegion(re_number, "RE")); + EXPECT_FALSE(phone_util_.IsValidNumber(re_number)); + // However, it should be recognised as from La Mayotte. + string region_code; + phone_util_.GetRegionCodeForNumber(re_number, &region_code); + EXPECT_EQ("YT", region_code); + // This number is valid in both places.
+ re_number.set_national_number(800123456ULL); + EXPECT_TRUE(phone_util_.IsValidNumberForRegion(re_number, "YT")); + EXPECT_TRUE(phone_util_.IsValidNumberForRegion(re_number, "RE")); +} + +TEST_F(PhoneNumberUtilTest, IsNotValidNumber) { + PhoneNumber us_number; + us_number.set_country_code(1); + us_number.set_national_number(2530000ULL); + EXPECT_FALSE(phone_util_.IsValidNumber(us_number)); + + PhoneNumber it_number; + it_number.set_country_code(39); + it_number.set_national_number(23661830000ULL); + it_number.set_italian_leading_zero(true); + EXPECT_FALSE(phone_util_.IsValidNumber(it_number)); + + PhoneNumber gb_number; + gb_number.set_country_code(44); + gb_number.set_national_number(791234567ULL); + EXPECT_FALSE(phone_util_.IsValidNumber(gb_number)); + + PhoneNumber de_number; + de_number.set_country_code(49); + de_number.set_national_number(1234ULL); + EXPECT_FALSE(phone_util_.IsValidNumber(de_number)); + + PhoneNumber nz_number; + nz_number.set_country_code(64); + nz_number.set_national_number(3316005ULL); + EXPECT_FALSE(phone_util_.IsValidNumber(nz_number)); +} + +TEST_F(PhoneNumberUtilTest, IsPossibleNumber) { + PhoneNumber number; + number.set_country_code(1); + number.set_national_number(6502530000ULL); + EXPECT_TRUE(phone_util_.IsPossibleNumber(number)); + + number.set_country_code(1); + number.set_national_number(2530000ULL); + EXPECT_TRUE(phone_util_.IsPossibleNumber(number)); + + number.set_country_code(44); + number.set_national_number(2070313000ULL); + EXPECT_TRUE(phone_util_.IsPossibleNumber(number)); + + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("+1 650 253 0000", + "US")); + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("+1 650 GOO OGLE", + "US")); + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("(650) 253-0000", + "US")); + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("253-0000", + "US")); + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("+1 650 253 0000", + "GB")); + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("+44 
20 7031 3000", + "GB")); + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("(020) 7031 3000", + "GB")); + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("7031 3000", + "GB")); + EXPECT_TRUE(phone_util_.IsPossibleNumberForString("3331 6005", + "NZ")); +} + +TEST_F(PhoneNumberUtilTest, IsPossibleNumberWithReason) { + // FYI, national numbers for country code +1 that are within 7 to 10 digits + // are possible. + PhoneNumber number; + number.set_country_code(1); + number.set_national_number(6502530000ULL); + EXPECT_EQ(PhoneNumberUtil::IS_POSSIBLE, + phone_util_.IsPossibleNumberWithReason(number)); + + number.set_country_code(1); + number.set_national_number(2530000ULL); + EXPECT_EQ(PhoneNumberUtil::IS_POSSIBLE, + phone_util_.IsPossibleNumberWithReason(number)); + + number.set_country_code(0); + number.set_national_number(2530000ULL); + EXPECT_EQ(PhoneNumberUtil::INVALID_COUNTRY_CODE, + phone_util_.IsPossibleNumberWithReason(number)); + + number.set_country_code(1); + number.set_national_number(253000ULL); + EXPECT_EQ(PhoneNumberUtil::TOO_SHORT, + phone_util_.IsPossibleNumberWithReason(number)); + + number.set_country_code(1); + number.set_national_number(65025300000ULL); + EXPECT_EQ(PhoneNumberUtil::TOO_LONG, + phone_util_.IsPossibleNumberWithReason(number)); + + number.set_country_code(44); + number.set_national_number(2070310000ULL); + EXPECT_EQ(PhoneNumberUtil::IS_POSSIBLE, + phone_util_.IsPossibleNumberWithReason(number)); + + number.set_country_code(49); + number.set_national_number(30123456ULL); + EXPECT_EQ(PhoneNumberUtil::IS_POSSIBLE, + phone_util_.IsPossibleNumberWithReason(number)); + + // Try with number that we don't have metadata for. 
+ PhoneNumber ad_number; + ad_number.set_country_code(376); + ad_number.set_national_number(12345ULL); + EXPECT_EQ(PhoneNumberUtil::IS_POSSIBLE, + phone_util_.IsPossibleNumberWithReason(ad_number)); + ad_number.set_country_code(376); + ad_number.set_national_number(13ULL); + EXPECT_EQ(PhoneNumberUtil::TOO_SHORT, + phone_util_.IsPossibleNumberWithReason(ad_number)); + ad_number.set_country_code(376); + ad_number.set_national_number(1234567890123456ULL); + EXPECT_EQ(PhoneNumberUtil::TOO_LONG, + phone_util_.IsPossibleNumberWithReason(ad_number)); +} + +TEST_F(PhoneNumberUtilTest, IsNotPossibleNumber) { + PhoneNumber number; + number.set_country_code(1); + number.set_national_number(65025300000ULL); + EXPECT_FALSE(phone_util_.IsPossibleNumber(number)); + + number.set_country_code(1); + number.set_national_number(253000ULL); + EXPECT_FALSE(phone_util_.IsPossibleNumber(number)); + + number.set_country_code(44); + number.set_national_number(300ULL); + EXPECT_FALSE(phone_util_.IsPossibleNumber(number)); + + EXPECT_FALSE(phone_util_.IsPossibleNumberForString("+1 650 253 00000", + "US")); + EXPECT_FALSE(phone_util_.IsPossibleNumberForString("(650) 253-00000", + "US")); + EXPECT_FALSE(phone_util_.IsPossibleNumberForString("I want a Pizza", + "US")); + EXPECT_FALSE(phone_util_.IsPossibleNumberForString("253-000", + "US")); + EXPECT_FALSE(phone_util_.IsPossibleNumberForString("1 3000", + "GB")); + EXPECT_FALSE(phone_util_.IsPossibleNumberForString("+44 300", + "GB")); +} + +TEST_F(PhoneNumberUtilTest, TruncateTooLongNumber) { + // US number 650-253-0000, but entered with one additional digit at the end. 
+ PhoneNumber too_long_number; + too_long_number.set_country_code(1); + too_long_number.set_national_number(65025300001ULL); + PhoneNumber valid_number; + valid_number.set_country_code(1); + valid_number.set_national_number(6502530000ULL); + EXPECT_TRUE(phone_util_.TruncateTooLongNumber(&too_long_number)); + // The truncated number is compared with the expected one via DebugString(). + EXPECT_EQ(valid_number.DebugString(), too_long_number.DebugString()); + + // GB number 080 1234 5678, but entered with 4 extra digits at the end. + too_long_number.set_country_code(44); + too_long_number.set_national_number(80123456780123ULL); + valid_number.set_country_code(44); + valid_number.set_national_number(8012345678ULL); + EXPECT_TRUE(phone_util_.TruncateTooLongNumber(&too_long_number)); + EXPECT_EQ(valid_number.DebugString(), too_long_number.DebugString()); + + // IT number 022 3456 7890, but entered with 3 extra digits at the end. + too_long_number.set_country_code(39); + too_long_number.set_national_number(2234567890123ULL); + too_long_number.set_italian_leading_zero(true); + valid_number.set_country_code(39); + valid_number.set_national_number(2234567890ULL); + valid_number.set_italian_leading_zero(true); + EXPECT_TRUE(phone_util_.TruncateTooLongNumber(&too_long_number)); + EXPECT_EQ(valid_number.DebugString(), too_long_number.DebugString()); + + // Tests that an already valid number is accepted (the call returns true). + PhoneNumber valid_number_copy(valid_number); + EXPECT_TRUE(phone_util_.TruncateTooLongNumber(&valid_number)); + // Tests the number is not modified. + EXPECT_EQ(valid_number_copy.DebugString(), valid_number.DebugString()); + + // Tests what happens when a number with an invalid prefix is passed in. + PhoneNumber number_with_invalid_prefix; + number_with_invalid_prefix.set_country_code(1); + // The test metadata says US numbers cannot have prefix 240.
+ number_with_invalid_prefix.set_national_number(2401234567ULL); + PhoneNumber invalid_number_copy(number_with_invalid_prefix); + EXPECT_FALSE(phone_util_.TruncateTooLongNumber(&number_with_invalid_prefix)); + // Tests the number is not modified. + EXPECT_EQ(invalid_number_copy.DebugString(), + number_with_invalid_prefix.DebugString()); + + // Tests that a too-short number is rejected (the call returns false). + PhoneNumber too_short_number; + too_short_number.set_country_code(1); + too_short_number.set_national_number(1234ULL); + PhoneNumber too_short_number_copy(too_short_number); + EXPECT_FALSE(phone_util_.TruncateTooLongNumber(&too_short_number)); + // Tests the number is not modified. + EXPECT_EQ(too_short_number_copy.DebugString(), + too_short_number.DebugString()); +} + +// Checks IsLeadingZeroCountry for country codes 39, 225 and 241 (expected true) +// and 1 (expected false). +TEST_F(PhoneNumberUtilTest, IsLeadingZeroCountry) { + EXPECT_TRUE(PhoneNumberUtil::IsLeadingZeroCountry(39)); // Italy + EXPECT_TRUE(PhoneNumberUtil::IsLeadingZeroCountry(225)); // Cote d'Ivoire + EXPECT_TRUE(PhoneNumberUtil::IsLeadingZeroCountry(241)); // Gabon + EXPECT_FALSE(PhoneNumberUtil::IsLeadingZeroCountry(1)); // USA +} + +// Checks that GetNumberType reports PREMIUM_RATE for sample numbers with +// country codes 1, 39, 44 and 49. +TEST_F(PhoneNumberUtilTest, IsPremiumRate) { + PhoneNumber number; + number.set_country_code(1); + number.set_national_number(9004433030ULL); + EXPECT_EQ(PhoneNumberUtil::PREMIUM_RATE, phone_util_.GetNumberType(number)); + + number.set_country_code(39); + number.set_national_number(892123ULL); + EXPECT_EQ(PhoneNumberUtil::PREMIUM_RATE, phone_util_.GetNumberType(number)); + + number.set_country_code(44); + number.set_national_number(9187654321ULL); + EXPECT_EQ(PhoneNumberUtil::PREMIUM_RATE, phone_util_.GetNumberType(number)); + + // Country code 49 is checked with a ten-digit and an eleven-digit number. + number.set_country_code(49); + number.set_national_number(9001654321ULL); + EXPECT_EQ(PhoneNumberUtil::PREMIUM_RATE, phone_util_.GetNumberType(number)); + + number.set_country_code(49); + number.set_national_number(90091234567ULL); + EXPECT_EQ(PhoneNumberUtil::PREMIUM_RATE, phone_util_.GetNumberType(number)); +} + +TEST_F(PhoneNumberUtilTest,
IsTollFree) { + // Each sample number below is expected to be classified as TOLL_FREE. + PhoneNumber number; + number.set_country_code(1); + number.set_national_number(8881234567ULL); + EXPECT_EQ(PhoneNumberUtil::TOLL_FREE, phone_util_.GetNumberType(number)); + + number.set_country_code(39); + number.set_national_number(803123ULL); + EXPECT_EQ(PhoneNumberUtil::TOLL_FREE, phone_util_.GetNumberType(number)); + + number.set_country_code(44); + number.set_national_number(8012345678ULL); + EXPECT_EQ(PhoneNumberUtil::TOLL_FREE, phone_util_.GetNumberType(number)); + + number.set_country_code(49); + number.set_national_number(8001234567ULL); + EXPECT_EQ(PhoneNumberUtil::TOLL_FREE, phone_util_.GetNumberType(number)); +} + +// Checks that GetNumberType reports MOBILE for sample numbers with country +// codes 1, 39, 44, 49 and 54. +TEST_F(PhoneNumberUtilTest, IsMobile) { + PhoneNumber number; + // A Bahamas mobile number. + number.set_country_code(1); + number.set_national_number(2423570000ULL); + EXPECT_EQ(PhoneNumberUtil::MOBILE, phone_util_.GetNumberType(number)); + + number.set_country_code(39); + number.set_national_number(312345678ULL); + EXPECT_EQ(PhoneNumberUtil::MOBILE, phone_util_.GetNumberType(number)); + + number.set_country_code(44); + number.set_national_number(7912345678ULL); + EXPECT_EQ(PhoneNumberUtil::MOBILE, phone_util_.GetNumberType(number)); + + number.set_country_code(49); + number.set_national_number(15123456789ULL); + EXPECT_EQ(PhoneNumberUtil::MOBILE, phone_util_.GetNumberType(number)); + + number.set_country_code(54); + number.set_national_number(91187654321ULL); + EXPECT_EQ(PhoneNumberUtil::MOBILE, phone_util_.GetNumberType(number)); +} + +// Checks that GetNumberType reports FIXED_LINE for the sample numbers below. +TEST_F(PhoneNumberUtilTest, IsFixedLine) { + PhoneNumber number; + // A Bahamas fixed-line number. + number.set_country_code(1); + number.set_national_number(2423651234ULL); + EXPECT_EQ(PhoneNumberUtil::FIXED_LINE, phone_util_.GetNumberType(number)); + + // An Italian fixed-line number, with the Italian leading zero flag set. + number.Clear(); + number.set_country_code(39); + number.set_national_number(236618300ULL); + number.set_italian_leading_zero(true); + EXPECT_EQ(PhoneNumberUtil::FIXED_LINE, phone_util_.GetNumberType(number)); + +
number.Clear(); + number.set_country_code(44); + number.set_national_number(2012345678ULL); + EXPECT_EQ(PhoneNumberUtil::FIXED_LINE, phone_util_.GetNumberType(number)); + + number.set_country_code(49); + number.set_national_number(301234ULL); + EXPECT_EQ(PhoneNumberUtil::FIXED_LINE, phone_util_.GetNumberType(number)); +} + +// Checks sample numbers whose expected type is FIXED_LINE_OR_MOBILE. +// NOTE(review): the test name says "And" while the enum value says "OR". +TEST_F(PhoneNumberUtilTest, IsFixedLineAndMobile) { + PhoneNumber number; + number.set_country_code(1); + number.set_national_number(6502531111ULL); + EXPECT_EQ(PhoneNumberUtil::FIXED_LINE_OR_MOBILE, + phone_util_.GetNumberType(number)); + + number.set_country_code(54); + number.set_national_number(1987654321ULL); + EXPECT_EQ(PhoneNumberUtil::FIXED_LINE_OR_MOBILE, + phone_util_.GetNumberType(number)); +} + +// Checks a sample number with country code 44 whose expected type is +// SHARED_COST. +TEST_F(PhoneNumberUtilTest, IsSharedCost) { + PhoneNumber number; + number.set_country_code(44); + number.set_national_number(8431231234ULL); + EXPECT_EQ(PhoneNumberUtil::SHARED_COST, phone_util_.GetNumberType(number)); +} + +// Checks a sample number with country code 44 whose expected type is VOIP. +TEST_F(PhoneNumberUtilTest, IsVoip) { + PhoneNumber number; + number.set_country_code(44); + number.set_national_number(5631231234ULL); + EXPECT_EQ(PhoneNumberUtil::VOIP, phone_util_.GetNumberType(number)); +} + +// Checks a sample number with country code 44 whose expected type is +// PERSONAL_NUMBER. +TEST_F(PhoneNumberUtilTest, IsPersonalNumber) { + PhoneNumber number; + number.set_country_code(44); + number.set_national_number(7031231234ULL); + EXPECT_EQ(PhoneNumberUtil::PERSONAL_NUMBER, + phone_util_.GetNumberType(number)); +} + +// An eleven-digit number with country code 1 is expected to have type UNKNOWN. +TEST_F(PhoneNumberUtilTest, IsUnknown) { + PhoneNumber number; + number.set_country_code(1); + number.set_national_number(65025311111ULL); + EXPECT_EQ(PhoneNumberUtil::UNKNOWN, phone_util_.GetNumberType(number)); +} + +// Checks the lookup from region code to country code; 0 is the expected result +// for the region codes ZZ and CS. +TEST_F(PhoneNumberUtilTest, GetCountryCodeForRegion) { + EXPECT_EQ(1, phone_util_.GetCountryCodeForRegion("US")); + EXPECT_EQ(64, phone_util_.GetCountryCodeForRegion("NZ")); + EXPECT_EQ(0, phone_util_.GetCountryCodeForRegion("ZZ")); + // CS is already deprecated so the library doesn't support it.
+ EXPECT_EQ(0, phone_util_.GetCountryCodeForRegion("CS")); +} + +// Checks which raw strings IsViablePhoneNumber accepts and rejects. +TEST_F(PhoneNumberUtilTest, IsViablePhoneNumber) { + // Only one or two digits before strange non-possible punctuation. + EXPECT_FALSE(IsViablePhoneNumber("12. March")); + EXPECT_FALSE(IsViablePhoneNumber("1+1+1")); + EXPECT_FALSE(IsViablePhoneNumber("80+0")); + EXPECT_FALSE(IsViablePhoneNumber("00")); + // Three digits is viable. + EXPECT_TRUE(IsViablePhoneNumber("111")); + // Alpha numbers. + EXPECT_TRUE(IsViablePhoneNumber("0800-4-pizza")); + EXPECT_TRUE(IsViablePhoneNumber("0800-4-PIZZA")); + // Only one or two digits before possible punctuation followed by more digits. + // The punctuation used here is the Unicode character U+3000. + // NOTE(review): confirm that the two literals below really contain U+3000 + // rather than an ASCII space; the two are hard to tell apart when reading. + EXPECT_TRUE(IsViablePhoneNumber("1 34")); + EXPECT_FALSE(IsViablePhoneNumber("1 3+4")); + // Unicode variants of possible starting character and other allowed + // punctuation/digits. + // NOTE(review): likewise confirm that the brackets and space in the next two + // literals are the Unicode variants this comment describes. + EXPECT_TRUE(IsViablePhoneNumber("(1) 3456789")); + // Testing a leading + is okay. + EXPECT_TRUE(IsViablePhoneNumber("+1) 3456789")); +} + +// Checks that Normalize drops punctuation and keeps only the digits. +TEST_F(PhoneNumberUtilTest, NormaliseRemovePunctuation) { + string input_number("034-56&+#234"); + Normalize(&input_number); + static const string kExpectedOutput("03456234"); + EXPECT_EQ(kExpectedOutput, input_number) + << "Conversion did not correctly remove punctuation"; +} + +// Checks that Normalize converts letters to digits when the input contains +// words (here "I-am-HUNGRY" becomes 426486479). +TEST_F(PhoneNumberUtilTest, NormaliseReplaceAlphaCharacters) { + string input_number("034-I-am-HUNGRY"); + Normalize(&input_number); + static const string kExpectedOutput("034426486479"); + EXPECT_EQ(kExpectedOutput, input_number) + << "Conversion did not correctly replace alpha characters"; +} + +// Checks that Normalize converts non-ASCII digits to ASCII digits. +TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) { + // The first digit is a full-width 2, the last digit is an Arabic-Indic digit + // 5. + // NOTE(review): confirm the first character of the literal below is really the + // full-width digit and not an ASCII 2.
+ string input_number("25٥"); + Normalize(&input_number); + static const string kExpectedOutput("255"); + EXPECT_EQ(kExpectedOutput, input_number) + << "Conversion did not correctly replace non-latin digits"; + // The first digit is an Eastern-Arabic 5, the last an Eastern-Arabic 0. + string eastern_arabic_input_number("۵2۰"); + Normalize(&eastern_arabic_input_number); + static const string kExpectedOutput2("520"); + EXPECT_EQ(kExpectedOutput2, eastern_arabic_input_number) + << "Conversion did not correctly replace non-latin digits"; +} + +// Checks that NormalizeDigitsOnly drops both punctuation and letters; the +// letter in the input does not show up as a digit in the expected output. +TEST_F(PhoneNumberUtilTest, NormaliseStripAlphaCharacters) { + string input_number("034-56&+a#234"); + phone_util_.NormalizeDigitsOnly(&input_number); + static const string kExpectedOutput("03456234"); + EXPECT_EQ(kExpectedOutput, input_number) + << "Conversion did not correctly remove alpha characters"; +} + +// Checks both the country code source reported by +// MaybeStripInternationalPrefixAndNormalize and the string it leaves behind. +TEST_F(PhoneNumberUtilTest, MaybeStripInternationalPrefix) { + // The prefix pattern 00[39] is used with inputs starting with 003 and 009. + string international_prefix("00[39]"); + string number_to_strip("0034567700-3898003"); + // Note the dash is removed as part of the normalization. + string stripped_number("45677003898003"); + EXPECT_EQ(PhoneNumber::FROM_NUMBER_WITH_IDD, + MaybeStripInternationalPrefixAndNormalize(international_prefix, + &number_to_strip)); + EXPECT_EQ(stripped_number, number_to_strip) + << "The number was not stripped of its international prefix."; + + // Now the number no longer starts with an IDD prefix, so it should now report + // FROM_DEFAULT_COUNTRY. + EXPECT_EQ(PhoneNumber::FROM_DEFAULT_COUNTRY, + MaybeStripInternationalPrefixAndNormalize(international_prefix, + &number_to_strip)); + + // Same number again, this time with the 009 form of the prefix. + number_to_strip.assign("00945677003898003"); + EXPECT_EQ(PhoneNumber::FROM_NUMBER_WITH_IDD, + MaybeStripInternationalPrefixAndNormalize(international_prefix, + &number_to_strip)); + EXPECT_EQ(stripped_number, number_to_strip) + << "The number was not stripped of its international prefix."; + + // Test it works when the international prefix is broken up by spaces.
+ number_to_strip.assign("00 9 45677003898003"); + EXPECT_EQ(PhoneNumber::FROM_NUMBER_WITH_IDD, + MaybeStripInternationalPrefixAndNormalize(international_prefix, + &number_to_strip)); + EXPECT_EQ(stripped_number, number_to_strip) + << "The number was not stripped of its international prefix."; + // Now the number no longer starts with an IDD prefix, so it should now report + // FROM_DEFAULT_COUNTRY. + EXPECT_EQ(PhoneNumber::FROM_DEFAULT_COUNTRY, + MaybeStripInternationalPrefixAndNormalize(international_prefix, + &number_to_strip)); + + // Test the + symbol is also recognised and stripped. + number_to_strip.assign("+45677003898003"); + stripped_number.assign("45677003898003"); + EXPECT_EQ(PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN, + MaybeStripInternationalPrefixAndNormalize(international_prefix, + &number_to_strip)); + EXPECT_EQ(stripped_number, number_to_strip) + << "The number supplied was not stripped of the plus symbol."; + + // If the digit after the IDD is a zero, we should not strip the IDD - no + // country code begins with 0. + number_to_strip.assign("0090112-3123"); + // The number is still normalized (the dash is removed) even though nothing is + // stripped. + stripped_number.assign("00901123123"); + EXPECT_EQ(PhoneNumber::FROM_DEFAULT_COUNTRY, + MaybeStripInternationalPrefixAndNormalize(international_prefix, + &number_to_strip)); + EXPECT_EQ(stripped_number, number_to_strip) + << "The number had a 0 after the match so shouldn't be stripped."; + // Here the 0 is separated by a space from the IDD.
+ number_to_strip.assign("009 0-112-3123"); + EXPECT_EQ(PhoneNumber::FROM_DEFAULT_COUNTRY, + MaybeStripInternationalPrefixAndNormalize(international_prefix, + &number_to_strip)); +} + +// Checks MaybeStripNationalPrefixAndCarrierCode against hand-built metadata: +// plain prefix stripping, cases where nothing may be stripped, carrier code +// extraction and the transform rule. +TEST_F(PhoneNumberUtilTest, MaybeStripNationalPrefixAndCarrierCode) { + PhoneMetadata metadata; + // National prefix 34; national numbers consist of 4 to 8 digits. + metadata.set_national_prefix_for_parsing("34"); + metadata.mutable_general_desc()->set_national_number_pattern("\\d{4,8}"); + string number_to_strip("34356778"); + string stripped_number("356778"); + string carrier_code; + MaybeStripNationalPrefixAndCarrierCode(metadata, &number_to_strip, + &carrier_code); + EXPECT_EQ(stripped_number, number_to_strip) + << "Should have had national prefix stripped."; + EXPECT_EQ("", carrier_code) << "Should have had no carrier code stripped."; + // Retry stripping - now the number should not start with the national prefix, + // so no more stripping should occur. + MaybeStripNationalPrefixAndCarrierCode(metadata, &number_to_strip, + &carrier_code); + EXPECT_EQ(stripped_number, number_to_strip) + << "Should have had no change - no national prefix present."; + // Some countries have no national prefix. Repeat test with none specified. + metadata.clear_national_prefix_for_parsing(); + MaybeStripNationalPrefixAndCarrierCode(metadata, &number_to_strip, + &carrier_code); + EXPECT_EQ(stripped_number, number_to_strip) + << "Should have had no change - empty national prefix."; + // If the resultant number doesn't match the national rule, it shouldn't be + // stripped. Here stripping the prefix 3 would leave only three digits. + metadata.set_national_prefix_for_parsing("3"); + number_to_strip.assign("3123"); + stripped_number.assign("3123"); + MaybeStripNationalPrefixAndCarrierCode(metadata, &number_to_strip, + &carrier_code); + EXPECT_EQ(stripped_number, number_to_strip) + << "Should have had no change - after stripping, it wouldn't have " + << "matched the national rule."; + // Test extracting carrier selection code.
+ metadata.set_national_prefix_for_parsing("0(81)?"); + number_to_strip.assign("08122123456"); + stripped_number.assign("22123456"); + MaybeStripNationalPrefixAndCarrierCode(metadata, &number_to_strip, + &carrier_code); + EXPECT_EQ("81", carrier_code) << "Should have had carrier code stripped."; + EXPECT_EQ(stripped_number, number_to_strip) + << "Should have had national prefix and carrier code stripped."; + // If there was a transform rule, check it was applied. + metadata.set_national_prefix_transform_rule("5$15"); + // Note that a capturing group is present here. + // With the rule 5$15 and the captured digits 31, the leading 031 is expected + // to become 5315. + metadata.set_national_prefix_for_parsing("0(\\d{2})"); + number_to_strip.assign("031123"); + string transformed_number("5315123"); + MaybeStripNationalPrefixAndCarrierCode(metadata, &number_to_strip, + &carrier_code); + EXPECT_EQ(transformed_number, number_to_strip) + << "Was not successfully transformed."; +} + +// Checks that MaybeStripExtension splits a trailing extension off the number, +// returns whether it found one, and leaves numbers without one untouched. +TEST_F(PhoneNumberUtilTest, MaybeStripExtension) { + // One with extension. + string number("1234576 ext. 1234"); + string extension; + string expected_extension("1234"); + string stripped_number("1234576"); + EXPECT_TRUE(MaybeStripExtension(&number, &extension)); + EXPECT_EQ(stripped_number, number); + EXPECT_EQ(expected_extension, extension); + + // One without extension. + number.assign("1234-576"); + extension.clear(); + stripped_number.assign("1234-576"); + EXPECT_FALSE(MaybeStripExtension(&number, &extension)); + EXPECT_EQ(stripped_number, number); + EXPECT_TRUE(extension.empty()); + + // One with an extension caught by the second capturing group in + // kKnownExtnPatterns.
+ number.assign("1234576-123#"); + extension.clear(); + expected_extension.assign("123"); + stripped_number.assign("1234576"); + EXPECT_TRUE(MaybeStripExtension(&number, &extension)); + EXPECT_EQ(stripped_number, number); + EXPECT_EQ(expected_extension, extension); + + // The same extension, written with an "ext." label and a trailing '#'. + number.assign("1234576 ext.123#"); + extension.clear(); + EXPECT_TRUE(MaybeStripExtension(&number, &extension)); + EXPECT_EQ(stripped_number, number); + EXPECT_EQ(expected_extension, extension); +} + +// Checks MaybeExtractCountryCode with the US metadata: the returned error +// code, the country code and country code source stored in the PhoneNumber, and +// the string that is left behind. +TEST_F(PhoneNumberUtilTest, MaybeExtractCountryCode) { + PhoneNumber number; + const PhoneMetadata* metadata = GetPhoneMetadata("US"); + // Note that for the US, the IDD is 011. + string phone_number("011112-3456789"); + string stripped_number("123456789"); + int expected_country_code = 1; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + MaybeExtractCountryCode(metadata, true, &phone_number, &number)); + EXPECT_EQ(expected_country_code, number.country_code()); + EXPECT_EQ(PhoneNumber::FROM_NUMBER_WITH_IDD, number.country_code_source()); + EXPECT_EQ(stripped_number, phone_number); + + // A leading plus sign followed by country code 64. + number.Clear(); + phone_number.assign("+6423456789"); + stripped_number.assign("23456789"); + expected_country_code = 64; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + MaybeExtractCountryCode(metadata, true, &phone_number, &number)); + EXPECT_EQ(expected_country_code, number.country_code()); + EXPECT_EQ(PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN, + number.country_code_source()); + EXPECT_EQ(stripped_number, phone_number); + + // Should not have extracted a country code - no international prefix present.
+ number.Clear(); + expected_country_code = 0; + // stripped_number still holds 23456789 from the previous case, which is also + // the normalized form of this input. + phone_number.assign("2345-6789"); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + MaybeExtractCountryCode(metadata, true, &phone_number, &number)); + EXPECT_EQ(expected_country_code, number.country_code()); + EXPECT_EQ(PhoneNumber::FROM_DEFAULT_COUNTRY, number.country_code_source()); + EXPECT_EQ(stripped_number, phone_number); + + // The digits after the IDD 011 are expected to yield an invalid country code + // error. + // NOTE(review): expected_country_code and stripped_number are assigned here + // but not asserted; only the error code is checked in this case. + expected_country_code = 0; + phone_number.assign("0119991123456789"); + stripped_number.assign(phone_number); + EXPECT_EQ(PhoneNumberUtil::INVALID_COUNTRY_CODE_ERROR, + MaybeExtractCountryCode(metadata, true, &phone_number, &number)); + + // Country code 1 written without a plus sign or IDD; with the second argument + // true the source is recorded as FROM_NUMBER_WITHOUT_PLUS_SIGN. + number.Clear(); + phone_number.assign("(1 610) 619 4466"); + stripped_number.assign("6106194466"); + expected_country_code = 1; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + MaybeExtractCountryCode(metadata, true, &phone_number, &number)); + EXPECT_EQ(expected_country_code, number.country_code()); + EXPECT_EQ(PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN, + number.country_code_source()); + EXPECT_EQ(stripped_number, phone_number); + + // Same input with the second argument false: the country code is still + // extracted, but no country code source is set on the number. + number.Clear(); + phone_number.assign("(1 610) 619 4466"); + stripped_number.assign("6106194466"); + expected_country_code = 1; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + MaybeExtractCountryCode(metadata, false, &phone_number, &number)); + EXPECT_EQ(expected_country_code, number.country_code()); + EXPECT_FALSE(number.has_country_code_source()); + EXPECT_EQ(stripped_number, phone_number); + + // Should not have extracted a country code - invalid number after extraction + // of uncertain country code.
+ number.Clear(); + phone_number.assign("(1 610) 619 446"); + // The leading 1 is expected to stay in the normalized number. + stripped_number.assign("1610619446"); + expected_country_code = 0; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + MaybeExtractCountryCode(metadata, false, &phone_number, &number)); + EXPECT_EQ(expected_country_code, number.country_code()); + EXPECT_FALSE(number.has_country_code_source()); + EXPECT_EQ(stripped_number, phone_number); + + number.Clear(); + phone_number.assign("(1 610) 619 43"); + stripped_number.assign("161061943"); + expected_country_code = 0; + // Should not have extracted a country code - invalid number both before and + // after extraction of uncertain country code. + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + MaybeExtractCountryCode(metadata, true, &phone_number, &number)); + EXPECT_EQ(expected_country_code, number.country_code()); + EXPECT_EQ(PhoneNumber::FROM_DEFAULT_COUNTRY, number.country_code_source()); + EXPECT_EQ(stripped_number, phone_number); +} + +// Checks formatting, number type and validation for a country code (376) that +// has no number description in the metadata. +TEST_F(PhoneNumberUtilTest, CountryWithNoNumberDesc) { + string formatted_number; + // Andorra is a country where we don't have PhoneNumberDesc info in the + // metadata. + PhoneNumber ad_number; + ad_number.set_country_code(376); + ad_number.set_national_number(12345ULL); + phone_util_.Format(ad_number, PhoneNumberUtil::INTERNATIONAL, + &formatted_number); + EXPECT_EQ("+376 12345", formatted_number); + phone_util_.Format(ad_number, PhoneNumberUtil::E164, &formatted_number); + EXPECT_EQ("+37612345", formatted_number); + phone_util_.Format(ad_number, PhoneNumberUtil::NATIONAL, &formatted_number); + EXPECT_EQ("12345", formatted_number); + // The type is expected to be UNKNOWN, yet the number is expected to validate. + EXPECT_EQ(PhoneNumberUtil::UNKNOWN, phone_util_.GetNumberType(ad_number)); + EXPECT_TRUE(phone_util_.IsValidNumber(ad_number)); + + // Test dialing a US number from within Andorra.
+ PhoneNumber us_number; + us_number.set_country_code(1); + us_number.set_national_number(6502530000ULL); + phone_util_.FormatOutOfCountryCallingNumber(us_number, "AD", + &formatted_number); + EXPECT_EQ("00 1 650 253 0000", formatted_number); +} + +// A number with country code 0 is expected to fail validation. +TEST_F(PhoneNumberUtilTest, UnknownCountryCallingCodeForValidation) { + PhoneNumber invalid_number; + invalid_number.set_country_code(0); + invalid_number.set_national_number(1234ULL); + EXPECT_FALSE(phone_util_.IsValidNumber(invalid_number)); +} + +// Checks pairs of numbers that are expected to compare as EXACT_MATCH, given +// as two strings, as a PhoneNumber and a string, or as two PhoneNumbers. +TEST_F(PhoneNumberUtilTest, IsNumberMatchMatches) { + // Test simple matches where the formatting differs, a leading zero is + // present, or the country code has been specified. + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331 6005", + "+64 03 331 6005")); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 03 331-6005", + "+64 03331 6005")); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+643 331-6005", + "+64033316005")); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+643 331-6005", + "+6433316005")); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005", + "+6433316005")); + // Test alpha numbers. + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+1800 siX-Flags", + "+1 800 7493 5247")); + // Test numbers with extensions. + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005 extn 1234", + "+6433316005#1234")); + // Test proto buffers.
+ PhoneNumber nz_number; + nz_number.set_country_code(64); + nz_number.set_national_number(33316005ULL); + nz_number.set_extension("3456"); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithOneString(nz_number, + "+643 331 6005 ext 3456")); + nz_number.clear_extension(); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithOneString(nz_number, + "+643 331 6005")); + // Check empty extensions are ignored. + nz_number.set_extension(""); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatchWithOneString(nz_number, + "+643 331 6005")); + // Check variant with two proto buffers. + PhoneNumber nz_number_2; + nz_number_2.set_country_code(64); + nz_number_2.set_national_number(33316005ULL); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatch(nz_number, nz_number_2)); + + // Check raw_input, country_code_source and preferred_domestic_carrier_code + // are ignored. + PhoneNumber br_number_1; + PhoneNumber br_number_2; + br_number_1.set_country_code(55); + br_number_1.set_national_number(3121286979ULL); + br_number_1.set_country_code_source(PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN); + br_number_1.set_preferred_domestic_carrier_code("12"); + br_number_1.set_raw_input("012 3121286979"); + br_number_2.set_country_code(55); + br_number_2.set_national_number(3121286979ULL); + br_number_2.set_country_code_source(PhoneNumber::FROM_DEFAULT_COUNTRY); + br_number_2.set_preferred_domestic_carrier_code("14"); + br_number_2.set_raw_input("143121286979"); + EXPECT_EQ(PhoneNumberUtil::EXACT_MATCH, + phone_util_.IsNumberMatch(br_number_1, br_number_2)); +} + +// Checks pairs of numbers that are expected to compare as NO_MATCH or +// INVALID_NUMBER. +// NOTE(review): "NonMetches" looks like a typo for "NonMatches"; it is left +// as is here because renaming changes the registered test name. +TEST_F(PhoneNumberUtilTest, IsNumberMatchNonMetches) { + // Non-matches: the two numbers differ in their last digit. + EXPECT_EQ(PhoneNumberUtil::NO_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("03 331 6005", + "03 331 6006")); + // Different country code, partial number match.
+ EXPECT_EQ(PhoneNumberUtil::NO_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005", + "+16433316005")); + // Different country code, same number. + EXPECT_EQ(PhoneNumberUtil::NO_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005", + "+6133316005")); + // Extension different, all else the same. + EXPECT_EQ(PhoneNumberUtil::NO_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005 extn 1234", + "+0116433316005#1235")); + // NSN matches, but extension is different - not the same number. + EXPECT_EQ(PhoneNumberUtil::NO_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005 ext.1235", + "3 331 6005#1234")); + // Invalid numbers that can't be parsed. + EXPECT_EQ(PhoneNumberUtil::INVALID_NUMBER, + phone_util_.IsNumberMatchWithTwoStrings("43", "3 331 6043")); + // The same result is expected when the short number carries a plus sign, or + // when the first input contains no digits at all. + EXPECT_EQ(PhoneNumberUtil::INVALID_NUMBER, + phone_util_.IsNumberMatchWithTwoStrings("+43", "+64 3 331 6005")); + EXPECT_EQ(PhoneNumberUtil::INVALID_NUMBER, + phone_util_.IsNumberMatchWithTwoStrings("+43", "64 3 331 6005")); + EXPECT_EQ(PhoneNumberUtil::INVALID_NUMBER, + phone_util_.IsNumberMatchWithTwoStrings("Dog", "64 3 331 6005")); +} + +// Checks pairs of numbers that are expected to compare as NSN_MATCH, plus one +// closing case that is expected to be only a SHORT_NSN_MATCH. +TEST_F(PhoneNumberUtilTest, IsNumberMatchNsnMatches) { + // NSN matches. + EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005", + "03 331 6005")); + + EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("3 331-6005", + "03 331 6005")); + + // A PhoneNumber with an empty extension compared against a national string. + PhoneNumber nz_number; + nz_number.set_country_code(64); + nz_number.set_national_number(33316005ULL); + nz_number.set_extension(""); + EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithOneString(nz_number, "03 331 6005")); + // Here the second number possibly starts with the country code for New + // Zealand, although we are unsure.
+ EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithOneString(nz_number, + "(64-3) 331 6005")); + + // Here, the 1 might be a national prefix, if we compare it to the US number, + // so the resultant match is an NSN match. + PhoneNumber us_number; + us_number.set_country_code(1); + us_number.set_national_number(2345678901ULL); + EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithOneString(us_number, + "1-234-567-8901")); + // The same number compared against a string without the leading 1, followed + // by string-only comparisons with and without a plus sign. + EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithOneString(us_number, "2345678901")); + EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+1 234-567 8901", + "1 234 567 8901")); + EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("1 234-567 8901", + "1 234 567 8901")); + EXPECT_EQ(PhoneNumberUtil::NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("1 234-567 8901", + "+1 234 567 8901")); + // For this case, the match will be a short NSN match, because we cannot + // assume that the 1 might be a national prefix, so don't remove it when + // parsing. + PhoneNumber random_number; + random_number.set_country_code(41); + random_number.set_national_number(2345678901ULL); + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatchWithOneString(random_number, + "1-234-567-8901")); +} + +// Checks pairs of numbers that are expected to compare as SHORT_NSN_MATCH. +TEST_F(PhoneNumberUtilTest, IsNumberMatchShortNsnMatches) { + // Short NSN matches with the country not specified for either one or both + // numbers. + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005", + "331 6005")); + + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("3 331-6005", + "331 6005")); + + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("3 331-6005", + "+64 331 6005")); + + // Short NSN match with the country specified.
+ EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("03 331-6005", + "331 6005")); + + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("1 234 345 6789", + "345 6789")); + + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+1 (234) 345 6789", + "345 6789")); + + // NSN matches, country code omitted for one number, extension missing for + // one. + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatchWithTwoStrings("+64 3 331-6005", + "3 331 6005#1234")); + + // One has Italian leading zero, one does not. + PhoneNumber it_number_1, it_number_2; + it_number_1.set_country_code(39); + it_number_1.set_national_number(1234ULL); + it_number_1.set_italian_leading_zero(true); + it_number_2.set_country_code(39); + it_number_2.set_national_number(1234ULL); + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatch(it_number_1, it_number_2)); + + // One has an extension, the other has an extension of "". + // The Italian leading zero is cleared so that the extension is the only + // difference between the two numbers. + it_number_1.set_extension("1234"); + it_number_1.clear_italian_leading_zero(); + it_number_2.set_extension(""); + EXPECT_EQ(PhoneNumberUtil::SHORT_NSN_MATCH, + phone_util_.IsNumberMatch(it_number_1, it_number_2)); +} + +// Checks that Parse returns NO_ERROR and produces the expected PhoneNumber for +// national and international spellings of the same numbers. +TEST_F(PhoneNumberUtilTest, ParseNationalNumber) { + PhoneNumber nz_number; + nz_number.set_country_code(64); + nz_number.set_national_number(33316005ULL); + PhoneNumber test_number; + // National prefix attached. + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("033316005", "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + // National prefix missing. + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("33316005", "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + // National prefix attached and some formatting present.
+ EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("03-331 6005", "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("03 331 6005", "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + + // Testing international prefixes. + // Should strip country code. + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0064 3 331 6005", + "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + // Try again, but this time we have an international number with Region Code + // US. It should recognise the country code and parse accordingly. + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("01164 3 331 6005", + "US", &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + // The same with a leading plus sign instead of the 011 prefix. + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+64 3 331 6005", + "US", &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + + // Test for http://b/issue?id=2247493 + // The input has a parenthesised 0 after the country code, and its national + // number itself starts with 64. + nz_number.Clear(); + nz_number.set_country_code(64); + nz_number.set_national_number(64123456ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+64(0)64123456", + "US", &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + + // Check that using a "/" is fine in a phone number. + PhoneNumber de_number; + de_number.set_country_code(49); + de_number.set_national_number(12345678ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("123/45678", "DE", &test_number)); + EXPECT_EQ(test_number.DebugString(), de_number.DebugString()); + + PhoneNumber us_number; + us_number.set_country_code(1); + // Check it doesn't use the '1' as a country code when parsing if the phone + // number was already possible.
+ us_number.set_national_number(1234567890ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("123-456-7890", "US", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseNumberWithAlphaCharacters) { + // Test case with alpha characters. + PhoneNumber test_number; + PhoneNumber tollfree_number; + tollfree_number.set_country_code(64); + tollfree_number.set_national_number(800332005ULL); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0800 DDA 005", "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), tollfree_number.DebugString()); + + test_number.Clear(); + PhoneNumber premium_number; + premium_number.set_country_code(64); + premium_number.set_national_number(9003326005ULL); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0900 DDA 6005", "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), premium_number.DebugString()); + + // Not enough alpha characters for them to be considered intentional, so they + // are stripped. 
+ test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0900 332 6005a", + "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), premium_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0900 332 600a5", + "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), premium_number.DebugString()); + + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0900 332 600A5", + "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), premium_number.DebugString()); + + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0900 a332 600A5", + "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), premium_number.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseWithInternationalPrefixes) { + PhoneNumber us_number; + us_number.set_country_code(1); + us_number.set_national_number(6503336000ULL); + PhoneNumber test_number; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+1 (650) 333-6000", + "US", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+1-650-333-6000", + "US", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + + // Calling the US number from Singapore by using different service providers + // 1st test: calling using SingTel IDD service (IDD is 001) + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0011-650-333-6000", + "SG", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + // 2nd test: calling using StarHub IDD service (IDD is 008) + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0081-650-333-6000", + "SG", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + // 3rd test: calling using SingTel V019 service (IDD is 019) + 
test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0191-650-333-6000", + "SG", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + // Calling the US number from Poland + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0~01-650-333-6000", + "PL", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + + // Using "++" at the start. + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("++1 (650) 333-6000", + "PL", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + // Using a full-width plus sign. + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+1 (650) 333-6000", + "SG", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + // The whole number, including punctuation, is here represented in full-width + // form. + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+1 (650) 333-6000", + "SG", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); + + // Using the U+30FC dash. 
+ test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+1 (650) 333ー6000", + "SG", &test_number)); + EXPECT_EQ(test_number.DebugString(), us_number.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseWithLeadingZero) { + PhoneNumber it_number; + it_number.set_country_code(39); + it_number.set_national_number(236618300ULL); + it_number.set_italian_leading_zero(true); + PhoneNumber test_number; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+39 02-36618 300", + "NZ", &test_number)); + EXPECT_EQ(test_number.DebugString(), it_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("02-36618 300", "IT", &test_number)); + EXPECT_EQ(test_number.DebugString(), it_number.DebugString()); + + it_number.Clear(); + it_number.set_country_code(39); + it_number.set_national_number(312345678ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("312 345 678", "IT", &test_number)); + EXPECT_EQ(test_number.DebugString(), it_number.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseNationalNumberArgentina) { + // Test parsing mobile numbers of Argentina. 
+ PhoneNumber ar_number; + ar_number.set_country_code(54); + ar_number.set_national_number(93435551212ULL); + PhoneNumber test_number; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+54 9 343 555 1212", "AR", + &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0343 15 555 1212", "AR", + &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + + ar_number.set_national_number(93715654320ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+54 9 3715 65 4320", "AR", + &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("03715 15 65 4320", "AR", + &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + + // Test parsing fixed-line numbers of Argentina. + ar_number.set_national_number(1137970000ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+54 11 3797 0000", "AR", + &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("011 3797 0000", "AR", &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + + ar_number.set_national_number(3715654321ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+54 3715 65 4321", "AR", + &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("03715 65 4321", "AR", &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + + ar_number.set_national_number(2312340000ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+54 23 1234 0000", "AR", + 
&test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("023 1234 0000", "AR", &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseWithXInNumber) { + // Test that having an 'x' in the phone number at the start is ok and that it + // just gets removed. + PhoneNumber ar_number; + ar_number.set_country_code(54); + ar_number.set_national_number(123456789ULL); + PhoneNumber test_number; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0123456789", "AR", &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(0) 123456789", "AR", &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0 123456789", "AR", &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(0xx) 123456789", "AR", + &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_number.DebugString()); + + PhoneNumber ar_from_us; + ar_from_us.set_country_code(54); + ar_from_us.set_national_number(81429712ULL); + // This test is intentionally constructed such that the number of digits after + // xx is larger than 7, so that the number won't be mistakenly treated as an + // extension, as we allow extensions up to 7 digits. This assumption is okay + // for now as all the countries where a carrier selection code is written in + // the form of xx have a national significant number of length larger than 7. 
+ test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("011xx5481429712", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), ar_from_us.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseNumbersMexico) { + // Test parsing fixed-line numbers of Mexico. + PhoneNumber mx_number; + + mx_number.set_country_code(52); + mx_number.set_national_number(4499780001ULL); + PhoneNumber test_number; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+52 (449)978-0001", "MX", + &test_number)); + EXPECT_EQ(test_number.DebugString(), mx_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("01 (449)978-0001", "MX", + &test_number)); + EXPECT_EQ(test_number.DebugString(), mx_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(449)978-0001", "MX", + &test_number)); + EXPECT_EQ(test_number.DebugString(), mx_number.DebugString()); + + // Test parsing mobile numbers of Mexico. 
+ mx_number.Clear(); + mx_number.set_country_code(52); + mx_number.set_national_number(13312345678ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+52 1 33 1234-5678", "MX", + &test_number)); + EXPECT_EQ(test_number.DebugString(), mx_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("044 (33) 1234-5678", "MX", + &test_number)); + EXPECT_EQ(test_number.DebugString(), mx_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("045 33 1234-5678", "MX", + &test_number)); + EXPECT_EQ(test_number.DebugString(), mx_number.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, FailedParseOnInvalidNumbers) { + PhoneNumber test_number; + EXPECT_EQ(PhoneNumberUtil::NOT_A_NUMBER, + phone_util_.Parse("This is not a phone number", "NZ", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + EXPECT_EQ(PhoneNumberUtil::TOO_LONG_NSN, + phone_util_.Parse("01495 72553301873 810104", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + EXPECT_EQ(PhoneNumberUtil::TOO_SHORT_NSN, + phone_util_.Parse("+49 0", "DE", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + EXPECT_EQ(PhoneNumberUtil::INVALID_COUNTRY_CODE_ERROR, + phone_util_.Parse("+210 3456 56789", "NZ", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + EXPECT_EQ(PhoneNumberUtil::INVALID_COUNTRY_CODE_ERROR, + phone_util_.Parse("123 456 7890", "ZZ", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + EXPECT_EQ(PhoneNumberUtil::INVALID_COUNTRY_CODE_ERROR, + phone_util_.Parse("123 456 7890", "CS", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); 
+ + EXPECT_EQ(PhoneNumberUtil::TOO_SHORT_AFTER_IDD, + phone_util_.Parse("0044-----", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + EXPECT_EQ(PhoneNumberUtil::TOO_SHORT_AFTER_IDD, + phone_util_.Parse("0044", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + EXPECT_EQ(PhoneNumberUtil::TOO_SHORT_AFTER_IDD, + phone_util_.Parse("011", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + EXPECT_EQ(PhoneNumberUtil::TOO_SHORT_AFTER_IDD, + phone_util_.Parse("0119", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseNumbersWithPlusWithNoRegion) { + PhoneNumber nz_number; + nz_number.set_country_code(64); + nz_number.set_national_number(33316005ULL); + // "ZZ" is allowed only if the number starts with a '+' - then + // the country code can be calculated. + PhoneNumber result_proto; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+64 3 331 6005", "ZZ", + &result_proto)); + EXPECT_EQ(nz_number.DebugString(), result_proto.DebugString()); + + // Test with full-width plus. + result_proto.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+64 3 331 6005", "ZZ", + &result_proto)); + EXPECT_EQ(nz_number.DebugString(), result_proto.DebugString()); + // Test with normal plus but leading characters that need to be stripped. + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse(" +64 3 331 6005", "ZZ", + &result_proto)); + EXPECT_EQ(nz_number.DebugString(), result_proto.DebugString()); + + nz_number.set_raw_input("+64 3 331 6005"); + nz_number.set_country_code_source(PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN); + // It is important that we set this to an empty string, since we used + // ParseAndKeepRawInput and no carrrier code was found. 
+ nz_number.set_preferred_domestic_carrier_code(""); + result_proto.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.ParseAndKeepRawInput("+64 3 331 6005", "ZZ", + &result_proto)); + EXPECT_EQ(nz_number.DebugString(), result_proto.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseExtensions) { + PhoneNumber nz_number; + nz_number.set_country_code(64); + nz_number.set_national_number(33316005ULL); + nz_number.set_extension("3456"); + PhoneNumber test_number; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("03 331 6005 ext 3456", "NZ", + &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("03 331 6005x3456", "NZ", + &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("03-331 6005 int.3456", "NZ", + &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("03 331 6005 #3456", "NZ", + &test_number)); + EXPECT_EQ(test_number.DebugString(), nz_number.DebugString()); + + // Test the following do not extract extensions: + PhoneNumber non_extn_number; + non_extn_number.set_country_code(1); + non_extn_number.set_national_number(80074935247ULL); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("1800 six-flags", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), non_extn_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("1800 SIX-FLAGS", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), non_extn_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0~0 1800 7493 5247", "PL", + &test_number)); + EXPECT_EQ(test_number.DebugString(), non_extn_number.DebugString()); + 
test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(1800) 7493.5247", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), non_extn_number.DebugString()); + + // Check that the last instance of an extension token is matched. + PhoneNumber extn_number; + extn_number.set_country_code(1); + extn_number.set_national_number(80074935247ULL); + extn_number.set_extension("1234"); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("0~0 1800 7493 5247 ~1234", "PL", + &test_number)); + EXPECT_EQ(test_number.DebugString(), extn_number.DebugString()); + + // Verifying bug-fix where the last digit of a number was previously omitted + // if it was a 0 when extracting the extension. Also verifying a few different + // cases of extensions. + PhoneNumber uk_number; + uk_number.set_country_code(44); + uk_number.set_national_number(2034567890ULL); + uk_number.set_extension("456"); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+44 2034567890x456", "NZ", + &test_number)); + EXPECT_EQ(test_number.DebugString(), uk_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+44 2034567890x456", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), uk_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+44 2034567890 x456", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), uk_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+44 2034567890 X456", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), uk_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+44 2034567890 X 456", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), uk_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + 
phone_util_.Parse("+44 2034567890 X 456", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), uk_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+44 2034567890 x 456 ", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), uk_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+44 2034567890 X 456", "GB", + &test_number)); + EXPECT_EQ(test_number.DebugString(), uk_number.DebugString()); + + PhoneNumber us_with_extension; + us_with_extension.set_country_code(1); + us_with_extension.set_national_number(8009013355ULL); + us_with_extension.set_extension("7246433"); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(800) 901-3355 x 7246433", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_extension.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(800) 901-3355 , ext 7246433", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_extension.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(800) 901-3355 ,extension 7246433", + "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_extension.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(800) 901-3355 ,extensión 7246433", + "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_extension.DebugString()); + test_number.Clear(); + // Repeat with the small letter o with acute accent created by combining + // characters. 
+ EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(800) 901-3355 ,extensión 7246433", + "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_extension.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(800) 901-3355 , 7246433", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_extension.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(800) 901-3355 ext: 7246433", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_extension.DebugString()); + + // Test that if a number has two extensions specified, we ignore the second. + PhoneNumber us_with_two_extensions_number; + us_with_two_extensions_number.set_country_code(1); + us_with_two_extensions_number.set_national_number(2121231234ULL); + us_with_two_extensions_number.set_extension("508"); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(212)123-1234 x508/x1234", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_two_extensions_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(212)123-1234 x508/ x1234", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_two_extensions_number.DebugString()); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("(212)123-1234 x508\\x1234", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_two_extensions_number.DebugString()); + + // Test parsing numbers in the form (645) 123-1234-910# works, where the last + // 3 digits before the # are an extension. 
+ us_with_extension.Clear(); + us_with_extension.set_country_code(1); + us_with_extension.set_national_number(6451231234ULL); + us_with_extension.set_extension("910"); + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.Parse("+1 (645) 123 1234-910#", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), us_with_extension.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, ParseAndKeepRaw) { + PhoneNumber alpha_numeric_number; + alpha_numeric_number.set_country_code(1); + alpha_numeric_number.set_national_number(80074935247ULL); + alpha_numeric_number.set_raw_input("800 six-flags"); + alpha_numeric_number.set_country_code_source( + PhoneNumber::FROM_DEFAULT_COUNTRY); + alpha_numeric_number.set_preferred_domestic_carrier_code(""); + + PhoneNumber test_number; + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.ParseAndKeepRawInput("800 six-flags", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), alpha_numeric_number.DebugString()); + + alpha_numeric_number.set_national_number(8007493524ULL); + alpha_numeric_number.set_raw_input("1800 six-flag"); + alpha_numeric_number.set_country_code_source( + PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.ParseAndKeepRawInput("1800 six-flag", "US", + &test_number)); + EXPECT_EQ(test_number.DebugString(), alpha_numeric_number.DebugString()); + + alpha_numeric_number.set_raw_input("+1800 six-flag"); + alpha_numeric_number.set_country_code_source( + PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.ParseAndKeepRawInput("+1800 six-flag", "CN", + &test_number)); + EXPECT_EQ(test_number.DebugString(), alpha_numeric_number.DebugString()); + + alpha_numeric_number.set_raw_input("001800 six-flag"); + alpha_numeric_number.set_country_code_source( + PhoneNumber::FROM_NUMBER_WITH_IDD); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.ParseAndKeepRawInput("001800 six-flag", + "NZ", + &test_number)); 
+ EXPECT_EQ(test_number.DebugString(), alpha_numeric_number.DebugString()); + + // Try with invalid region - expect failure. + test_number.Clear(); + EXPECT_EQ(PhoneNumberUtil::INVALID_COUNTRY_CODE_ERROR, + phone_util_.Parse("123 456 7890", "CS", &test_number)); + EXPECT_EQ(test_number.DebugString(), + PhoneNumber::default_instance().DebugString()); + + PhoneNumber korean_number; + korean_number.set_country_code(82); + korean_number.set_national_number(22123456); + korean_number.set_raw_input("08122123456"); + korean_number.set_country_code_source(PhoneNumber::FROM_DEFAULT_COUNTRY); + korean_number.set_preferred_domestic_carrier_code("81"); + EXPECT_EQ(PhoneNumberUtil::NO_ERROR, + phone_util_.ParseAndKeepRawInput("08122123456", + "KR", + &test_number)); + EXPECT_EQ(test_number.DebugString(), korean_number.DebugString()); +} + +TEST_F(PhoneNumberUtilTest, IsAlphaNumber) { + static const string kAlphaNumber("1800 six-flags"); + EXPECT_TRUE(phone_util_.IsAlphaNumber(kAlphaNumber)); + static const string kAlphaNumberWithExtension = "1800 six-flags ext. 1234"; + EXPECT_TRUE(phone_util_.IsAlphaNumber(kAlphaNumberWithExtension)); + static const string kNonAlphaNumber("1800 123-1234"); + EXPECT_FALSE(phone_util_.IsAlphaNumber(kNonAlphaNumber)); + static const string kNonAlphaNumberWithExtension( + "1800 123-1234 extension: 1234"); + EXPECT_FALSE(phone_util_.IsAlphaNumber(kNonAlphaNumberWithExtension)); +} + +} // namespace phonenumbers +} // namespace i18n