// Copyright (C) 2025 The Libphonenumber Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include #include #include "phonenumbers/regexp_cache.h" #include "phonenumbers/regexp_factory.h" #include "phonenumbers/stringutil.h" #ifndef I18N_PHONENUMBERS_REGEXPSANDMAPPINGS_H_ #define I18N_PHONENUMBERS_REGEXPSANDMAPPINGS_H_ namespace i18n { namespace phonenumbers { class PhoneNumberRegExpsAndMappings { friend class PhoneContextParser; friend class PhoneNumberNormalizer; friend class PhoneNumberUtil; friend class PhoneContextParserTest; friend class PhoneNumberNormalizerTest; private: void InitializeMapsAndSets(); // Helper initialiser method to create the regular-expression pattern to match // extensions. Note that: // - There are currently six capturing groups for the extension itself. If this // number is changed, MaybeStripExtension needs to be updated. // - The only capturing groups should be around the digits that you want to // capture as part of the extension, or else parsing will fail! static std::string CreateExtnPattern(bool for_parsing); // Helper method for constructing regular expressions for parsing. Creates an // expression that captures up to max_length digits. static std::string ExtnDigits(int max_length); // Regular expression of viable phone numbers. This is location independent. // Checks we have at least three leading digits, and only valid punctuation, // alpha characters and digits in the phone number. Does not include extension // data. The symbol 'x' is allowed here as valid punctuation since it is often // used as a placeholder for carrier codes, for example in Brazilian phone // numbers. We also allow multiple plus-signs at the start. // Corresponds to the following: // [digits]{minLengthNsn}| // plus_sign*(([punctuation]|[star])*[digits]){3,} // ([punctuation]|[star]|[digits]|[alpha])* // // The first reg-ex is to allow short numbers (two digits long) to be parsed // if they are entered as "15" etc, but only if there is no punctuation in // them. The second expression restricts the number of digits to three or // more, but then allows them to be in international form, and to have // alpha-characters and punctuation. const string valid_phone_number_; // Regexp of all possible ways to write extensions, for use when parsing. This // will be run as a case-insensitive regexp match. Wide character versions are // also provided after each ASCII version. // For parsing, we are slightly more lenient in our interpretation than for // matching. Here we allow "comma" and "semicolon" as possible extension // indicators. When matching, these are hardly ever used to indicate this. const string extn_patterns_for_parsing_; // Regular expressions of different parts of the phone-context parameter, // following the syntax defined in RFC3966. const std::string rfc3966_phone_digit_; const std::string alphanum_; const std::string rfc3966_domainlabel_; const std::string rfc3966_toplabel_; scoped_ptr regexp_factory_; scoped_ptr regexp_cache_; // A map that contains characters that are essential when dialling. That means // any of the characters in this map must not be removed from a number when // dialing, otherwise the call will not reach the intended destination. std::map diallable_char_mappings_; // These mappings map a character (key) to a specific digit that should // replace it for normalization purposes. std::map alpha_mappings_; // For performance reasons, store a map of combining alpha_mappings with ASCII // digits. std::map alpha_phone_mappings_; // Separate map of all symbols that we wish to retain when formatting alpha // numbers. This includes digits, ascii letters and number grouping symbols // such as "-" and " ". std::map all_plus_number_grouping_symbols_; // Map of country calling codes that use a mobile token before the area code. // One example of when this is relevant is when determining the length of the // national destination code, which should be the length of the area code plus // the length of the mobile token. std::map mobile_token_mappings_; // Set of country codes that doesn't have national prefix, but it has area // codes. std::set countries_without_national_prefix_with_area_codes_; // Set of country codes that have geographically assigned mobile numbers (see // geo_mobile_countries_ below) which are not based on *area codes*. For // example, in China mobile numbers start with a carrier indicator, and beyond // that are geographically assigned: this carrier indicator is not considered // to be an area code. std::set geo_mobile_countries_without_mobile_area_codes_; // Set of country calling codes that have geographically assigned mobile // numbers. This may not be complete; we add calling codes case by case, as we // find geographical mobile numbers or hear from user reports. std::set geo_mobile_countries_; // Pattern that makes it easy to distinguish whether a region has a single // international dialing prefix or not. If a region has a single international // prefix (e.g. 011 in USA), it will be represented as a string that contains // a sequence of ASCII digits, and possibly a tilde, which signals waiting for // the tone. If there are multiple available international prefixes in a // region, they will be represented as a regex string that always contains one // or more characters that are not ASCII digits or a tilde. scoped_ptr single_international_prefix_; scoped_ptr digits_pattern_; scoped_ptr capturing_digit_pattern_; scoped_ptr capturing_ascii_digits_pattern_; // Regular expression of acceptable characters that may start a phone number // for the purposes of parsing. This allows us to strip away meaningless // prefixes to phone numbers that may be mistakenly given to us. This consists // of digits, the plus symbol and arabic-indic digits. This does not contain // alpha characters, although they may be used later in the number. It also // does not include other punctuation, as this will be stripped later during // parsing and is of no information value when parsing a number. The string // starting with this valid character is captured. // This corresponds to VALID_START_CHAR in the java version. scoped_ptr valid_start_char_pattern_; // Regular expression of valid characters before a marker that might indicate // a second number. scoped_ptr capture_up_to_second_number_start_pattern_; // Regular expression of trailing characters that we want to remove. We remove // all characters that are not alpha or numerical characters. The hash // character is retained here, as it may signify the previous block was an // extension. Note the capturing block at the start to capture the rest of the // number if this was a match. // This corresponds to UNWANTED_END_CHAR_PATTERN in the java version. scoped_ptr unwanted_end_char_pattern_; // Regular expression of groups of valid punctuation characters. scoped_ptr separator_pattern_; // Regexp of all possible ways to write extensions, for use when finding phone // numbers in text. This will be run as a case-insensitive regexp match. Wide // character versions are also provided after each ASCII version. const string extn_patterns_for_matching_; // Regexp of all known extension prefixes used by different regions followed // by 1 or more valid digits, for use when parsing. scoped_ptr extn_pattern_; // We append optionally the extension pattern to the end here, as a valid // phone number may have an extension prefix appended, followed by 1 or more // digits. scoped_ptr valid_phone_number_pattern_; // We use this pattern to check if the phone number has at least three letters // in it - if so, then we treat it as a number where some phone-number digits // are represented by letters. scoped_ptr valid_alpha_phone_pattern_; scoped_ptr first_group_capturing_pattern_; scoped_ptr carrier_code_pattern_; scoped_ptr plus_chars_pattern_; // Regular expression of valid global-number-digits for the phone-context // parameter, following the syntax defined in RFC3966. std::unique_ptr rfc3966_global_number_digits_pattern_; // Regular expression of valid domainname for the phone-context parameter, // following the syntax defined in RFC3966. std::unique_ptr rfc3966_domainname_pattern_; PhoneNumberRegExpsAndMappings(); // This type is neither copyable nor movable. PhoneNumberRegExpsAndMappings(const PhoneNumberRegExpsAndMappings&) = delete; PhoneNumberRegExpsAndMappings& operator=( const PhoneNumberRegExpsAndMappings&) = delete; }; } // namespace phonenumbers } // namespace i18n #endif // I18N_PHONENUMBERS_REGEXPSANDMAPPINGS_H_