From e930086b33beca985667f22282290cee3943e22e Mon Sep 17 00:00:00 2001 From: Philippe Liard Date: Thu, 19 Jul 2012 12:49:04 +0000 Subject: [PATCH] CPP: Implement MappingFileProvider. --- cpp/CMakeLists.txt | 2 + .../geocoding/mapping_file_provider.cc | 175 ++++++++++++++++++ .../geocoding/mapping_file_provider.h | 75 ++++++++ .../geocoding/mapping_file_provider_test.cc | 91 +++++++++ 4 files changed, 343 insertions(+) create mode 100644 cpp/src/phonenumbers/geocoding/mapping_file_provider.cc create mode 100644 cpp/src/phonenumbers/geocoding/mapping_file_provider.h create mode 100644 cpp/test/phonenumbers/geocoding/mapping_file_provider_test.cc diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 00e9b067a..6bfed5e54 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -182,6 +182,7 @@ set ( "src/phonenumbers/geocoding/area_code_map.cc" "src/phonenumbers/geocoding/default_map_storage.cc" "src/phonenumbers/geocoding/geocoding_data.cc" + "src/phonenumbers/geocoding/mapping_file_provider.cc" "src/phonenumbers/logger.cc" "src/phonenumbers/metadata.h" # Generated by build tools. "src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers. 
@@ -354,6 +355,7 @@ set (TEST_SOURCES "test/phonenumbers/geocoding/area_code_map_test.cc" "test/phonenumbers/geocoding/geocoding_data_test.cc" "test/phonenumbers/geocoding/geocoding_test_data.cc" + "test/phonenumbers/geocoding/mapping_file_provider_test.cc" "test/phonenumbers/logger_test.cc" "test/phonenumbers/phonenumberutil_test.cc" "test/phonenumbers/regexp_adapter_test.cc" diff --git a/cpp/src/phonenumbers/geocoding/mapping_file_provider.cc b/cpp/src/phonenumbers/geocoding/mapping_file_provider.cc new file mode 100644 index 000000000..82cb1a842 --- /dev/null +++ b/cpp/src/phonenumbers/geocoding/mapping_file_provider.cc @@ -0,0 +1,175 @@ +// Copyright (C) 2012 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// Author: Patrick Mezard
+
+#include "phonenumbers/geocoding/mapping_file_provider.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstring>
+#include <sstream>
+#include <string>
+
+#include "phonenumbers/geocoding/geocoding_data.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::string;
+
+namespace {
+
+// Pairs a full locale name with the locale name that is tried first when
+// searching the available languages for it.
+struct NormalizedLocale {
+  const char* locale;
+  const char* normalized_locale;
+};
+
+// Normalization table: zh_TW, zh_HK and zh_MO are all first tried as zh_Hant.
+const NormalizedLocale kNormalizedLocales[] = {
+  {"zh_TW", "zh_Hant"},
+  {"zh_HK", "zh_Hant"},
+  {"zh_MO", "zh_Hant"},
+};
+
+// Returns the normalized locale that kNormalizedLocales associates with
+// full_locale, or NULL if full_locale has no entry in the table. The returned
+// pointer refers to a string stored in the table.
+const char* GetNormalizedLocale(const string& full_locale) {
+  const std::size_t size =
+      sizeof(kNormalizedLocales) / sizeof(*kNormalizedLocales);
+  // The table only has a handful of entries, a linear scan is enough.
+  for (std::size_t i = 0; i != size; ++i) {
+    if (full_locale == kNormalizedLocales[i].locale) {
+      return kNormalizedLocales[i].normalized_locale;
+    }
+  }
+  return NULL;
+}
+
+// Appends "_" followed by part to full_locale. Does nothing if part is empty,
+// so that missing locale components do not leave stray separators.
+void AppendLocalePart(const string& part, string* full_locale) {
+  if (!part.empty()) {
+    full_locale->append("_");
+    full_locale->append(part);
+  }
+}
+
+// Stores in full_locale the concatenation of language, script and region,
+// separated by underscores. Empty script or region components are skipped,
+// e.g. ("zh", "", "TW") gives "zh_TW" and ("zh", "Hant", "TW") gives
+// "zh_Hant_TW".
+void ConstructFullLocale(const string& language, const string& script,
+                         const string& region, string* full_locale) {
+  full_locale->assign(language);
+  AppendLocalePart(script, full_locale);
+  AppendLocalePart(region, full_locale);
+}
+
+// Returns true if s1 comes strictly before s2 in lexicographic order.
+bool IsLowerThan(const char* s1, const char* s2) {
+  return strcmp(s1, s2) < 0;
+}
+
+// Returns true if languages contains language. The list is searched with
+// std::lower_bound and IsLowerThan, so it is expected to be sorted in strcmp
+// order.
+bool HasLanguage(const CountryLanguages* languages, const string& language) {
+  const char* const wanted = language.c_str();
+  const char** const first = languages->available_languages;
+  const char** const last = first + languages->available_languages_size;
+  // lower_bound returns the first entry that is not ordered before wanted; if
+  // wanted is present, that entry is the match.
+  const char** const pos = std::lower_bound(first, last, wanted, IsLowerThan);
+  return pos != last && strcmp(wanted, *pos) == 0;
+}
+
+}  // namespace
+
+// Stores the arguments as given; the calling code array is not copied.
+MappingFileProvider::MappingFileProvider(
+    const int* country_calling_codes, int country_calling_codes_size,
+    country_languages_getter get_country_languages)
+  : country_calling_codes_(country_calling_codes),
+    country_calling_codes_size_(country_calling_codes_size),
+    get_country_languages_(get_country_languages) {
+}
+
+// Writes "<country_calling_code>_<language code>" to filename and returns a
+// reference to it. filename is left empty when language is empty, when
+// country_calling_code is not one of the known calling codes, or when none of
+// the languages available for that calling code matches the requested locale.
+const string& MappingFileProvider::GetFileName(int country_calling_code,
+                                               const string& language,
+                                               const string& script,
+                                               const string& region,
+                                               string* filename) const {
+  filename->clear();
+  if (language.empty()) {
+    return *filename;
+  }
+
+  // Binary search of the calling code; its position in the array is the index
+  // expected by get_country_languages_.
+  const int* const codes_begin = country_calling_codes_;
+  const int* const codes_end = codes_begin + country_calling_codes_size_;
+  const int* const pos =
+      std::lower_bound(codes_begin, codes_end, country_calling_code);
+  if (pos == codes_end || *pos != country_calling_code) {
+    return *filename;
+  }
+
+  const CountryLanguages* const langs =
+      get_country_languages_(static_cast<int>(pos - codes_begin));
+  if (langs->available_languages_size > 0) {
+    string language_code;
+    FindBestMatchingLanguageCode(langs, language, script, region,
+                                 &language_code);
+    if (!language_code.empty()) {
+      std::stringstream filename_buf;
+      filename_buf << country_calling_code << "_" << language_code;
+      *filename = filename_buf.str();
+    }
+  }
+  return *filename;
+}
+
+// Stores in best_match the entry of languages that best matches the locale
+// made of language, script and region, or clears best_match if there is none.
+// Candidates are tried from the most to the least specific:
+//   1. the normalized form of the full locale, if it has one (for instance
+//      zh_TW is first tried as zh_Hant);
+//   2. the full locale, language[_script][_region];
+//   3. language_script, then language_region, when both script and region
+//      are given;
+//   4. language alone.
+// Only well-formed candidates that have not been tried yet are looked up: no
+// "language_" key with an empty region is searched, and the bare language is
+// not searched again when it already was the full locale.
+void MappingFileProvider::FindBestMatchingLanguageCode(
+    const CountryLanguages* languages, const string& language,
+    const string& script, const string& region, string* best_match) const {
+  string full_locale;
+  ConstructFullLocale(language, script, region, &full_locale);
+
+  const char* const normalized_locale = GetNormalizedLocale(full_locale);
+  if (normalized_locale != NULL) {
+    string normalized_locale_str(normalized_locale);
+    if (HasLanguage(languages, normalized_locale_str)) {
+      best_match->swap(normalized_locale_str);
+      return;
+    }
+  }
+
+  if (HasLanguage(languages, full_locale)) {
+    best_match->swap(full_locale);
+    return;
+  }
+
+  const bool has_script = !script.empty();
+  const bool has_region = !region.empty();
+
+  if (has_script && has_region) {
+    string lang_with_script(language);
+    lang_with_script.append("_");
+    lang_with_script.append(script);
+    if (HasLanguage(languages, lang_with_script)) {
+      best_match->swap(lang_with_script);
+      return;
+    }
+
+    string lang_with_region(language);
+    lang_with_region.append("_");
+    lang_with_region.append(region);
+    if (HasLanguage(languages, lang_with_region)) {
+      best_match->swap(lang_with_region);
+      return;
+    }
+  }
+
+  // Without script and region the full locale is the language itself, which
+  // has already been searched above.
+  if ((has_script || has_region) && HasLanguage(languages, language)) {
+    *best_match = language;
+    return;
+  }
+  best_match->clear();
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n
diff --git a/cpp/src/phonenumbers/geocoding/mapping_file_provider.h b/cpp/src/phonenumbers/geocoding/mapping_file_provider.h
new file mode 100644
index 000000000..1234a5e57
--- /dev/null
+++ b/cpp/src/phonenumbers/geocoding/mapping_file_provider.h
@@ -0,0 +1,75 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: Patrick Mezard
+
+#ifndef I18N_PHONENUMBERS_GEOCODING_MAPPING_FILE_PROVIDER_H_
+#define I18N_PHONENUMBERS_GEOCODING_MAPPING_FILE_PROVIDER_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::string;
+
+struct CountryLanguages;
+
+// A utility which knows the data files that are available for the geocoder to
+// use. The data files contain mappings from phone number prefixes to text
+// descriptions, and are organized by country calling code and language that the
+// text descriptions are in.
+class MappingFileProvider {
+ public:
+  // Returns the languages available for the country calling code stored at
+  // position index in the array of calling codes given to the constructor.
+  typedef const CountryLanguages* (*country_languages_getter)(int index);
+
+  // Initializes a MappingFileProvider with country_calling_codes, a sorted
+  // list of country_calling_codes_size calling codes, and a function
+  // get_country_languages(int index) returning the CountryLanguages
+  // information related to the country code at index in
+  // country_calling_codes. Only the pointer to country_calling_codes is
+  // stored, the array itself is not copied.
+  MappingFileProvider(const int* country_calling_codes,
+                      int country_calling_codes_size,
+                      country_languages_getter get_country_languages);
+
+  // Returns the name of the file that contains the mapping data for the
+  // country_calling_code in the language specified, or an empty string if no
+  // such file can be found. The name is stored in filename, and the returned
+  // reference refers to it. language is a two-letter lowercase ISO language
+  // code as defined by ISO 639-1. script is a four-letter titlecase (the
+  // first letter is uppercase and the rest of the letters are lowercase) ISO
+  // script code as defined in ISO 15924. region is a two-letter uppercase ISO
+  // country code as defined by ISO 3166-1.
+  const string& GetFileName(int country_calling_code, const string& language,
+                            const string& script, const string& region,
+                            string* filename) const;
+
+ private:
+  // Stores in best_match the entry of languages that best matches the locale
+  // described by language, script and region, or clears best_match if no
+  // entry matches.
+  void FindBestMatchingLanguageCode(const CountryLanguages* languages,
+                                    const string& language,
+                                    const string& script,
+                                    const string& region,
+                                    string* best_match) const;
+
+  // Sorted array of country calling codes and its number of elements.
+  const int* const country_calling_codes_;
+  const int country_calling_codes_size_;
+  // Callback giving the languages available for a position in the array above.
+  const country_languages_getter get_country_languages_;
+
+  DISALLOW_COPY_AND_ASSIGN(MappingFileProvider);
+};
+
+}  // namespace phonenumbers
+}  // namespace i18n
+
+#endif  // I18N_PHONENUMBERS_GEOCODING_MAPPING_FILE_PROVIDER_H_
diff --git a/cpp/test/phonenumbers/geocoding/mapping_file_provider_test.cc b/cpp/test/phonenumbers/geocoding/mapping_file_provider_test.cc
new file mode 100644
index 000000000..81057af68
--- /dev/null
+++ b/cpp/test/phonenumbers/geocoding/mapping_file_provider_test.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Patrick Mezard
+
+#include "phonenumbers/geocoding/mapping_file_provider.h"
+
+#include <gtest/gtest.h>  // NOLINT(build/include_order)
+
+#include "phonenumbers/geocoding/geocoding_data.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::string;
+
+namespace {
+
+// Defines country_languages_<code>, the array of languages available for the
+// calling code <code>, and country_<code>, the CountryLanguages entry made of
+// that array and its number of elements.
+#define COUNTRY_LANGUAGES(code, languagelist)                             \
+  const char* country_languages_##code[] = languagelist;                  \
+  const CountryLanguages country_##code = {                               \
+    country_languages_##code,                                             \
+    sizeof(country_languages_##code) / sizeof(*country_languages_##code), \
+  };
+
+// Array literals cannot be passed as regular macro arguments, the separating
+// commas are interpreted as macro arguments separators. The following dummy
+// variadic macro wraps the array commas, and appears as a single argument to an
+// outer macro call.
+#define ARRAY_WRAPPER(...) __VA_ARGS__
+
+// Calling codes known to the provider under test, in increasing order.
+const int country_calling_codes[] = {1, 41, 65, 86};
+
+const int country_calling_codes_size =
+    sizeof(country_calling_codes) / sizeof(*country_calling_codes);
+
+// Languages available for each of the calling codes above.
+COUNTRY_LANGUAGES(1, ARRAY_WRAPPER({"en"}));
+COUNTRY_LANGUAGES(41, ARRAY_WRAPPER({"de", "fr", "it", "rm"}));
+COUNTRY_LANGUAGES(65, ARRAY_WRAPPER({"en", "ms", "ta", "zh_Hans"}));
+COUNTRY_LANGUAGES(86, ARRAY_WRAPPER({"en", "zh", "zh_Hant"}));
+
+// Entries are in the same order as country_calling_codes, so that both arrays
+// can be addressed with the same index.
+const CountryLanguages* country_languages[] = {
+  &country_1,
+  &country_41,
+  &country_65,
+  &country_86,
+};
+
+// Getter handed to MappingFileProvider: returns the languages of the calling
+// code stored at index in country_calling_codes.
+const CountryLanguages* test_get_country_languages(int index) {
+  return country_languages[index];
+}
+
+}  // namespace
+
+TEST(MappingFileProviderTest, TestGetFileName) {
+  MappingFileProvider provider(country_calling_codes,
+                               country_calling_codes_size,
+                               test_get_country_languages);
+
+  string filename;
+  // A region which has no dedicated entry falls back to the bare language.
+  EXPECT_EQ("1_en", provider.GetFileName(1, "en", "", "", &filename));
+  EXPECT_EQ("1_en", provider.GetFileName(1, "en", "", "US", &filename));
+  EXPECT_EQ("1_en", provider.GetFileName(1, "en", "", "GB", &filename));
+  EXPECT_EQ("41_de", provider.GetFileName(41, "de", "", "CH", &filename));
+  // 44 is not one of the calling codes given to the provider.
+  EXPECT_EQ("", provider.GetFileName(44, "en", "", "GB", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "", "", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "Hans", "", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "", "CN", &filename));
+  // An empty language never matches.
+  EXPECT_EQ("", provider.GetFileName(86, "", "", "CN", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "Hans", "CN", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "Hans", "SG", &filename));
+  EXPECT_EQ("86_zh", provider.GetFileName(86, "zh", "", "SG", &filename));
+  // zh_TW and zh_HK are expected to resolve to the zh_Hant data.
+  EXPECT_EQ("86_zh_Hant", provider.GetFileName(86, "zh", "", "TW", &filename));
+  EXPECT_EQ("86_zh_Hant", provider.GetFileName(86, "zh", "", "HK", &filename));
+  EXPECT_EQ("86_zh_Hant", provider.GetFileName(86, "zh", "Hant", "TW",
+                                               &filename));
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n