Browse Source

CPP: Implement PhoneNumberOfflineGeocoder.

pull/567/head
Philippe Liard 13 years ago
committed by Mihaela Rosca
parent
commit
7f40b02808
4 changed files with 586 additions and 0 deletions
  1. +2
    -0
      cpp/CMakeLists.txt
  2. +216
    -0
      cpp/src/phonenumbers/geocoding/phonenumber_offline_geocoder.cc
  3. +164
    -0
      cpp/src/phonenumbers/geocoding/phonenumber_offline_geocoder.h
  4. +204
    -0
      cpp/test/phonenumbers/geocoding/phonenumber_offline_geocoder_test.cc

+ 2
- 0
cpp/CMakeLists.txt View File

@ -185,6 +185,7 @@ set (
"src/phonenumbers/geocoding/default_map_storage.cc"
"src/phonenumbers/geocoding/geocoding_data.cc"
"src/phonenumbers/geocoding/mapping_file_provider.cc"
"src/phonenumbers/geocoding/phonenumber_offline_geocoder.cc"
"src/phonenumbers/logger.cc"
"src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers.
"src/phonenumbers/phonenumber.cc"
@ -379,6 +380,7 @@ set (TEST_SOURCES
"test/phonenumbers/geocoding/geocoding_data_test.cc"
"test/phonenumbers/geocoding/geocoding_test_data.cc"
"test/phonenumbers/geocoding/mapping_file_provider_test.cc"
"test/phonenumbers/geocoding/phonenumber_offline_geocoder_test.cc"
"test/phonenumbers/logger_test.cc"
"test/phonenumbers/phonenumberutil_test.cc"
"test/phonenumbers/regexp_adapter_test.cc"


+ 216
- 0
cpp/src/phonenumbers/geocoding/phonenumber_offline_geocoder.cc View File

@ -0,0 +1,216 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Author: Patrick Mezard
#include "phonenumbers/geocoding/phonenumber_offline_geocoder.h"

#include <algorithm>
#include <cstring>
#include <map>
#include <string>

#include <unicode/unistr.h> // NOLINT(build/include_order)

#include "phonenumbers/geocoding/area_code_map.h"
#include "phonenumbers/geocoding/geocoding_data.h"
#include "phonenumbers/geocoding/mapping_file_provider.h"
#include "phonenumbers/phonenumberutil.h"
#include "phonenumbers/stl_util.h"
namespace i18n {
namespace phonenumbers {
using icu::UnicodeString;
using std::map;
using std::string;
namespace {
// Ordering predicate handed to std::lower_bound when searching the sorted
// table of prefix/language file names.
//
// Returns true if the NUL-terminated string s1 comes strictly before s2 in
// byte-wise (strcmp) lexicographic order, false otherwise (including when the
// two strings are equal). Both pointers must be non-null.
bool IsLowerThan(const char* s1, const char* s2) {
  // std::strcmp is declared in <cstring>; include it explicitly rather than
  // relying on another header to pull it in.
  return std::strcmp(s1, s2) < 0;
}
} // namespace
// Default constructor: wires the geocoder to the default get_*() data
// accessors (calling codes, country languages, prefix/language pairs and
// prefix descriptions).
PhoneNumberOfflineGeocoder::PhoneNumberOfflineGeocoder() {
  const int* const calling_codes = get_country_calling_codes();
  const int calling_codes_size = get_country_calling_codes_size();
  const char** const language_code_pairs = get_prefix_language_code_pairs();
  const int language_code_pairs_size = get_prefix_language_code_pairs_size();

  Init(calling_codes, calling_codes_size, get_country_languages,
       language_code_pairs, language_code_pairs_size,
       get_prefix_descriptions);
}
// Constructor taking explicit data tables and accessors instead of the
// defaults; every argument is forwarded unchanged to Init().
PhoneNumberOfflineGeocoder::PhoneNumberOfflineGeocoder(
    const int* country_calling_codes,
    int country_calling_codes_size,
    country_languages_getter get_country_languages,
    const char** prefix_language_code_pairs,
    int prefix_language_code_pairs_size,
    prefix_descriptions_getter get_prefix_descriptions) {
  Init(country_calling_codes,
       country_calling_codes_size,
       get_country_languages,
       prefix_language_code_pairs,
       prefix_language_code_pairs_size,
       get_prefix_descriptions);
}
void PhoneNumberOfflineGeocoder::Init(
const int* country_calling_codes, int country_calling_codes_size,
country_languages_getter get_country_languages,
const char** prefix_language_code_pairs,
int prefix_language_code_pairs_size,
prefix_descriptions_getter get_prefix_descriptions) {
phone_util_ = PhoneNumberUtil::GetInstance();
provider_.reset(new MappingFileProvider(country_calling_codes,
country_calling_codes_size,
get_country_languages));
prefix_language_code_pairs_ = prefix_language_code_pairs;
prefix_language_code_pairs_size_ = prefix_language_code_pairs_size;
get_prefix_descriptions_ = get_prefix_descriptions;
}
// Hands every entry of the loaded-map cache to
// STLDeleteContainerPairSecondPointers so the mapped AreaCodeMap pointers are
// released together with the geocoder.
PhoneNumberOfflineGeocoder::~PhoneNumberOfflineGeocoder() {
  const AreaCodeMaps::iterator first_map = available_maps_.begin();
  const AreaCodeMaps::iterator past_last_map = available_maps_.end();
  STLDeleteContainerPairSecondPointers(first_map, past_last_map);
}
// Returns the area code map for the given prefix and language/script/region,
// loading and caching it on first use.
//
// Returns NULL when the provider yields no file name for the combination or
// when the named map cannot be loaded.
const AreaCodeMap* PhoneNumberOfflineGeocoder::GetPhonePrefixDescriptions(
    int prefix, const string& language, const string& script,
    const string& region) const {
  string map_name;
  provider_->GetFileName(prefix, language, script, region, &map_name);
  if (map_name.empty()) {
    return NULL;
  }

  // Fast path: the map was already loaded by an earlier call.
  AreaCodeMaps::const_iterator entry = available_maps_.find(map_name);
  if (entry != available_maps_.end()) {
    return entry->second;
  }

  // Slow path: load it now; end() signals that no such map exists.
  entry = LoadAreaCodeMapFromFile(map_name);
  if (entry == available_maps_.end()) {
    return NULL;
  }
  return entry->second;
}
// Looks up filename in the prefix/language table with a binary search
// (std::lower_bound with IsLowerThan). On an exact match, builds a new
// AreaCodeMap from the descriptions at the matching index, stores it in
// available_maps_ under filename and returns an iterator to the new entry.
// Returns available_maps_.end() when filename is not in the table.
PhoneNumberOfflineGeocoder::AreaCodeMaps::const_iterator
PhoneNumberOfflineGeocoder::LoadAreaCodeMapFromFile(
    const string& filename) const {
  const char** const pairs_begin = prefix_language_code_pairs_;
  const char** const pairs_end =
      pairs_begin + prefix_language_code_pairs_size_;
  const char** const candidate = std::lower_bound(
      pairs_begin, pairs_end, filename.c_str(), IsLowerThan);

  // lower_bound only finds the insertion point; confirm it is an exact match.
  if (candidate == pairs_end || filename.compare(*candidate) != 0) {
    return available_maps_.end();
  }

  AreaCodeMap* const loaded_map = new AreaCodeMap();
  loaded_map->ReadAreaCodeMap(
      get_prefix_descriptions_(candidate - pairs_begin));
  return available_maps_.insert(
      AreaCodeMaps::value_type(filename, loaded_map)).first;
}
// Resolves the region code of number through PhoneNumberUtil and returns the
// display name of that region in the given language.
string PhoneNumberOfflineGeocoder::GetCountryNameForNumber(
    const PhoneNumber& number, const Locale& language) const {
  string number_region;
  phone_util_->GetRegionCodeForNumber(number, &number_region);

  const string country_name = GetRegionDisplayName(&number_region, language);
  return country_name;
}
// Returns the display name of the region identified by region_code, in the
// given language, encoded as UTF-8.
//
// Returns an empty string when region_code is NULL, is "ZZ", or is the
// region code used for non-geographical entities.
string PhoneNumberOfflineGeocoder::GetRegionDisplayName(
    const string* region_code, const Locale& language) const {
  if (region_code == NULL) {
    return "";
  }
  const string& code = *region_code;
  if (code == "ZZ" || code == PhoneNumberUtil::kRegionCodeForNonGeoEntity) {
    return "";
  }

  // A locale with an empty language and only a country part is enough for
  // ICU to produce the country's display name.
  icu::Locale region_locale("", code.c_str());
  UnicodeString localized_name;
  region_locale.getDisplayCountry(language, localized_name);

  string utf8_name;
  localized_name.toUTF8String(utf8_name);
  return utf8_name;
}
// Returns the area-level description of number in the given language, or the
// name of the number's country when no area description is available.
// The validity of number is not checked here.
string PhoneNumberOfflineGeocoder::GetDescriptionForValidNumber(
    const PhoneNumber& number, const Locale& language) const {
  const string language_code = language.getLanguage();
  const string country_code = language.getCountry();

  // The script is left empty; only language and country come from the locale.
  const char* const area_text =
      GetAreaDescription(number, language_code, "", country_code);
  if (*area_text == '\0') {
    return GetCountryNameForNumber(number, language);
  }
  return area_text;
}
// Variant of GetDescriptionForValidNumber() that takes the user's region into
// account. The validity of number is not checked here.
string PhoneNumberOfflineGeocoder::GetDescriptionForValidNumber(
    const PhoneNumber& number, const Locale& language,
    const string& user_region) const {
  string number_region;
  phone_util_->GetRegionCodeForNumber(number, &number_region);

  const bool user_is_in_number_region = (user_region == number_region);
  if (!user_is_in_number_region) {
    // The number comes from another region: for now only the region (country)
    // name is shown.
    return GetRegionDisplayName(&number_region, language);
  }

  // Same region as the user: show the lower-level description if one exists,
  // otherwise the callee falls back to the region (country) name.
  return GetDescriptionForValidNumber(number, language);
}
// Validating wrapper around GetDescriptionForValidNumber(number, locale):
// returns an empty string when PhoneNumberUtil reports number as invalid.
string PhoneNumberOfflineGeocoder::GetDescriptionForNumber(
    const PhoneNumber& number, const Locale& locale) const {
  const bool number_is_valid = phone_util_->IsValidNumber(number);
  if (number_is_valid) {
    return GetDescriptionForValidNumber(number, locale);
  }
  return "";
}
// Validating wrapper around
// GetDescriptionForValidNumber(number, language, user_region): returns an
// empty string when PhoneNumberUtil reports number as invalid.
string PhoneNumberOfflineGeocoder::GetDescriptionForNumber(
    const PhoneNumber& number, const Locale& language,
    const string& user_region) const {
  const bool number_is_valid = phone_util_->IsValidNumber(number);
  if (number_is_valid) {
    return GetDescriptionForValidNumber(number, language, user_region);
  }
  return "";
}
const char* PhoneNumberOfflineGeocoder::GetAreaDescription(
const PhoneNumber& number, const string& lang, const string& script,
const string& region) const {
const int country_calling_code = number.country_code();
// NANPA area is not split in C++ code.
const int phone_prefix = country_calling_code;
const AreaCodeMap* const descriptions = GetPhonePrefixDescriptions(
phone_prefix, lang, script, region);
const char* description = descriptions ? descriptions->Lookup(number) : NULL;
// When a location is not available in the requested language, fall back to
// English.
if ((!description || *description == '\0') && MayFallBackToEnglish(lang)) {
const AreaCodeMap* default_descriptions = GetPhonePrefixDescriptions(
phone_prefix, "en", "", "");
if (!default_descriptions) {
return "";
}
description = default_descriptions->Lookup(number);
}
return description ? description : "";
}
// Don't fall back to English if the requested language is among the following:
// - Chinese
// - Japanese
// - Korean
bool PhoneNumberOfflineGeocoder::MayFallBackToEnglish(
const string& lang) const {
return lang.compare("zh") && lang.compare("ja") && lang.compare("ko");
}
} // namespace phonenumbers
} // namespace i18n

+ 164
- 0
cpp/src/phonenumbers/geocoding/phonenumber_offline_geocoder.h View File

@ -0,0 +1,164 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Patrick Mezard
#ifndef I18N_PHONENUMBERS_GEOCODING_PHONENUMBER_OFFLINE_GEOCODER_H_
#define I18N_PHONENUMBERS_GEOCODING_PHONENUMBER_OFFLINE_GEOCODER_H_
#include <map>
#include <string>
#include <unicode/locid.h> // NOLINT(build/include_order)
#include "base/basictypes.h"
#include "base/memory/scoped_ptr.h"
namespace i18n {
namespace phonenumbers {
using std::map;
using std::string;
// Forward declarations: this header only refers to these types through
// pointers and references, so their full definitions are not needed here.
class AreaCodeMap;
class MappingFileProvider;
class PhoneNumber;
class PhoneNumberUtil;
// Element types returned (by pointer) from the data accessor callbacks.
struct CountryLanguages;
struct PrefixDescriptions;
// Shorthand so the API below can spell the ICU locale type as plain Locale.
typedef icu::Locale Locale;
// An offline geocoder which provides geographical information related to a
// phone number.
//
// Area code maps are loaded on first use and kept for the lifetime of the
// geocoder. NOTE(review): no synchronization is visible around that cache -
// confirm that an instance is not shared between threads without external
// locking.
class PhoneNumberOfflineGeocoder {
private:
// Loaded area code maps, keyed by the data file name they were loaded for.
typedef map<string, const AreaCodeMap*> AreaCodeMaps;
public:
// Accessor callbacks returning the data entry stored at the given index.
typedef const CountryLanguages* (*country_languages_getter)(int index);
typedef const PrefixDescriptions* (*prefix_descriptions_getter)(int index);
// Creates a geocoder that uses the default data accessors.
PhoneNumberOfflineGeocoder();
// For tests: creates a geocoder that uses the supplied tables and accessors
// instead of the default ones.
// prefix_language_code_pairs is searched with a binary search, so it has to
// be sorted in strcmp order; the array itself is not copied.
PhoneNumberOfflineGeocoder(
const int* country_calling_codes,
int country_calling_codes_size,
country_languages_getter get_country_languages,
const char** prefix_language_code_pairs,
int prefix_language_code_pairs_size,
prefix_descriptions_getter get_prefix_descriptions);
// Releases the area code maps loaded so far.
virtual ~PhoneNumberOfflineGeocoder();
// Returns a text description for the given phone number, in the language
// provided. The description might consist of the name of the country where
// the phone number is from, or the name of the geographical area the phone
// number is from if more detailed information is available.
//
// This method assumes the validity of the number passed in has already been
// checked.
string GetDescriptionForValidNumber(const PhoneNumber& number,
const Locale& language) const;
// As per GetDescriptionForValidNumber(PhoneNumber, Locale) but also considers
// the region of the user. If the phone number is from the same region as the
// user, only a lower-level description will be returned, if one exists.
// Otherwise, the phone number's region will be returned, with optionally some
// more detailed information.
//
// For example, for a user from the region "US" (United States), we would show
// "Mountain View, CA" for a particular number, omitting the United States
// from the description. For a user from the United Kingdom (region "GB"), for
// the same number we may show "Mountain View, CA, United States" or even just
// "United States".
//
// This method assumes the validity of the number passed in has already been
// checked.
//
// user_region is the region code for a given user. This region will be
// omitted from the description if the phone number comes from this region. It
// is a two-letter uppercase ISO country code as defined by ISO 3166-1.
string GetDescriptionForValidNumber(const PhoneNumber& number,
const Locale& language, const string& user_region) const;
// As per GetDescriptionForValidNumber(PhoneNumber, Locale) but explicitly
// checks the validity of the number passed in. Returns an empty string if the
// number is not valid.
string GetDescriptionForNumber(const PhoneNumber& number,
const Locale& locale) const;
// As per GetDescriptionForValidNumber(PhoneNumber, Locale, String) but
// explicitly checks the validity of the number passed in. Returns an empty
// string if the number is not valid.
string GetDescriptionForNumber(const PhoneNumber& number,
const Locale& language, const string& user_region) const;
private:
// Initialization shared by both constructors; the arguments have the same
// meaning as those of the test constructor.
void Init(const int* country_calling_codes,
int country_calling_codes_size,
country_languages_getter get_country_languages,
const char** prefix_language_code_pairs,
int prefix_language_code_pairs_size,
prefix_descriptions_getter get_prefix_descriptions);
// Returns the area code map for the given prefix and language, script and
// region, loading it on first use. Returns NULL if no such map is available.
const AreaCodeMap* GetPhonePrefixDescriptions(int prefix,
const string& language, const string& script, const string& region) const;
// Loads the map named filename into available_maps_ and returns an iterator
// to the new entry, or available_maps_.end() if filename is not listed in
// prefix_language_code_pairs_.
AreaCodeMaps::const_iterator LoadAreaCodeMapFromFile(
const string& filename) const;
// Returns the customary display name in the given language for the given
// region. Returns an empty string if region_code is NULL, "ZZ" or the region
// code for non-geographical entities.
string GetRegionDisplayName(const string* region_code,
const Locale& language) const;
// Returns the customary display name in the given language for the given
// territory the phone number is from.
string GetCountryNameForNumber(const PhoneNumber& number,
const Locale& language) const;
// Returns an area-level text description in the given language for the given
// phone number, or an empty string. Never returns NULL.
// lang is a two-letter lowercase ISO language code as defined by ISO 639-1.
// script is a four-letter titlecase (the first letter is uppercase and the
// rest of the letters are lowercase) ISO script code as defined in ISO
// 15924.
// region is a two-letter uppercase ISO country code as defined by ISO
// 3166-1.
const char* GetAreaDescription(const PhoneNumber& number, const string& lang,
const string& script,
const string& region) const;
// Returns false if lang is "zh", "ja" or "ko", for which a missing
// description must not be replaced by the English one; true otherwise.
bool MayFallBackToEnglish(const string& lang) const;
private:
// Obtained from PhoneNumberUtil::GetInstance().
const PhoneNumberUtil* phone_util_;
// The MappingFileProvider knows for which combination of country calling code
// and language a phone prefix mapping file is available in the file system,
// so that a file can be loaded when needed.
scoped_ptr<const MappingFileProvider> provider_;
// Table of available prefix/language file names and its number of entries.
// The array is supplied by the caller of the constructor.
const char** prefix_language_code_pairs_;
int prefix_language_code_pairs_size_;
// Returns the prefix descriptions for an index into
// prefix_language_code_pairs_.
prefix_descriptions_getter get_prefix_descriptions_;
// A mapping from data file names (country calling code and language pairs)
// to the corresponding phone prefix map that has been loaded. Declared
// mutable so that the const lookup methods can fill it in on demand.
mutable AreaCodeMaps available_maps_;
DISALLOW_COPY_AND_ASSIGN(PhoneNumberOfflineGeocoder);
};
} // namespace phonenumbers
} // namespace i18n
#endif /* I18N_PHONENUMBERS_GEOCODING_PHONENUMBER_OFFLINE_GEOCODER_H_ */

+ 204
- 0
cpp/test/phonenumbers/geocoding/phonenumber_offline_geocoder_test.cc View File

@ -0,0 +1,204 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Patrick Mezard
#include "phonenumbers/geocoding/phonenumber_offline_geocoder.h"
#include <gtest/gtest.h>
#include <unicode/locid.h>
#include "phonenumbers/geocoding/geocoding_test_data.h"
#include "phonenumbers/phonenumber.h"
#include "phonenumbers/phonenumber.pb.h"
namespace i18n {
namespace phonenumbers {
using icu::Locale;
namespace {
// Test helper: builds a PhoneNumber carrying only the given country calling
// code and national number.
PhoneNumber MakeNumber(int32 country_code, uint64 national_number) {
  PhoneNumber result;
  result.set_national_number(national_number);
  result.set_country_code(country_code);
  return result;
}
// Locales shared by the test cases below (language, country).
const Locale kEnglishLocale("en", "GB");
const Locale kFrenchLocale("fr", "FR");
const Locale kGermanLocale("de", "DE");
const Locale kItalianLocale("it", "IT");
const Locale kKoreanLocale("ko", "KR");
const Locale kSimplifiedChineseLocale("zh", "CN");
} // namespace
class PhoneNumberOfflineGeocoderTest : public testing::Test {
protected:
PhoneNumberOfflineGeocoderTest() :
KO_NUMBER1(MakeNumber(82, 22123456L)),
KO_NUMBER2(MakeNumber(82, 322123456L)),
KO_NUMBER3(MakeNumber(82, 6421234567L)),
KO_INVALID_NUMBER(MakeNumber(82, 1234L)),
US_NUMBER1(MakeNumber(1, 6502530000L)),
US_NUMBER2(MakeNumber(1, 6509600000L)),
US_NUMBER3(MakeNumber(1, 2128120000L)),
US_NUMBER4(MakeNumber(1, 6174240000L)),
US_INVALID_NUMBER(MakeNumber(1, 123456789L)),
BS_NUMBER1(MakeNumber(1, 2423651234L)),
AU_NUMBER(MakeNumber(61, 236618300L)),
NUMBER_WITH_INVALID_COUNTRY_CODE(MakeNumber(999, 2423651234L)),
INTERNATIONAL_TOLL_FREE(MakeNumber(800, 12345678L)) {
}
virtual void SetUp() {
geocoder_.reset(
new PhoneNumberOfflineGeocoder(
get_test_country_calling_codes(),
get_test_country_calling_codes_size(),
get_test_country_languages,
get_test_prefix_language_code_pairs(),
get_test_prefix_language_code_pairs_size(),
get_test_prefix_descriptions));
}
protected:
scoped_ptr<PhoneNumberOfflineGeocoder> geocoder_;
const PhoneNumber KO_NUMBER1;
const PhoneNumber KO_NUMBER2;
const PhoneNumber KO_NUMBER3;
const PhoneNumber KO_INVALID_NUMBER;
const PhoneNumber US_NUMBER1;
const PhoneNumber US_NUMBER2;
const PhoneNumber US_NUMBER3;
const PhoneNumber US_NUMBER4;
const PhoneNumber US_INVALID_NUMBER;
const PhoneNumber BS_NUMBER1;
const PhoneNumber AU_NUMBER;
const PhoneNumber NUMBER_WITH_INVALID_COUNTRY_CODE;
const PhoneNumber INTERNATIONAL_TOLL_FREE;
};
TEST_F(PhoneNumberOfflineGeocoderTest,
       TestGetDescriptionForNumberWithNoDataFile) {
  const Locale us_english("en", "US");

  // The unittest data has no Chinese mapping file for US numbers, so the
  // geocoder answers with the country name of the United States in simplified
  // Chinese.
  // "\u7F8E\u56FD" (unicode escape sequences are not always supported)
  EXPECT_EQ("\xe7""\xbe""\x8e""\xe5""\x9b""\xbd",
            geocoder_->GetDescriptionForNumber(US_NUMBER1,
                                               kSimplifiedChineseLocale));

  // Country names are returned for these valid numbers.
  EXPECT_EQ("Bahamas",
            geocoder_->GetDescriptionForNumber(BS_NUMBER1, us_english));
  EXPECT_EQ("Australia",
            geocoder_->GetDescriptionForNumber(AU_NUMBER, us_english));

  // An empty description is expected for these two numbers.
  EXPECT_EQ("",
            geocoder_->GetDescriptionForNumber(
                NUMBER_WITH_INVALID_COUNTRY_CODE, us_english));
  EXPECT_EQ("",
            geocoder_->GetDescriptionForNumber(INTERNATIONAL_TOLL_FREE,
                                               us_english));
}
TEST_F(PhoneNumberOfflineGeocoderTest,
       TestGetDescriptionForNumberWithMissingPrefix) {
  // A valid number whose prefix is absent from the geocoding data file must
  // still be described, using the name of its country.
  const Locale us_english("en", "US");
  const string description =
      geocoder_->GetDescriptionForNumber(US_NUMBER4, us_english);
  EXPECT_EQ("United States", description);
}
TEST_F(PhoneNumberOfflineGeocoderTest, TestGetDescriptionForNumber_en_US) {
  // US numbers described in US English, at the level of detail available in
  // the test data.
  const Locale us_english("en", "US");

  EXPECT_EQ("CA",
            geocoder_->GetDescriptionForNumber(US_NUMBER1, us_english));
  EXPECT_EQ("Mountain View, CA",
            geocoder_->GetDescriptionForNumber(US_NUMBER2, us_english));
  EXPECT_EQ("New York, NY",
            geocoder_->GetDescriptionForNumber(US_NUMBER3, us_english));
}
TEST_F(PhoneNumberOfflineGeocoderTest, TestGetDescriptionForKoreanNumber) {
  // Korean numbers described in English.
  const string seoul_in_english =
      geocoder_->GetDescriptionForNumber(KO_NUMBER1, kEnglishLocale);
  EXPECT_EQ("Seoul", seoul_in_english);

  const string incheon_in_english =
      geocoder_->GetDescriptionForNumber(KO_NUMBER2, kEnglishLocale);
  EXPECT_EQ("Incheon", incheon_in_english);

  const string jeju_in_english =
      geocoder_->GetDescriptionForNumber(KO_NUMBER3, kEnglishLocale);
  EXPECT_EQ("Jeju", jeju_in_english);

  // The same numbers described in Korean (UTF-8 encoded expectations).
  // "\uC11C\uC6B8"
  const string seoul_in_korean =
      geocoder_->GetDescriptionForNumber(KO_NUMBER1, kKoreanLocale);
  EXPECT_EQ("\xec""\x84""\x9c""\xec""\x9a""\xb8", seoul_in_korean);

  // "\uC778\uCC9C"
  const string incheon_in_korean =
      geocoder_->GetDescriptionForNumber(KO_NUMBER2, kKoreanLocale);
  EXPECT_EQ("\xec""\x9d""\xb8""\xec""\xb2""\x9c", incheon_in_korean);
}
TEST_F(PhoneNumberOfflineGeocoderTest, TestGetDescriptionForFallBack) {
  // The location name exists in German for this number, so no fallback takes
  // place.
  const string german_hit =
      geocoder_->GetDescriptionForNumber(US_NUMBER1, kGermanLocale);
  EXPECT_EQ("Kalifornien", german_hit);

  // German falls back to English.
  const string german_fallback =
      geocoder_->GetDescriptionForNumber(US_NUMBER3, kGermanLocale);
  EXPECT_EQ("New York, NY", german_fallback);

  // Italian falls back to English.
  const string italian_fallback =
      geocoder_->GetDescriptionForNumber(US_NUMBER1, kItalianLocale);
  EXPECT_EQ("CA", italian_fallback);

  // Korean doesn't fall back to English.
  // "\uB300\uD55C\uBBFC\uAD6D"
  const string korean_result =
      geocoder_->GetDescriptionForNumber(KO_NUMBER3, kKoreanLocale);
  EXPECT_EQ("\xeb""\x8c""\x80""\xed""\x95""\x9c""\xeb""\xaf""\xbc""\xea""\xb5"
            "\xad",
            korean_result);
}
TEST_F(PhoneNumberOfflineGeocoderTest,
       TestGetDescriptionForNumberWithUserRegion) {
  const Locale spanish("es", "ES");

  // User in Italy, American number. We should just show United States, in
  // Spanish, and not more detailed information.
  EXPECT_EQ("Estados Unidos",
            geocoder_->GetDescriptionForNumber(US_NUMBER1, spanish, "IT"));

  // Unknown region - should just show country name.
  EXPECT_EQ("Estados Unidos",
            geocoder_->GetDescriptionForNumber(US_NUMBER1, spanish, "ZZ"));

  // User in the States, language German, should show detailed data.
  EXPECT_EQ("Kalifornien",
            geocoder_->GetDescriptionForNumber(US_NUMBER1, kGermanLocale,
                                               "US"));

  // User in the States, language French, no data for French, so we fallback
  // to English detailed data.
  EXPECT_EQ("CA",
            geocoder_->GetDescriptionForNumber(US_NUMBER1, kFrenchLocale,
                                               "US"));

  // Invalid number - return an empty string.
  EXPECT_EQ("",
            geocoder_->GetDescriptionForNumber(US_INVALID_NUMBER,
                                               kEnglishLocale, "US"));
}
TEST_F(PhoneNumberOfflineGeocoderTest, TestGetDescriptionForInvalidNumber) {
  // Invalid numbers get an empty description, whatever their country code.
  const string korean_invalid =
      geocoder_->GetDescriptionForNumber(KO_INVALID_NUMBER, kEnglishLocale);
  EXPECT_EQ("", korean_invalid);

  const string us_invalid =
      geocoder_->GetDescriptionForNumber(US_INVALID_NUMBER, kEnglishLocale);
  EXPECT_EQ("", us_invalid);
}
} // namespace phonenumbers
} // namespace i18n

Loading…
Cancel
Save