From 31b159870d975ae87320a268e7336af733e9dbfc Mon Sep 17 00:00:00 2001 From: Philippe Liard Date: Fri, 1 Jun 2012 15:33:18 +0000 Subject: [PATCH] CPP: Implement AreaCodeMap and DefaultMapStorage. --- cpp/CMakeLists.txt | 3 + .../phonenumbers/geocoding/area_code_map.cc | 100 ++++++++++++ .../phonenumbers/geocoding/area_code_map.h | 76 +++++++++ .../area_code_map_storage_strategy.h | 62 ++++++++ .../geocoding/default_map_storage.cc | 71 +++++++++ .../geocoding/default_map_storage.h | 67 ++++++++ cpp/src/phonenumbers/stringutil.cc | 8 + cpp/src/phonenumbers/stringutil.h | 4 + .../geocoding/area_code_map_test.cc | 146 ++++++++++++++++++ 9 files changed, 537 insertions(+) create mode 100644 cpp/src/phonenumbers/geocoding/area_code_map.cc create mode 100644 cpp/src/phonenumbers/geocoding/area_code_map.h create mode 100644 cpp/src/phonenumbers/geocoding/area_code_map_storage_strategy.h create mode 100644 cpp/src/phonenumbers/geocoding/default_map_storage.cc create mode 100644 cpp/src/phonenumbers/geocoding/default_map_storage.h create mode 100644 cpp/test/phonenumbers/geocoding/area_code_map_test.cc diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5d0a44204..2557fb62f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -159,6 +159,8 @@ set ( "src/base/string_piece.cc" "src/phonenumbers/asyoutypeformatter.cc" "src/phonenumbers/default_logger.cc" + "src/phonenumbers/geocoding/area_code_map.cc" + "src/phonenumbers/geocoding/default_map_storage.cc" "src/phonenumbers/logger.cc" "src/phonenumbers/metadata.h" # Generated by build tools. "src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers. 
@@ -310,6 +312,7 @@ add_dependencies (phonenumber_testing generate-sources ${TEST_METADATA_TARGET}) set (TEST_SOURCES "test/phonenumbers/asyoutypeformatter_test.cc" + "test/phonenumbers/geocoding/area_code_map_test.cc" "test/phonenumbers/logger_test.cc" "test/phonenumbers/phonenumberutil_test.cc" "test/phonenumbers/regexp_adapter_test.cc" diff --git a/cpp/src/phonenumbers/geocoding/area_code_map.cc b/cpp/src/phonenumbers/geocoding/area_code_map.cc new file mode 100644 index 000000000..7e869be99 --- /dev/null +++ b/cpp/src/phonenumbers/geocoding/area_code_map.cc @@ -0,0 +1,100 @@ +// Copyright (C) 2012 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// Author: Patrick Mezard
+
+#include "phonenumbers/geocoding/area_code_map.h"
+
+#include <cstddef>
+#include <set>
+#include <string>
+
+#include "phonenumbers/geocoding/area_code_map_storage_strategy.h"
+#include "phonenumbers/geocoding/default_map_storage.h"
+#include "phonenumbers/phonenumber.pb.h"
+#include "phonenumbers/phonenumberutil.h"
+#include "phonenumbers/stringutil.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+AreaCodeMap::AreaCodeMap()
+    : phone_util_(*PhoneNumberUtil::GetInstance()) {}
+
+AreaCodeMap::~AreaCodeMap() {}
+
+AreaCodeMapStorageStrategy* AreaCodeMap::CreateDefaultMapStorage() const {
+  return new DefaultMapStorage();
+}
+
+void AreaCodeMap::ReadAreaCodeMap(const map<int, string>& area_codes) {
+  AreaCodeMapStorageStrategy* storage = CreateDefaultMapStorage();
+  storage->ReadFromMap(area_codes);
+  storage_.reset(storage);
+}
+
+const string* AreaCodeMap::Lookup(const PhoneNumber& number) const {
+  // Nothing is loaded until ReadAreaCodeMap() is called: treat it as empty.
+  const int entries = storage_.get() ? storage_->GetNumOfEntries() : 0;
+  if (!entries) {
+    return NULL;
+  }
+
+  string national_number;
+  phone_util_.GetNationalSignificantNumber(number, &national_number);
+  int64 phone_prefix = 0;
+  safe_strto64(SimpleItoa(number.country_code()) + national_number,
+               &phone_prefix);
+
+  const set<int>& lengths = storage_->GetPossibleLengths();
+  int current_index = entries - 1;
+  // Try the longest prefix lengths first so the most specific entry wins.
+  for (set<int>::const_reverse_iterator lengths_it = lengths.rbegin();
+       lengths_it != lengths.rend(); ++lengths_it) {
+    const int possible_length = *lengths_it;
+    string phone_prefix_str = SimpleItoa(phone_prefix);
+    if (static_cast<int>(phone_prefix_str.length()) > possible_length) {
+      safe_strto64(phone_prefix_str.substr(0, possible_length), &phone_prefix);
+    }
+    current_index = BinarySearch(0, current_index, phone_prefix);
+    if (current_index < 0) {
+      return NULL;
+    }
+    const int current_prefix = storage_->GetPrefix(current_index);
+    if (phone_prefix == current_prefix) {
+      return &storage_->GetDescription(current_index);
+    }
+  }
+  return NULL;
+}
+
+int AreaCodeMap::BinarySearch(int start, int end, int64 value) const {
+  int current = 0;
+  while (start <= end) {
+    current = start + (end - start) / 2;
+    int current_value = storage_->GetPrefix(current);
+    if (current_value == value) {
+      return current;
+    } else if (current_value > value) {
+      --current;
+      end = current;
+    } else {
+      start = current + 1;
+    }
+  }
+  return current;
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n
diff --git a/cpp/src/phonenumbers/geocoding/area_code_map.h b/cpp/src/phonenumbers/geocoding/area_code_map.h
new file mode 100644
index 000000000..24df22d1f
--- /dev/null
+++ b/cpp/src/phonenumbers/geocoding/area_code_map.h
@@ -0,0 +1,82 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Patrick Mezard
+
+#ifndef I18N_PHONENUMBERS_AREA_CODE_MAP_H_
+#define I18N_PHONENUMBERS_AREA_CODE_MAP_H_
+
+#include <map>
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/memory/scoped_ptr.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::map;
+using std::string;
+
+class AreaCodeMapStorageStrategy;
+class PhoneNumber;
+class PhoneNumberUtil;
+
+// A utility that maps phone number prefixes to a string describing the
+// geographical area the prefix covers.
+class AreaCodeMap {
+ public:
+  AreaCodeMap();
+  ~AreaCodeMap();
+
+  // Returns the description of the geographical area the number corresponds
+  // to. This method distinguishes the case of an invalid prefix and a prefix
+  // for which the name is not available in the current language. If the
+  // description is not available in the current language an empty string is
+  // returned. If no description was found for the provided number, or if
+  // ReadAreaCodeMap() has not been called yet, NULL is returned.
+  //
+  // The returned string is owned by this object and is only valid until the
+  // next call to ReadAreaCodeMap() or until this object is destroyed.
+  const string* Lookup(const PhoneNumber& number) const;
+
+  // Replaces the content of this AreaCodeMap with area_codes. Note that the
+  // underlying implementation of this method is expensive thus should
+  // not be called by time-critical applications.
+  //
+  // area_codes maps phone number prefixes to geographical area description.
+  void ReadAreaCodeMap(const map<int, string>& area_codes);
+
+ private:
+  // Allocates the storage used by ReadAreaCodeMap(). The caller takes
+  // ownership of the returned object.
+  AreaCodeMapStorageStrategy* CreateDefaultMapStorage() const;
+
+  // Does a binary search for value among the stored prefixes located between
+  // the indexes start and end (inclusive). Returns the position if value is
+  // found; otherwise, returns the position which has the largest value that
+  // is less than value. This means if value is the smallest, -1 will be
+  // returned.
+  int BinarySearch(int start, int end, int64 value) const;
+
+  const PhoneNumberUtil& phone_util_;
+  scoped_ptr<AreaCodeMapStorageStrategy> storage_;
+
+  DISALLOW_COPY_AND_ASSIGN(AreaCodeMap);
+};
+
+}  // namespace phonenumbers
+}  // namespace i18n
+
+#endif  /* I18N_PHONENUMBERS_AREA_CODE_MAP_H_ */
diff --git a/cpp/src/phonenumbers/geocoding/area_code_map_storage_strategy.h b/cpp/src/phonenumbers/geocoding/area_code_map_storage_strategy.h
new file mode 100644
index 000000000..e25c1f070
--- /dev/null
+++ b/cpp/src/phonenumbers/geocoding/area_code_map_storage_strategy.h
@@ -0,0 +1,62 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Patrick Mezard
+//
+// Interface for phone numbers area prefixes storage classes.
+
+#ifndef I18N_PHONENUMBERS_AREA_CODE_MAP_STRATEGY_H_
+#define I18N_PHONENUMBERS_AREA_CODE_MAP_STRATEGY_H_
+
+#include <map>
+#include <set>
+#include <string>
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::map;
+using std::set;
+using std::string;
+
+// Abstracts the way area code data is stored into memory. It is used by
+// AreaCodeMap to support the most space-efficient storage strategy according
+// to the provided data.
+class AreaCodeMapStorageStrategy {
+ public:
+  virtual ~AreaCodeMapStorageStrategy() {}
+
+  // Returns the phone number prefix located at the provided index.
+  virtual int GetPrefix(int index) const = 0;
+
+  // Gets the description corresponding to the phone number prefix located
+  // at the provided index. If the description is not available in the current
+  // language an empty string is returned.
+  virtual const string& GetDescription(int index) const = 0;
+
+  // Sets the internal state of the underlying storage implementation from the
+  // provided area_codes that maps phone number prefixes to description strings.
+  virtual void ReadFromMap(const map<int, string>& area_codes) = 0;
+
+  // Returns the number of entries contained in the area code map.
+  virtual int GetNumOfEntries() const = 0;
+
+  // Returns the set containing the possible lengths of prefixes.
+  virtual const set<int>& GetPossibleLengths() const = 0;
+};
+
+}  // namespace phonenumbers
+}  // namespace i18n
+
+#endif  // I18N_PHONENUMBERS_AREA_CODE_MAP_STRATEGY_H_
diff --git a/cpp/src/phonenumbers/geocoding/default_map_storage.cc b/cpp/src/phonenumbers/geocoding/default_map_storage.cc
new file mode 100644
index 000000000..450ee5b08
--- /dev/null
+++ b/cpp/src/phonenumbers/geocoding/default_map_storage.cc
@@ -0,0 +1,89 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Patrick Mezard
+
+#include "phonenumbers/geocoding/default_map_storage.h"
+
+#include <map>
+#include <set>
+#include <string>
+
+#include "base/logging.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::map;
+using std::set;
+using std::string;
+
+namespace {
+
+// Returns the number of decimal digits of value, ignoring its sign (0 has one
+// digit). Integer arithmetic is used rather than log10(), which is subject to
+// floating-point rounding and is not finite for 0.
+int CountDigits(int value) {
+  int digits = 1;
+  for (value /= 10; value != 0; value /= 10) {
+    ++digits;
+  }
+  return digits;
+}
+
+}  // namespace
+
+DefaultMapStorage::DefaultMapStorage() {
+}
+
+DefaultMapStorage::~DefaultMapStorage() {
+}
+
+int DefaultMapStorage::GetPrefix(int index) const {
+  DCHECK_GE(index, 0);
+  DCHECK_LT(index, static_cast<int>(prefixes_.size()));
+  return prefixes_[index];
+}
+
+const string& DefaultMapStorage::GetDescription(int index) const {
+  DCHECK_GE(index, 0);
+  DCHECK_LT(index, static_cast<int>(descriptions_.size()));
+  return descriptions_[index];
+}
+
+void DefaultMapStorage::ReadFromMap(const map<int, string>& area_codes) {
+  // std::map iterates in ascending key order, which keeps prefixes_ sorted as
+  // required by the binary search done by AreaCodeMap.
+  prefixes_.resize(area_codes.size());
+  descriptions_.resize(area_codes.size());
+  possible_lengths_.clear();
+  int index = 0;
+  for (map<int, string>::const_iterator it = area_codes.begin();
+       it != area_codes.end(); ++it, ++index) {
+    prefixes_[index] = it->first;
+    descriptions_[index] = it->second;
+    possible_lengths_.insert(CountDigits(it->first));
+  }
+}
+
+int DefaultMapStorage::GetNumOfEntries() const {
+  return static_cast<int>(prefixes_.size());
+}
+
+const set<int>& DefaultMapStorage::GetPossibleLengths() const {
+  return possible_lengths_;
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n
diff --git a/cpp/src/phonenumbers/geocoding/default_map_storage.h b/cpp/src/phonenumbers/geocoding/default_map_storage.h
new file mode 100644
index 000000000..b4b9631c0
--- /dev/null
+++ b/cpp/src/phonenumbers/geocoding/default_map_storage.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Patrick Mezard
+//
+// Default class for storing area codes.
+
+#ifndef I18N_PHONENUMBERS_DEFAULT_MAP_STORAGE_H_
+#define I18N_PHONENUMBERS_DEFAULT_MAP_STORAGE_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "phonenumbers/geocoding/area_code_map_storage_strategy.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::map;
+using std::set;
+using std::string;
+using std::vector;
+
+// Default area code map storage strategy that is used for data not
+// containing description duplications. It is mainly intended to avoid
+// the overhead of the string table management when it is actually
+// unnecessary (i.e no string duplication).
+class DefaultMapStorage : public AreaCodeMapStorageStrategy {
+ public:
+  DefaultMapStorage();
+  virtual ~DefaultMapStorage();
+
+  virtual int GetPrefix(int index) const;
+  virtual const string& GetDescription(int index) const;
+  virtual void ReadFromMap(const map<int, string>& area_codes);
+  virtual int GetNumOfEntries() const;
+  virtual const set<int>& GetPossibleLengths() const;
+
+ private:
+  // Sorted sequence of phone number prefixes.
+  vector<int> prefixes_;
+  // Sequence of prefix descriptions, in the same order as prefixes_.
+  vector<string> descriptions_;
+  // Sequence of unique possible lengths in ascending order.
+  set<int> possible_lengths_;
+
+  DISALLOW_COPY_AND_ASSIGN(DefaultMapStorage);
+};
+
+}  // namespace phonenumbers
+}  // namespace i18n
+
+#endif  /* I18N_PHONENUMBERS_DEFAULT_MAP_STORAGE_H_ */
diff --git a/cpp/src/phonenumbers/stringutil.cc b/cpp/src/phonenumbers/stringutil.cc
index 01db9eb45..95ec125d5 100644
--- a/cpp/src/phonenumbers/stringutil.cc
+++ b/cpp/src/phonenumbers/stringutil.cc
@@ -56,6 +56,10 @@ string SimpleItoa(uint64 n) {
   return GenericSimpleItoa(n);
 }
 
+string SimpleItoa(int64 n) {
+  return GenericSimpleItoa(n);
+}
+
 bool HasPrefixString(const string& s, const string& prefix) {
   return s.size() >= prefix.size() &&
       equal(s.begin(), s.begin() + prefix.size(), prefix.begin());
@@ -133,6 +137,10 @@ void safe_strtou64(const string& s, uint64 *n) {
   GenericAtoi(s, n);
 }
 
+void safe_strto64(const string& s, int64* n) {
+  GenericAtoi(s, n);
+}
+
 void strrmm(string* s, const string& chars) {
   for (string::iterator it = s->begin(); it != s->end(); ) {
     const char current_char = *it;
diff --git a/cpp/src/phonenumbers/stringutil.h b/cpp/src/phonenumbers/stringutil.h
index 48a256c39..950054609 100644
--- a/cpp/src/phonenumbers/stringutil.h
+++ b/cpp/src/phonenumbers/stringutil.h
@@ -34,6 +34,7 @@ string operator+(const string& s, int n);  // NOLINT(runtime/string)
 
 // Converts integer to string.
 string SimpleItoa(uint64 n);
+string SimpleItoa(int64 n);
 string SimpleItoa(int n);
 
 // Returns whether the provided string starts with the supplied prefix.
@@ -65,6 +66,9 @@ void safe_strto32(const string& s, int32 *n);
 // Converts string to uint64.
 void safe_strtou64(const string& s, uint64 *n);
 
+// Converts string to int64.
+void safe_strto64(const string& s, int64* n);
+
 // Remove all occurrences of a given set of characters from a string.
 void strrmm(string* s, const string& chars);
 
diff --git a/cpp/test/phonenumbers/geocoding/area_code_map_test.cc b/cpp/test/phonenumbers/geocoding/area_code_map_test.cc
new file mode 100644
index 000000000..112f50937
--- /dev/null
+++ b/cpp/test/phonenumbers/geocoding/area_code_map_test.cc
@@ -0,0 +1,146 @@
+// Copyright (C) 2012 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Patrick Mezard
+//
+// Basic test cases for AreaCodeMap.
+
+#include "phonenumbers/geocoding/area_code_map.h"
+
+#include <map>
+#include <string>
+
+#include <gtest/gtest.h>  // NOLINT(build/include_order)
+
+#include "phonenumbers/phonenumber.pb.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::map;
+using std::string;
+
+namespace {
+
+// Builds an AreaCodeMap from m and stores it into code_map.
+void MakeCodeMap(const map<int, string>& m, scoped_ptr<AreaCodeMap>* code_map) {
+  scoped_ptr<AreaCodeMap> cm(new AreaCodeMap());
+  cm->ReadAreaCodeMap(m);
+  code_map->swap(cm);
+}
+
+void MakeCodeMapUS(scoped_ptr<AreaCodeMap>* code_map) {
+  map<int, string> m;
+  m[1212] = "New York";
+  m[1480] = "Arizona";
+  m[1650] = "California";
+  m[1907] = "Alaska";
+  m[1201664] = "Westwood, NJ";
+  m[1480893] = "Phoenix, AZ";
+  m[1501372] = "Little Rock, AR";
+  m[1626308] = "Alhambra, CA";
+  m[1650345] = "San Mateo, CA";
+  m[1867993] = "Dawson, YT";
+  m[1972480] = "Richardson, TX";
+  MakeCodeMap(m, code_map);
+}
+
+void MakeCodeMapIT(scoped_ptr<AreaCodeMap>* code_map) {
+  map<int, string> m;
+  m[3902] = "Milan";
+  m[3906] = "Rome";
+  m[39010] = "Genoa";
+  m[390131] = "Alessandria";
+  m[390321] = "Novara";
+  m[390975] = "Potenza";
+  MakeCodeMap(m, code_map);
+}
+
+PhoneNumber MakePhoneNumber(int32 country_code, uint64 national_number) {
+  PhoneNumber number;
+  number.set_country_code(country_code);
+  number.set_national_number(national_number);
+  return number;
+}
+
+}  // namespace
+
+class AreaCodeMapTest : public testing::Test {
+ protected:
+  virtual void SetUp() {
+    MakeCodeMapUS(&map_US_);
+    MakeCodeMapIT(&map_IT_);
+  }
+
+  scoped_ptr<AreaCodeMap> map_US_;
+  scoped_ptr<AreaCodeMap> map_IT_;
+};
+
+TEST_F(AreaCodeMapTest, TestLookupInvalidNumberUS) {
+  EXPECT_EQ("New York", *map_US_->Lookup(MakePhoneNumber(1, 2121234567L)));
+}
+
+TEST_F(AreaCodeMapTest, TestLookupNumberNJ) {
+  EXPECT_EQ("Westwood, NJ", *map_US_->Lookup(MakePhoneNumber(1, 2016641234L)));
+}
+
+TEST_F(AreaCodeMapTest, TestLookupNumberNY) {
+  EXPECT_EQ("New York", *map_US_->Lookup(MakePhoneNumber(1, 2126641234L)));
+}
+
+TEST_F(AreaCodeMapTest, TestLookupNumberCA1) {
+  EXPECT_EQ("San Mateo, CA", *map_US_->Lookup(MakePhoneNumber(1, 6503451234L)));
+}
+
+TEST_F(AreaCodeMapTest, TestLookupNumberCA2) {
+  EXPECT_EQ("California", *map_US_->Lookup(MakePhoneNumber(1, 6502531234L)));
+}
+
+TEST_F(AreaCodeMapTest, TestLookupNumberTX) {
+  EXPECT_EQ("Richardson, TX",
+            *map_US_->Lookup(MakePhoneNumber(1, 9724801234L)));
+}
+
+TEST_F(AreaCodeMapTest, TestLookupNumberNotFoundTX) {
+  EXPECT_TRUE(map_US_->Lookup(MakePhoneNumber(1, 9724811234L)) == NULL);
+}
+
+TEST_F(AreaCodeMapTest, TestLookupNumberCH) {
+  EXPECT_TRUE(map_US_->Lookup(MakePhoneNumber(41, 446681300L)) == NULL);
+}
+
+TEST_F(AreaCodeMapTest, TestLookupNumberIT) {
+  PhoneNumber number = MakePhoneNumber(39, 212345678L);
+  number.set_italian_leading_zero(true);
+  EXPECT_EQ("Milan", *map_IT_->Lookup(number));
+
+  number.set_national_number(612345678L);
+  EXPECT_EQ("Rome", *map_IT_->Lookup(number));
+
+  number.set_national_number(3211234L);
+  EXPECT_EQ("Novara", *map_IT_->Lookup(number));
+
+  // A mobile number
+  number.set_national_number(321123456L);
+  number.set_italian_leading_zero(false);
+  EXPECT_TRUE(map_IT_->Lookup(number) == NULL);
+
+  // An invalid number (too short)
+  number.set_national_number(321123L);
+  number.set_italian_leading_zero(true);
+  EXPECT_EQ("Novara", *map_IT_->Lookup(number));
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n