Browse Source

CPP: Ported alternate phone number format support.

pull/567/head
David Beaumont 14 years ago
committed by Mihaela Rosca
parent
commit
6a2ac124fd
3 changed files with 106 additions and 3 deletions
  1. +83
    -0
      cpp/src/phonenumbers/phonenumbermatcher.cc
  2. +5
    -0
      cpp/src/phonenumbers/phonenumbermatcher.h
  3. +18
    -3
      cpp/test/phonenumbers/phonenumbermatcher_test.cc

+ 83
- 0
cpp/src/phonenumbers/phonenumbermatcher.cc View File

@ -25,6 +25,8 @@
#endif // USE_ICU_REGEXP
#include <ctype.h>
#include <map>
#include <iostream>
#include <limits>
#include <stddef.h>
#include <string>
@ -35,6 +37,7 @@
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h"
#include "phonenumbers/alternate_format.h"
#include "phonenumbers/callback.h"
#include "phonenumbers/default_logger.h"
#include "phonenumbers/encoding_utils.h"
@ -51,6 +54,10 @@
#include "phonenumbers/regexp_adapter_re2.h"
#endif // USE_RE2_AND_ICU
using std::cerr;
using std::endl;
using std::make_pair;
using std::map;
using std::numeric_limits;
using std::string;
using std::vector;
@ -149,6 +156,15 @@ bool AllNumberGroupsRemainGrouped(
return normalized_candidate.substr(from_index)
.find(phone_number.extension()) != string::npos;
}
// Fills |alternate_formats| by parsing the buffer returned by
// alternate_format_get(), whose length is given by alternate_format_size().
// Returns true when parsing succeeds. On failure, writes a message to cerr
// and returns false.
bool LoadAlternateFormats(PhoneMetadataCollection* alternate_formats) {
  const bool parsed_ok = alternate_formats->ParseFromArray(
      alternate_format_get(), alternate_format_size());
  if (parsed_ok) {
    return true;
  }
  cerr << "Could not parse binary data." << endl;
  return false;
}
} // namespace
#ifdef USE_GOOGLE_BASE
@ -298,12 +314,61 @@ class PhoneNumberMatcherRegExps : public Singleton<PhoneNumberMatcherRegExps> {
DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcherRegExps);
};
// Singleton holding the alternate-format metadata together with a lookup
// table keyed by country calling code.
#ifdef USE_GOOGLE_BASE
class AlternateFormats {
  friend struct DefaultSingletonTraits<AlternateFormats>;
#else
class AlternateFormats : public Singleton<AlternateFormats> {
  friend class Singleton<AlternateFormats>;
#endif  // USE_GOOGLE_BASE
 public:
  // Collection populated by LoadAlternateFormats(). The map below stores
  // pointers to the entries of this collection.
  PhoneMetadataCollection format_data_;

  // Maps a country calling code to its entry inside format_data_.
  map<int, const PhoneMetadata*> calling_code_to_alternate_formats_map_;

#ifdef USE_GOOGLE_BASE
  static AlternateFormats* GetInstance() {
    return Singleton<AlternateFormats>::get();
  }
#endif  // USE_GOOGLE_BASE

  // Loads the metadata and indexes every entry by its country code. If
  // loading fails, the failure is logged and the map is left empty.
  AlternateFormats()
      : format_data_(),
        calling_code_to_alternate_formats_map_() {
    if (LoadAlternateFormats(&format_data_)) {
      typedef RepeatedPtrField<PhoneMetadata>::const_iterator MetadataIterator;
      for (MetadataIterator entry = format_data_.metadata().begin();
           entry != format_data_.metadata().end();
           ++entry) {
        const PhoneMetadata* const metadata = &*entry;
        calling_code_to_alternate_formats_map_.insert(
            make_pair(entry->country_code(), metadata));
      }
    } else {
      LOG(DFATAL) << "Could not parse compiled-in metadata.";
    }
  }

  // Returns the metadata registered for |country_calling_code|, or NULL when
  // the map has no entry for that code.
  const PhoneMetadata* GetAlternateFormatsForCountry(int country_calling_code)
      const {
    typedef map<int, const PhoneMetadata*>::const_iterator FormatsIterator;
    const FormatsIterator found =
        calling_code_to_alternate_formats_map_.find(country_calling_code);
    if (found == calling_code_to_alternate_formats_map_.end()) {
      return NULL;
    }
    return found->second;
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(AlternateFormats);
};
PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util,
const string& text,
const string& region_code,
PhoneNumberMatcher::Leniency leniency,
int max_tries)
: reg_exps_(PhoneNumberMatcherRegExps::GetInstance()),
alternate_formats_(AlternateFormats::GetInstance()),
phone_util_(util),
text_(text),
preferred_region_(region_code),
@ -317,6 +382,7 @@ PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util,
PhoneNumberMatcher::PhoneNumberMatcher(const string& text,
const string& region_code)
: reg_exps_(PhoneNumberMatcherRegExps::GetInstance()),
alternate_formats_(NULL), // Not used.
phone_util_(*PhoneNumberUtil::GetInstance()),
text_(text),
preferred_region_(region_code),
@ -613,6 +679,23 @@ bool PhoneNumberMatcher::CheckNumberGroupingIsValid(
formatted_number_groups)) {
return true;
}
// If this didn't pass, see if there are any alternate formats, and try them
// instead.
const PhoneMetadata* alternate_formats =
alternate_formats_->GetAlternateFormatsForCountry(
phone_number.country_code());
if (alternate_formats) {
for (RepeatedPtrField<NumberFormat>::const_iterator it =
alternate_formats->number_format().begin();
it != alternate_formats->number_format().end(); ++it) {
formatted_number_groups.clear();
GetNationalNumberGroups(phone_number, &*it, &formatted_number_groups);
if (checker->Run(phone_util_, phone_number, normalized_candidate,
formatted_number_groups)) {
return true;
}
}
}
return false;
}


+ 5
- 0
cpp/src/phonenumbers/phonenumbermatcher.h View File

@ -39,6 +39,7 @@ template <class R, class A1, class A2, class A3, class A4>
using std::string;
using std::vector;
class AlternateFormats;
class NumberFormat;
class PhoneNumber;
class PhoneNumberMatch;
@ -156,6 +157,10 @@ class PhoneNumberMatcher {
// Helper class holding useful regular expressions.
const PhoneNumberMatcherRegExps* reg_exps_;
// Helper class holding loaded data containing alternate ways phone numbers
// might be formatted for certain regions.
const AlternateFormats* alternate_formats_;
// The phone number utility.
const PhoneNumberUtil& phone_util_;


+ 18
- 3
cpp/test/phonenumbers/phonenumbermatcher_test.cc View File

@ -737,9 +737,12 @@ static const NumberTest kValidCases[] = {
"\x2D\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97", RegionCode::US()),
NumberTest("2012-0102 08", RegionCode::US()), // Very strange formatting.
NumberTest("2012-01-02 08", RegionCode::US()),
// Breakdown assistance number.
NumberTest("1800-10-10 22", RegionCode::AU()),
};
// Breakdown assistance number with unexpected formatting.
NumberTest("1800-1-0-10 22", RegionCode::AU()),
NumberTest("030-3-2 23 12 34", RegionCode::DE()),
NumberTest("03 0 -3 2 23 12 34", RegionCode::DE()),
NumberTest("(0)3 0 -3 2 23 12 34", RegionCode::DE()),
NumberTest("0 3 0 -3 2 23 12 34", RegionCode::DE()),};
// Strings with number-like things that should only be found up to and including
// the "strict_grouping" leniency level.
@ -749,6 +752,11 @@ static const NumberTest kStrictGroupingCases[] = {
// Should be found by strict grouping but not exact grouping, as the last two
// groups are formatted together as a block.
NumberTest("0800-2491234", RegionCode::DE()),
// Doesn't match any formatting in the test file, but almost matches an
// alternate format (the last two groups have been squashed together here).
NumberTest("0900-1 123123", RegionCode::DE()),
NumberTest("(0)900-1 123123", RegionCode::DE()),
NumberTest("0 900-1 123123", RegionCode::DE()),
};
// Strings with number-like things that should be found at all levels.
@ -780,6 +788,13 @@ static const NumberTest kExactGroupingCases[] = {
NumberTest("0494949 ext. 49", RegionCode::DE()),
NumberTest("01 (33) 3461 2234", RegionCode::MX()), // Optional NP present
NumberTest("(33) 3461 2234", RegionCode::MX()), // Optional NP omitted
// Breakdown assistance number with normal formatting.
NumberTest("1800-10-10 22", RegionCode::AU()),
// Doesn't match any formatting in the test file, but matches an alternate
// format exactly.
NumberTest("0900-1 123 123", RegionCode::DE()),
NumberTest("(0)900-1 123 123", RegionCode::DE()),
NumberTest("0 900-1 123 123", RegionCode::DE()),
};
TEST_F(PhoneNumberMatcherTest, MatchesWithPossibleLeniency) {


Loading…
Cancel
Save