Browse Source

CPP: Add AbstractRegExpFactory to allow the use of both ICU Regex and RE2.

pull/567/head
Philippe Liard 15 years ago
committed by Mihaela Rosca
parent
commit
908c75a437
14 changed files with 489 additions and 210 deletions
  1. +3
    -0
      cpp/CMakeLists.txt
  2. +64
    -52
      cpp/src/phonenumbers/phonenumberutil.cc
  3. +6
    -1
      cpp/src/phonenumbers/phonenumberutil.h
  4. +16
    -10
      cpp/src/phonenumbers/regexp_adapter.h
  5. +3
    -3
      cpp/src/phonenumbers/regexp_adapter_icu.cc
  6. +39
    -0
      cpp/src/phonenumbers/regexp_adapter_icu.h
  7. +3
    -5
      cpp/src/phonenumbers/regexp_adapter_re2.cc
  8. +38
    -0
      cpp/src/phonenumbers/regexp_adapter_re2.h
  9. +251
    -132
      cpp/src/phonenumbers/regexp_adapter_test.cc
  10. +6
    -4
      cpp/src/phonenumbers/regexp_cache.cc
  11. +4
    -1
      cpp/src/phonenumbers/regexp_cache.h
  12. +3
    -2
      cpp/src/phonenumbers/regexp_cache_test.cc
  13. +44
    -0
      cpp/src/phonenumbers/regexp_factory.h
  14. +9
    -0
      cpp/src/phonenumbers/stl_util.h

+ 3
- 0
cpp/CMakeLists.txt View File

@ -167,6 +167,9 @@ set (
# Add regexp engine sources. ICU is used by default.
if (${USE_RE2} STREQUAL "ON")
# Add a flag to select the right regexp factory implementation used by
# regexp_factory.h and regexp_adapter_test.cc.
add_definitions (-DUSE_RE2)
list (APPEND SOURCES "src/phonenumbers/regexp_adapter_re2.cc")
else ()
list (APPEND SOURCES "src/phonenumbers/regexp_adapter_icu.cc")


+ 64
- 52
cpp/src/phonenumbers/phonenumberutil.cc View File

@ -43,6 +43,7 @@
#include "phonenumbers/phonenumber.pb.h"
#include "phonenumbers/regexp_adapter.h"
#include "phonenumbers/regexp_cache.h"
#include "phonenumbers/regexp_factory.h"
#include "phonenumbers/region_code.h"
#include "phonenumbers/stl_util.h"
#include "phonenumbers/stringutil.h"
@ -78,6 +79,7 @@ const char PhoneNumberUtil::kValidPunctuation[] =
namespace {
scoped_ptr<Logger> logger_;
scoped_ptr<const AbstractRegExpFactory> regexp_factory;
scoped_ptr<RegExpCache> regexp_cache;
// These objects are created in the function InitializeStaticMapsAndSets.
@ -290,7 +292,7 @@ void FormatAccordingToFormatsWithCarrier(
int size = it->leading_digits_pattern_size();
if (size > 0) {
const scoped_ptr<RegExpInput> number_copy(
RegExpInput::Create(number_for_leading_digits_match));
regexp_factory->CreateInput(number_for_leading_digits_match));
// We always use the last leading_digits_pattern, as it is the most
// detailed.
if (!regexp_cache->GetRegExp(it->leading_digits_pattern(size - 1))
@ -443,7 +445,8 @@ char32 ToUnicodeCodepoint(const char* unicode_char) {
void InitializeStaticMapsAndSets() {
// Create global objects.
regexp_cache.reset(new RegExpCache(128));
regexp_factory.reset(new RegExpFactory());
regexp_cache.reset(new RegExpCache(*regexp_factory.get(), 128));
all_plus_number_grouping_symbols.reset(new map<char32, char>);
alpha_mappings.reset(new map<char32, char>);
alpha_phone_mappings.reset(new map<char32, char>);
@ -579,30 +582,6 @@ void NormalizeHelper(const map<char32, char>& normalization_replacements,
number->assign(normalized_number);
}
// Strips the IDD from the start of the number if present. Helper function used
// by MaybeStripInternationalPrefixAndNormalize.
bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) {
DCHECK(number);
const scoped_ptr<RegExpInput> number_copy(RegExpInput::Create(*number));
// First attempt to strip the idd_pattern at the start, if present. We make a
// copy so that we can revert to the original string if necessary.
if (idd_pattern.Consume(number_copy.get())) {
// Only strip this if the first digit after the match is not a 0, since
// country calling codes cannot begin with 0.
string extracted_digit;
if (capturing_digit_pattern->PartialMatch(number_copy->ToString(),
&extracted_digit)) {
PhoneNumberUtil::NormalizeDigitsOnly(&extracted_digit);
if (extracted_digit == "0") {
return false;
}
}
number->assign(number_copy->ToString());
return true;
}
return false;
}
PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern(
const RegExp& number_pattern, const string& number) {
string extracted_number;
@ -694,7 +673,7 @@ PhoneNumberUtil* PhoneNumberUtil::GetInstance() {
#endif
void PhoneNumberUtil::CreateRegularExpressions() const {
unique_international_prefix.reset(RegExp::Create(
unique_international_prefix.reset(regexp_factory->CreateRegExp(
/* "[\\d]+(?:[~⁓∼～][\\d]+)?" */
"[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?"));
// The first_group_capturing_pattern was originally set to $1 but there are
@ -702,17 +681,22 @@ void PhoneNumberUtil::CreateRegularExpressions() const {
// pattern (e.g. Argentina) so the $1 group does not match correctly.
// Therefore, we use \d, so that the first group actually used in the pattern
// will be matched.
first_group_capturing_pattern.reset(RegExp::Create("(\\$\\d)"));
carrier_code_pattern.reset(RegExp::Create("\\$CC"));
digits_pattern.reset(RegExp::Create(StrCat("[", kDigits, "]*")));
capturing_digit_pattern.reset(RegExp::Create(StrCat("([", kDigits, "])")));
capturing_ascii_digits_pattern.reset(RegExp::Create("(\\d+)"));
first_group_capturing_pattern.reset(regexp_factory->CreateRegExp("(\\$\\d)"));
carrier_code_pattern.reset(regexp_factory->CreateRegExp("\\$CC"));
digits_pattern.reset(
regexp_factory->CreateRegExp(StrCat("[", kDigits, "]*")));
capturing_digit_pattern.reset(
regexp_factory->CreateRegExp(StrCat("([", kDigits, "])")));
capturing_ascii_digits_pattern.reset(regexp_factory->CreateRegExp("(\\d+)"));
valid_start_char.reset(new string(StrCat("[", kPlusChars, kDigits, "]")));
valid_start_char_pattern.reset(RegExp::Create(*valid_start_char));
capture_up_to_second_number_start_pattern.reset(RegExp::Create(
valid_start_char_pattern.reset(
regexp_factory->CreateRegExp(*valid_start_char));
capture_up_to_second_number_start_pattern.reset(regexp_factory->CreateRegExp(
kCaptureUpToSecondNumberStart));
unwanted_end_char_pattern.reset(RegExp::Create(kUnwantedEndChar));
separator_pattern.reset(RegExp::Create(StrCat("[", kValidPunctuation, "]+")));
unwanted_end_char_pattern.reset(
regexp_factory->CreateRegExp(kUnwantedEndChar));
separator_pattern.reset(
regexp_factory->CreateRegExp(StrCat("[", kValidPunctuation, "]+")));
valid_phone_number.reset(new string(
StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kDigits,
"]){3,}[", kValidAlpha, kValidPunctuation, kDigits, "]*")));
@ -734,13 +718,14 @@ void PhoneNumberUtil::CreateRegularExpressions() const {
"[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits,
"#?|[- ]+([", kDigits, "]{1,5})#")));
extn_pattern.reset(RegExp::Create(
extn_pattern.reset(regexp_factory->CreateRegExp(
StrCat("(?i)(?:", *known_extn_patterns, ")$")));
valid_phone_number_pattern.reset(RegExp::Create(
valid_phone_number_pattern.reset(regexp_factory->CreateRegExp(
StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, ")?")));
valid_alpha_phone_pattern.reset(RegExp::Create(
valid_alpha_phone_pattern.reset(regexp_factory->CreateRegExp(
StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}")));
plus_chars_pattern.reset(RegExp::Create(StrCat("[", kPlusChars, "]+")));
plus_chars_pattern.reset(
regexp_factory->CreateRegExp(StrCat("[", kPlusChars, "]+")));
}
const string& PhoneNumberUtil::GetExtnPatterns() const {
@ -1288,7 +1273,7 @@ void PhoneNumberUtil::GetRegionCodeForNumberFromRegionList(
const PhoneMetadata* metadata = GetMetadataForRegion(*it);
if (metadata->has_leading_digits()) {
const scoped_ptr<RegExpInput> number(
RegExpInput::Create(national_number));
regexp_factory->CreateInput(national_number));
if (regexp_cache->GetRegExp(metadata->leading_digits()).Consume(
number.get())) {
*region_code = *it;
@ -1366,7 +1351,8 @@ bool PhoneNumberUtil::CheckRegionForParsing(
const string& number_to_parse,
const string& default_region) const {
if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) {
const scoped_ptr<RegExpInput> number(RegExpInput::Create(number_to_parse));
const scoped_ptr<RegExpInput> number(
regexp_factory->CreateInput(number_to_parse));
if (!plus_chars_pattern->Consume(number.get())) {
return false;
}
@ -1688,7 +1674,7 @@ int PhoneNumberUtil::GetLengthOfNationalDestinationCode(
string formatted_number;
Format(copied_proto, INTERNATIONAL, &formatted_number);
const scoped_ptr<RegExpInput> i18n_number(
RegExpInput::Create(formatted_number));
regexp_factory->CreateInput(formatted_number));
string digit_group;
string ndc;
string third_group;
@ -1717,13 +1703,12 @@ int PhoneNumberUtil::GetLengthOfNationalDestinationCode(
return ndc.size();
}
// static
void PhoneNumberUtil::NormalizeDigitsOnly(string* number) {
void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const {
DCHECK(number);
static const scoped_ptr<const RegExp> non_digits_pattern(RegExp::Create(
StrCat("[^", kDigits, "]")));
const RegExp& non_digits_pattern = regexp_cache->GetRegExp(
StrCat("[^", kDigits, "]"));
// Delete everything that isn't valid digits.
non_digits_pattern->GlobalReplace(number, "");
non_digits_pattern.GlobalReplace(number, "");
// Normalize all decimal digits to ASCII digits.
number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
}
@ -1779,6 +1764,32 @@ bool PhoneNumberUtil::IsViablePhoneNumber(const string& number) const {
return valid_phone_number_pattern->FullMatch(number);
}
// Strips the IDD from the start of the number if present. Helper function used
// by MaybeStripInternationalPrefixAndNormalize.
bool PhoneNumberUtil::ParsePrefixAsIdd(const RegExp& idd_pattern,
string* number) const {
DCHECK(number);
const scoped_ptr<RegExpInput> number_copy(
regexp_factory->CreateInput(*number));
// First attempt to strip the idd_pattern at the start, if present. We make a
// copy so that we can revert to the original string if necessary.
if (idd_pattern.Consume(number_copy.get())) {
// Only strip this if the first digit after the match is not a 0, since
// country calling codes cannot begin with 0.
string extracted_digit;
if (capturing_digit_pattern->PartialMatch(number_copy->ToString(),
&extracted_digit)) {
NormalizeDigitsOnly(&extracted_digit);
if (extracted_digit == "0") {
return false;
}
}
number->assign(number_copy->ToString());
return true;
}
return false;
}
// Strips any international prefix (such as +, 00, 011) present in the number
// provided, normalizes the resulting number, and indicates if an international
// prefix was present.
@ -1797,7 +1808,7 @@ PhoneNumberUtil::MaybeStripInternationalPrefixAndNormalize(
return PhoneNumber::FROM_DEFAULT_COUNTRY;
}
const scoped_ptr<RegExpInput> number_string_piece(
RegExpInput::Create(*number));
regexp_factory->CreateInput(*number));
if (plus_chars_pattern->Consume(number_string_piece.get())) {
number->assign(number_string_piece->ToString());
// Can now normalize the rest of the number since we've consumed the "+"
@ -1839,9 +1850,10 @@ void PhoneNumberUtil::MaybeStripNationalPrefixAndCarrierCode(
}
// We use two copies here since Consume modifies the phone number, and if the
// first if-clause fails the number will already be changed.
const scoped_ptr<RegExpInput> number_copy(RegExpInput::Create(*number));
const scoped_ptr<RegExpInput> number_copy(
regexp_factory->CreateInput(*number));
const scoped_ptr<RegExpInput> number_copy_without_transform(
RegExpInput::Create(*number));
regexp_factory->CreateInput(*number));
string number_string_copy(*number);
string captured_part_of_prefix;
const RegExp& national_number_rule = regexp_cache->GetRegExp(
@ -1903,7 +1915,7 @@ bool PhoneNumberUtil::MaybeStripExtension(string* number, string* extension)
string possible_extension_three;
string number_copy(*number);
const scoped_ptr<RegExpInput> number_copy_as_regexp_input(
RegExpInput::Create(number_copy));
regexp_factory->CreateInput(number_copy));
if (extn_pattern->Consume(number_copy_as_regexp_input.get(),
false,
&possible_extension_one,


+ 6
- 1
cpp/src/phonenumbers/phonenumberutil.h View File

@ -51,6 +51,7 @@ class NumberFormat;
class PhoneMetadata;
class PhoneMetadataCollection;
class PhoneNumber;
class RegExp;
// NOTE: A lot of methods in this class require Region Code strings. These must
// be provided using ISO 3166-1 two-letter country-code format. The list of the
@ -170,7 +171,7 @@ class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
// Normalizes a string of characters representing a phone number. This
// converts wide-ascii and arabic-indic numerals to European numerals, and
// strips punctuation and alpha characters.
static void NormalizeDigitsOnly(string* number);
void NormalizeDigitsOnly(string* number) const;
// Gets the national significant number of a phone number. Note a national
// significant number doesn't contain a national prefix or any formatting.
@ -611,6 +612,10 @@ class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
const list<string>& region_codes,
string* region_code) const;
// Strips the IDD from the start of the number if present. Helper function
// used by MaybeStripInternationalPrefixAndNormalize.
bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const;
void Normalize(string* number) const;
PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize(
const string& possible_idd_prefix,


+ 16
- 10
cpp/src/phonenumbers/regexp_adapter.h View File

@ -15,7 +15,7 @@
// Author: George Yakovlev
// Philippe Liard
//
// Regexp adapter to allow a pluggable regexp engine. It has been introduced
// RegExp adapter to allow a pluggable regexp engine. It has been introduced
// during the integration of the open-source version of this library into
// Chromium to be able to use the ICU Regex engine instead of RE2, which is not
// officially supported on Windows.
@ -40,11 +40,6 @@ class RegExpInput {
public:
virtual ~RegExpInput() {}
// Creates a new instance of the default RegExpInput implementation. The
// deletion of the returned instance is under the responsibility of the
// caller.
static RegExpInput* Create(const string& utf8_input);
// Converts to a C++ string.
virtual string ToString() const = 0;
};
@ -56,10 +51,6 @@ class RegExp {
public:
virtual ~RegExp() {}
// Creates a new instance of the default RegExp implementation. The deletion
// of the returned instance is under the responsibility of the caller.
static RegExp* Create(const string& utf8_regexp);
// Matches string to regular expression, returns true if expression was
// matched, false otherwise, advances position in the match.
// input_string - string to be searched.
@ -156,6 +147,21 @@ class RegExp {
}
};
// Abstract factory class that lets its subclasses instantiate the classes
// implementing RegExp and RegExpInput.
//
// Concrete implementations are ICURegExpFactory (regexp_adapter_icu.h) and
// RE2RegExpFactory (regexp_adapter_re2.h). Both creation methods are const, so
// a factory can be used through a const pointer or const reference.
class AbstractRegExpFactory {
public:
// Virtual so that a concrete factory can be deleted through a pointer to this
// base class.
virtual ~AbstractRegExpFactory() {}
// Creates a new instance of RegExpInput. The deletion of the returned
// instance is under the responsibility of the caller.
// utf8_input is the text the returned input object wraps; as the name
// indicates, it is expected to be UTF-8 encoded.
virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;
// Creates a new instance of RegExp. The deletion of the returned instance is
// under the responsibility of the caller.
// utf8_regexp is the pattern to build the regular expression from; as the
// name indicates, it is expected to be UTF-8 encoded.
virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
};
} // namespace phonenumbers
} // namespace i18n


+ 3
- 3
cpp/src/phonenumbers/regexp_adapter_icu.cc View File

@ -15,7 +15,7 @@
// Author: George Yakovlev
// Philippe Liard
#include "phonenumbers/regexp_adapter.h"
#include "phonenumbers/regexp_adapter_icu.h"
#include <string>
@ -197,11 +197,11 @@ class IcuRegExp : public RegExp {
DISALLOW_COPY_AND_ASSIGN(IcuRegExp);
};
RegExpInput* RegExpInput::Create(const string& utf8_input) {
RegExpInput* ICURegExpFactory::CreateInput(const string& utf8_input) const {
return new IcuRegExpInput(utf8_input);
}
RegExp* RegExp::Create(const string& utf8_regexp) {
RegExp* ICURegExpFactory::CreateRegExp(const string& utf8_regexp) const {
return new IcuRegExp(utf8_regexp);
}


+ 39
- 0
cpp/src/phonenumbers/regexp_adapter_icu.h View File

@ -0,0 +1,39 @@
// Copyright (C) 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Author: Philippe Liard
#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_
#define I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_
#include "phonenumbers/regexp_adapter.h"
namespace i18n {
namespace phonenumbers {
// ICU regexp factory that lets the user instantiate the underlying
// implementation of RegExp and RegExpInput classes based on the ICU regexp
// engine. The methods are defined in regexp_adapter_icu.cc.
class ICURegExpFactory : public AbstractRegExpFactory {
public:
virtual ~ICURegExpFactory() {}
// Returns a newly allocated ICU-backed RegExpInput built from utf8_input.
// The caller is responsible for deleting the returned object.
virtual RegExpInput* CreateInput(const string& utf8_input) const;
// Returns a newly allocated ICU-backed RegExp built from the pattern
// utf8_regexp. The caller is responsible for deleting the returned object.
virtual RegExp* CreateRegExp(const string& utf8_regexp) const;
};
} // namespace phonenumbers
} // namespace i18n
#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_

+ 3
- 5
cpp/src/phonenumbers/regexp_adapter_re2.cc View File

@ -15,7 +15,7 @@
// Author: George Yakovlev
// Philippe Liard
#include "phonenumbers/regexp_adapter.h"
#include "phonenumbers/regexp_adapter_re2.h"
#include <cstddef>
#include <string>
@ -149,13 +149,11 @@ class RE2RegExp : public RegExp {
RE2 utf8_regexp_;
};
// Implementation of the adapter static factory methods.
// RE2 RegExp engine is the default implementation.
RegExpInput* RegExpInput::Create(const string& utf8_input) {
RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
return new RE2RegExpInput(utf8_input);
}
RegExp* RegExp::Create(const string& utf8_regexp) {
RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
return new RE2RegExp(utf8_regexp);
}


+ 38
- 0
cpp/src/phonenumbers/regexp_adapter_re2.h View File

@ -0,0 +1,38 @@
// Copyright (C) 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Author: Philippe Liard
#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_
#define I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_
#include "phonenumbers/regexp_adapter.h"
namespace i18n {
namespace phonenumbers {
// RE2 regexp factory that lets the user instantiate the underlying
// implementation of RegExp and RegExpInput classes based on RE2. The methods
// are defined in regexp_adapter_re2.cc.
class RE2RegExpFactory : public AbstractRegExpFactory {
public:
virtual ~RE2RegExpFactory() {}
// Returns a newly allocated RE2-backed RegExpInput built from utf8_input.
// The caller is responsible for deleting the returned object.
virtual RegExpInput* CreateInput(const string& utf8_input) const;
// Returns a newly allocated RE2-backed RegExp built from the pattern
// utf8_regexp. The caller is responsible for deleting the returned object.
virtual RegExp* CreateRegExp(const string& utf8_regexp) const;
};
} // namespace phonenumbers
} // namespace i18n
#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_

+ 251
- 132
cpp/src/phonenumbers/regexp_adapter_test.cc View File

@ -18,186 +18,305 @@
#include "phonenumbers/regexp_adapter.h"
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include "base/basictypes.h"
#include "base/scoped_ptr.h"
#include "stl_util.h"
#include "stringutil.h"
#ifdef USE_RE2
#include "phonenumbers/regexp_adapter_re2.h"
#else
#include "phonenumbers/regexp_adapter_icu.h"
#endif // USE_RE2
namespace i18n {
namespace phonenumbers {
using std::string;
using std::vector;
// Structure that contains the attributes used to test an implementation of the
// regexp adapter: a human-readable name for the implementation, the factory
// under test, and a few regular expressions pre-built with that factory.
struct RegExpTestContext {
// Takes ownership of |factory| (it is stored in a scoped_ptr below) and uses
// it to create the shared regular expressions. Note that inside the
// initializer list |factory| refers to the constructor parameter, which
// shadows the member of the same name, so the CreateRegExp() calls go through
// the raw pointer passed in by the caller.
explicit RegExpTestContext(const string& name,
const AbstractRegExpFactory* factory)
: name(name),
factory(factory),
digits(factory->CreateRegExp("\\d+")),
parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")),
single_digit(factory->CreateRegExp("\\d")),
two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")) {}
// Label of the implementation under test; used to build failure messages.
const string name;
// Factory used by the tests to create additional inputs and regexps.
const scoped_ptr<const AbstractRegExpFactory> factory;
// Matches one or more digits.
const scoped_ptr<const RegExp> digits;
// Matches digits enclosed in literal parentheses, capturing the digits.
const scoped_ptr<const RegExp> parentheses_digits;
// Matches exactly one digit.
const scoped_ptr<const RegExp> single_digit;
// Matches two digit groups separated by '-', capturing each group.
const scoped_ptr<const RegExp> two_digit_groups;
};
class RegExpAdapterTest : public testing::Test {
protected:
RegExpAdapterTest()
: digits_(RegExp::Create("\\d+")),
parentheses_digits_(RegExp::Create("\\((\\d+)\\)")),
single_digit_(RegExp::Create("\\d")),
two_digit_groups_(RegExp::Create("(\\d+)-(\\d+)")) {}
const scoped_ptr<const RegExp> digits_;
const scoped_ptr<const RegExp> parentheses_digits_;
const scoped_ptr<const RegExp> single_digit_;
const scoped_ptr<const RegExp> two_digit_groups_;
RegExpAdapterTest() {
#ifdef USE_RE2
contexts_.push_back(
new RegExpTestContext("RE2", new RE2RegExpFactory()));
#else
contexts_.push_back(
new RegExpTestContext("ICU Regex", new ICURegExpFactory()));
#endif // USE_RE2
}
~RegExpAdapterTest() {
STLDeleteElements(&contexts_);
}
static string ErrorMessage(const RegExpTestContext& context) {
return StrCat("Test failed with ", context.name, " implementation.");
}
typedef vector<const RegExpTestContext*>::const_iterator TestContextIterator;
vector<const RegExpTestContext*> contexts_;
};
TEST_F(RegExpAdapterTest, TestConsumeNoMatch) {
const scoped_ptr<RegExpInput> input(RegExpInput::Create("+1-123-456-789"));
// When 'true' is passed to Consume(), the match occurs from the beginning of
// the input.
ASSERT_FALSE(digits_->Consume(input.get(), true, NULL, NULL, NULL));
ASSERT_EQ("+1-123-456-789", input->ToString());
string res1;
ASSERT_FALSE(parentheses_digits_->Consume(
input.get(), true, &res1, NULL, NULL));
ASSERT_EQ("+1-123-456-789", input->ToString());
ASSERT_EQ("", res1);
for (vector<const RegExpTestContext*>::const_iterator it = contexts_.begin();
it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
const scoped_ptr<RegExpInput> input(
context.factory->CreateInput("+1-123-456-789"));
// When 'true' is passed to Consume(), the match occurs from the beginning
// of the input.
ASSERT_FALSE(context.digits->Consume(input.get(), true, NULL, NULL, NULL))
<< ErrorMessage(context);
ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);
string res1;
ASSERT_FALSE(context.parentheses_digits->Consume(
input.get(), true, &res1, NULL, NULL)) << ErrorMessage(context);
ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);
ASSERT_EQ("", res1) << ErrorMessage(context);
}
}
TEST_F(RegExpAdapterTest, TestConsumeWithNull) {
const scoped_ptr<RegExpInput> input(RegExpInput::Create("+123"));
const scoped_ptr<const RegExp> plus_sign(RegExp::Create("(\\+)"));
ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL));
ASSERT_EQ("123", input->ToString());
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
const AbstractRegExpFactory& factory = *context.factory;
const scoped_ptr<RegExpInput> input(factory.CreateInput("+123"));
const scoped_ptr<const RegExp> plus_sign(factory.CreateRegExp("(\\+)"));
ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL))
<< ErrorMessage(context);
ASSERT_EQ("123", input->ToString()) << ErrorMessage(context);
}
}
TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) {
const scoped_ptr<RegExpInput> input(RegExpInput::Create("1-123-456-789"));
string res1, res2;
ASSERT_TRUE(two_digit_groups_->Consume(
input.get(), true, &res1, &res2, NULL));
ASSERT_EQ("-456-789", input->ToString());
ASSERT_EQ("1", res1);
ASSERT_EQ("123", res2);
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
const scoped_ptr<RegExpInput> input(
context.factory->CreateInput("1-123-456-789"));
string res1, res2;
ASSERT_TRUE(context.two_digit_groups->Consume(
input.get(), true, &res1, &res2, NULL)) << ErrorMessage(context);
ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
ASSERT_EQ("1", res1) << ErrorMessage(context);
ASSERT_EQ("123", res2) << ErrorMessage(context);
}
}
TEST_F(RegExpAdapterTest, TestFindAndConsume) {
const scoped_ptr<RegExpInput> input(RegExpInput::Create("+1-123-456-789"));
// When 'false' is passed to Consume(), the match can occur from any place in
// the input.
ASSERT_TRUE(digits_->Consume(input.get(), false, NULL, NULL, NULL));
ASSERT_EQ("-123-456-789", input->ToString());
ASSERT_TRUE(digits_->Consume(input.get(), false, NULL, NULL, NULL));
ASSERT_EQ("-456-789", input->ToString());
ASSERT_FALSE(parentheses_digits_->Consume(
input.get(), false, NULL, NULL, NULL));
ASSERT_EQ("-456-789", input->ToString());
string res1, res2;
ASSERT_TRUE(two_digit_groups_->Consume(
input.get(), false, &res1, &res2, NULL));
ASSERT_EQ("", input->ToString());
ASSERT_EQ("456", res1);
ASSERT_EQ("789", res2);
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
const scoped_ptr<RegExpInput> input(
context.factory->CreateInput("+1-123-456-789"));
// When 'false' is passed to Consume(), the match can occur from any place
// in the input.
ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL))
<< ErrorMessage(context);
ASSERT_EQ("-123-456-789", input->ToString()) << ErrorMessage(context);
ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL))
<< ErrorMessage(context);
ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
ASSERT_FALSE(context.parentheses_digits->Consume(
input.get(), false, NULL, NULL, NULL)) << ErrorMessage(context);
ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
string res1, res2;
ASSERT_TRUE(context.two_digit_groups->Consume(
input.get(), false, &res1, &res2, NULL)) << ErrorMessage(context);
ASSERT_EQ("", input->ToString()) << ErrorMessage(context);
ASSERT_EQ("456", res1) << ErrorMessage(context);
ASSERT_EQ("789", res2) << ErrorMessage(context);
}
}
TEST(RegExpAdapter, TestPartialMatch) {
const scoped_ptr<const RegExp> reg_exp(RegExp::Create("([\\da-z]+)"));
string matched;
EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched));
EXPECT_EQ("12345af", matched);
EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL));
EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched));
EXPECT_EQ("12", matched);
matched.clear();
EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched));
EXPECT_EQ("", matched);
TEST_F(RegExpAdapterTest, TestPartialMatch) {
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
const AbstractRegExpFactory& factory = *context.factory;
const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp("([\\da-z]+)"));
string matched;
EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched))
<< ErrorMessage(context);
EXPECT_EQ("12345af", matched) << ErrorMessage(context);
EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL))
<< ErrorMessage(context);
EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched))
<< ErrorMessage(context);
EXPECT_EQ("12", matched) << ErrorMessage(context);
matched.clear();
EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched))
<< ErrorMessage(context);
EXPECT_EQ("", matched) << ErrorMessage(context);
}
}
TEST(RegExpAdapter, TestFullMatch) {
const scoped_ptr<const RegExp> reg_exp(RegExp::Create("([\\da-z]+)"));
string matched;
TEST_F(RegExpAdapterTest, TestFullMatch) {
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
const AbstractRegExpFactory& factory = *context.factory;
const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp("([\\da-z]+)"));
string matched;
EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched));
EXPECT_EQ("12345af", matched);
EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched))
<< ErrorMessage(context);
EXPECT_EQ("12345af", matched) << ErrorMessage(context);
EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL));
EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL)) << ErrorMessage(context);
matched.clear();
EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched));
EXPECT_EQ("", matched);
matched.clear();
EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched)) << ErrorMessage(context);
EXPECT_EQ("", matched) << ErrorMessage(context);
matched.clear();
EXPECT_FALSE(reg_exp->FullMatch("[]", &matched));
EXPECT_EQ("", matched);
matched.clear();
EXPECT_FALSE(reg_exp->FullMatch("[]", &matched)) << ErrorMessage(context);
EXPECT_EQ("", matched) << ErrorMessage(context);
}
}
TEST_F(RegExpAdapterTest, TestReplace) {
string input("123-4567 ");
ASSERT_TRUE(single_digit_->Replace(&input, "+"));
ASSERT_EQ("+23-4567 ", input);
ASSERT_TRUE(single_digit_->Replace(&input, "+"));
ASSERT_EQ("++3-4567 ", input);
const scoped_ptr<const RegExp> single_letter(RegExp::Create("[a-z]"));
ASSERT_FALSE(single_letter->Replace(&input, "+"));
ASSERT_EQ("++3-4567 ", input);
for (vector<const RegExpTestContext*>::const_iterator it = contexts_.begin();
it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
string input("123-4567 ");
ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
<< ErrorMessage(context);
ASSERT_EQ("+23-4567 ", input) << ErrorMessage(context);
ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
<< ErrorMessage(context);
ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);
const scoped_ptr<const RegExp> single_letter(
context.factory->CreateRegExp("[a-z]"));
ASSERT_FALSE(single_letter->Replace(&input, "+")) << ErrorMessage(context);
ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);
}
}
TEST_F(RegExpAdapterTest, TestReplaceWithGroup) {
// Make sure referencing groups in the regexp in the replacement string works.
// $[0-9] notation is used.
string input = "123-4567 abc";
ASSERT_TRUE(two_digit_groups_->Replace(&input, "$2"));
ASSERT_EQ("4567 abc", input);
input = "123-4567";
ASSERT_TRUE(two_digit_groups_->Replace(&input, "$1"));
ASSERT_EQ("123", input);
input = "123-4567";
ASSERT_TRUE(two_digit_groups_->Replace(&input, "$2"));
ASSERT_EQ("4567", input);
input = "123-4567";
ASSERT_TRUE(two_digit_groups_->Replace(&input, "$1 $2"));
ASSERT_EQ("123 4567", input);
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
// Make sure referencing groups in the regexp in the replacement string
// works. $[0-9] notation is used.
string input = "123-4567 abc";
ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2"))
<< ErrorMessage(context);
ASSERT_EQ("4567 abc", input) << ErrorMessage(context);
input = "123-4567";
ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1"))
<< ErrorMessage(context);
ASSERT_EQ("123", input) << ErrorMessage(context);
input = "123-4567";
ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2"))
<< ErrorMessage(context);
ASSERT_EQ("4567", input) << ErrorMessage(context);
input = "123-4567";
ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1 $2"))
<< ErrorMessage(context);
ASSERT_EQ("123 4567", input) << ErrorMessage(context);
}
}
TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) {
// Make sure '$' can be used in the replacement string when escaped.
string input = "123-4567";
ASSERT_TRUE(two_digit_groups_->Replace(&input, "\\$1 \\$2"));
ASSERT_EQ("$1 $2", input);
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
// Make sure '$' can be used in the replacement string when escaped.
string input = "123-4567";
ASSERT_TRUE(context.two_digit_groups->Replace(&input, "\\$1 \\$2"))
<< ErrorMessage(context);
ASSERT_EQ("$1 $2", input) << ErrorMessage(context);
}
}
// Verifies GlobalReplace: the first call must turn "123-4567 " into
// "***-**** " and return true; a second call on the already-replaced string
// must return false and leave the string untouched.
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped; the statements using single_digit_ (and the `input` declared
// before the loop) appear to be removed pre-change lines, the ones using
// context.single_digit the added ones. Confirm against the real file.
TEST_F(RegExpAdapterTest, TestGlobalReplace) {
string input("123-4567 ");
// Run the check once per test context held in contexts_.
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
string input("123-4567 ");
// First pass: every digit is expected to be replaced by '*'.
ASSERT_TRUE(single_digit_->GlobalReplace(&input, "*"));
ASSERT_EQ("***-**** ", input);
ASSERT_TRUE(context.single_digit->GlobalReplace(&input, "*"))
<< ErrorMessage(context);
ASSERT_EQ("***-**** ", input) << ErrorMessage(context);
// Second pass: nothing is left to replace, so the call must report false
// and the input must be unchanged.
ASSERT_FALSE(single_digit_->GlobalReplace(&input, "*"));
ASSERT_EQ("***-**** ", input);
ASSERT_FALSE(context.single_digit->GlobalReplace(&input, "*"))
<< ErrorMessage(context);
ASSERT_EQ("***-**** ", input) << ErrorMessage(context);
}
}
// Verifies that patterns and inputs given as raw UTF-8 byte sequences are
// handled: the pattern is the literal "℡⊏", a captured run of characters in
// the range α-ω, then "⊐".
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped; the TEST(...) header and the statements directly below it appear
// to be the removed pre-change version (built with RegExp::Create), and the
// TEST_F(...) further down the added version. Confirm against the real file.
TEST(RegExpAdapter, TestUtf8) {
const scoped_ptr<const RegExp> reg_exp(RegExp::Create(
"\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90"
/* "℡⊏([α-ω]*)⊐" */));
string matched;
EXPECT_FALSE(reg_exp->Match(
"\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true,
&matched));
EXPECT_TRUE(reg_exp->Match(
"\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90"
/* "℡⊏αβ⊐" */, true, &matched));
EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched);
// Fixture-based version: the same checks, run once per test context with the
// regexp created through that context's factory.
TEST_F(RegExpAdapterTest, TestUtf8) {
for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
++it) {
const RegExpTestContext& context = **it;
const AbstractRegExpFactory& factory = *context.factory;
// reg_exp takes the pointer returned by CreateRegExp into a scoped_ptr.
const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp(
"\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90"
/* "℡⊏([α-ω]*)⊐" */));
string matched;
// Digits are outside the α-ω class, so this input must not match.
// The `true` argument presumably requests a full match (TODO confirm
// against RegExp::Match).
EXPECT_FALSE(reg_exp->Match(
"\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true,
&matched)) << ErrorMessage(context);
// Greek letters inside the delimiters must match, and `matched` must
// receive the captured group ("αβ"), not the whole input.
EXPECT_TRUE(reg_exp->Match(
"\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90"
/* "℡⊏αβ⊐" */, true, &matched)) << ErrorMessage(context);
EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched) << ErrorMessage(context);
}
}
} // namespace phonenumbers


+ 6
- 4
cpp/src/phonenumbers/regexp_cache.cc View File

@ -31,11 +31,13 @@ namespace phonenumbers {
using base::AutoLock;
// Constructs a cache whose regexps are created through `regexp_factory`.
// The factory is kept in regexp_factory_ (declared as a reference in the
// header), so it has to outlive the cache. `min_items` is forwarded to
// CacheImpl only when USE_TR1_UNORDERED_MAP is defined; otherwise CacheImpl is
// default-constructed and `min_items` is unused.
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped; the single-argument signature and the initializers starting with
// ':' before cache_impl_ appear to be the removed lines. Confirm against the
// real file.
RegExpCache::RegExpCache(size_t min_items)
RegExpCache::RegExpCache(const AbstractRegExpFactory& regexp_factory,
size_t min_items)
: regexp_factory_(regexp_factory),
#ifdef USE_TR1_UNORDERED_MAP
: cache_impl_(new CacheImpl(min_items))
cache_impl_(new CacheImpl(min_items))
#else
: cache_impl_(new CacheImpl())
cache_impl_(new CacheImpl())
#endif
{}
@ -52,7 +54,7 @@ const RegExp& RegExpCache::GetRegExp(const string& pattern) {
// NOTE(review): the line above is a diff hunk header; any statements between
// the function signature and the lookup below are not shown here.
// Cache hit: hand back the regexp already stored for this pattern.
CacheImpl::const_iterator it = cache_impl_->find(pattern);
if (it != cache_impl_->end()) return *it->second;
// Cache miss: build the regexp for this pattern.
// NOTE(review): the two declarations of `regexp` look like the removed
// (RegExp::Create) and added (regexp_factory_.CreateRegExp) lines of the
// diff. Confirm against the real file.
const RegExp* regexp = RegExp::Create(pattern);
const RegExp* regexp = regexp_factory_.CreateRegExp(pattern);
// Store the pointer under its pattern; the returned reference refers to the
// same object that was just put in the cache.
cache_impl_->insert(make_pair(pattern, regexp));
return *regexp;
}


+ 4
- 1
cpp/src/phonenumbers/regexp_cache.h View File

@ -45,6 +45,7 @@ namespace phonenumbers {
using std::string;
class AbstractRegExpFactory;
class RegExp;
class RegExpCache {
@ -56,12 +57,14 @@ class RegExpCache {
#endif
public:
explicit RegExpCache(size_t min_items);
explicit RegExpCache(const AbstractRegExpFactory& regexp_factory,
size_t min_items);
~RegExpCache();
const RegExp& GetRegExp(const string& pattern);
private:
const AbstractRegExpFactory& regexp_factory_;
base::Lock lock_; // protects cache_impl_
scoped_ptr<CacheImpl> cache_impl_; // protected by lock_
friend class RegExpCacheTest_CacheConstructor_Test;


+ 3
- 2
cpp/src/phonenumbers/regexp_cache_test.cc View File

@ -19,8 +19,8 @@
#include <gtest/gtest.h>
#include "phonenumbers/regexp_adapter.h"
#include "phonenumbers/regexp_cache.h"
#include "phonenumbers/regexp_factory.h"
namespace i18n {
namespace phonenumbers {
@ -31,9 +31,10 @@ class RegExpCacheTest : public testing::Test {
// Test fixture holding a RegExpCache together with the RegExpFactory it is
// built from.
// NOTE(review): the line above is a diff hunk header, and the two
// constructors below look like the removed (one-argument) and added
// (factory + min_items) lines of the diff. Confirm against the real file.
protected:
// Passed to the RegExpCache constructor as its min_items argument.
static const size_t min_items_ = 2;
RegExpCacheTest() : cache_(min_items_) {}
RegExpCacheTest() : cache_(regexp_factory_, min_items_) {}
virtual ~RegExpCacheTest() {}
// Declared before cache_ on purpose: members are initialized in declaration
// order, so the factory exists before cache_ receives a reference to it and
// is destroyed only after cache_.
RegExpFactory regexp_factory_;
RegExpCache cache_;
};


+ 44
- 0
cpp/src/phonenumbers/regexp_factory.h View File

@ -0,0 +1,44 @@
// Copyright (C) 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Author: Philippe Liard
#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_
#define I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_
// This file selects the right implementation of the abstract regexp factory at
// compile time depending on the compilation flags (USE_RE2). The default
// abstract regexp factory implementation can be obtained using the type
// RegExpFactory. This will be set to RE2RegExpFactory if RE2 is used or
// ICURegExpFactory otherwise.
#ifdef USE_RE2
#include "phonenumbers/regexp_adapter_re2.h"
#else
#include "phonenumbers/regexp_adapter_icu.h"
#endif // USE_RE2
namespace i18n {
namespace phonenumbers {

// RegExpFactory names the regexp factory implementation selected at compile
// time: ICURegExpFactory by default, RE2RegExpFactory when USE_RE2 is defined.
#ifndef USE_RE2
typedef ICURegExpFactory RegExpFactory;
#else
typedef RE2RegExpFactory RegExpFactory;
#endif  // USE_RE2

}  // namespace phonenumbers
}  // namespace i18n
#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_

+ 9
- 0
cpp/src/phonenumbers/stl_util.h View File

@ -36,6 +36,15 @@ void STLDeleteContainerPairSecondPointers(const ForwardIterator& begin,
}
}
// Deletes every pointer stored in the provided container and then empties the
// container, so that it never holds dangling pointers and calling this
// function again cannot delete the same object twice.
//
// container: a pointer to a container of owning raw pointers. It must provide
//            begin(), end() and clear(). A null pointer is accepted and makes
//            the call a no-op.
template <typename T>
void STLDeleteElements(T* container) {
  if (!container) {
    return;
  }
  for (typename T::iterator it = container->begin(); it != container->end();
       ++it) {
    delete *it;
  }
  // Drop the now-invalid pointers instead of leaving them behind.
  container->clear();
}
} // namespace phonenumbers
} // namespace i18n


Loading…
Cancel
Save