diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ef154c5b1..fbb26757b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -167,6 +167,9 @@ set ( # Add regexp engine sources. ICU is used by default. if (${USE_RE2} STREQUAL "ON") + # Add a flag to select the right regexp factory implementation used by + # regexp_factory.h and regexp_adapter_test.cc. + add_definitions (-DUSE_RE2) list (APPEND SOURCES "src/phonenumbers/regexp_adapter_re2.cc") else () list (APPEND SOURCES "src/phonenumbers/regexp_adapter_icu.cc") diff --git a/cpp/src/phonenumbers/phonenumberutil.cc b/cpp/src/phonenumbers/phonenumberutil.cc index 690147c38..bd7aa39e2 100644 --- a/cpp/src/phonenumbers/phonenumberutil.cc +++ b/cpp/src/phonenumbers/phonenumberutil.cc @@ -43,6 +43,7 @@ #include "phonenumbers/phonenumber.pb.h" #include "phonenumbers/regexp_adapter.h" #include "phonenumbers/regexp_cache.h" +#include "phonenumbers/regexp_factory.h" #include "phonenumbers/region_code.h" #include "phonenumbers/stl_util.h" #include "phonenumbers/stringutil.h" @@ -78,6 +79,7 @@ const char PhoneNumberUtil::kValidPunctuation[] = namespace { scoped_ptr logger_; +scoped_ptr regexp_factory; scoped_ptr regexp_cache; // These objects are created in the function InitializeStaticMapsAndSets. @@ -290,7 +292,7 @@ void FormatAccordingToFormatsWithCarrier( int size = it->leading_digits_pattern_size(); if (size > 0) { const scoped_ptr number_copy( - RegExpInput::Create(number_for_leading_digits_match)); + regexp_factory->CreateInput(number_for_leading_digits_match)); // We always use the last leading_digits_pattern, as it is the most // detailed. if (!regexp_cache->GetRegExp(it->leading_digits_pattern(size - 1)) @@ -443,7 +445,8 @@ char32 ToUnicodeCodepoint(const char* unicode_char) { void InitializeStaticMapsAndSets() { // Create global objects. - regexp_cache.reset(new RegExpCache(128)); + regexp_factory.reset(new RegExpFactory()); + regexp_cache.reset(new RegExpCache(*regexp_factory.get(), 128)); all_plus_number_grouping_symbols.reset(new map); alpha_mappings.reset(new map); alpha_phone_mappings.reset(new map); @@ -579,30 +582,6 @@ void NormalizeHelper(const map& normalization_replacements, number->assign(normalized_number); } -// Strips the IDD from the start of the number if present. Helper function used -// by MaybeStripInternationalPrefixAndNormalize. -bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) { - DCHECK(number); - const scoped_ptr number_copy(RegExpInput::Create(*number)); - // First attempt to strip the idd_pattern at the start, if present. We make a - // copy so that we can revert to the original string if necessary. - if (idd_pattern.Consume(number_copy.get())) { - // Only strip this if the first digit after the match is not a 0, since - // country calling codes cannot begin with 0. - string extracted_digit; - if (capturing_digit_pattern->PartialMatch(number_copy->ToString(), - &extracted_digit)) { - PhoneNumberUtil::NormalizeDigitsOnly(&extracted_digit); - if (extracted_digit == "0") { - return false; - } - } - number->assign(number_copy->ToString()); - return true; - } - return false; -} - PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern( const RegExp& number_pattern, const string& number) { string extracted_number; @@ -694,7 +673,7 @@ PhoneNumberUtil* PhoneNumberUtil::GetInstance() { #endif void PhoneNumberUtil::CreateRegularExpressions() const { - unique_international_prefix.reset(RegExp::Create( + unique_international_prefix.reset(regexp_factory->CreateRegExp( /* "[\\d]+(?:[~⁓∼~][\\d]+)?" */ "[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?")); // The first_group_capturing_pattern was originally set to $1 but there are @@ -702,17 +681,22 @@ void PhoneNumberUtil::CreateRegularExpressions() const { // pattern (e.g. Argentina) so the $1 group does not match correctly. // Therefore, we use \d, so that the first group actually used in the pattern // will be matched. - first_group_capturing_pattern.reset(RegExp::Create("(\\$\\d)")); - carrier_code_pattern.reset(RegExp::Create("\\$CC")); - digits_pattern.reset(RegExp::Create(StrCat("[", kDigits, "]*"))); - capturing_digit_pattern.reset(RegExp::Create(StrCat("([", kDigits, "])"))); - capturing_ascii_digits_pattern.reset(RegExp::Create("(\\d+)")); + first_group_capturing_pattern.reset(regexp_factory->CreateRegExp("(\\$\\d)")); + carrier_code_pattern.reset(regexp_factory->CreateRegExp("\\$CC")); + digits_pattern.reset( + regexp_factory->CreateRegExp(StrCat("[", kDigits, "]*"))); + capturing_digit_pattern.reset( + regexp_factory->CreateRegExp(StrCat("([", kDigits, "])"))); + capturing_ascii_digits_pattern.reset(regexp_factory->CreateRegExp("(\\d+)")); valid_start_char.reset(new string(StrCat("[", kPlusChars, kDigits, "]"))); - valid_start_char_pattern.reset(RegExp::Create(*valid_start_char)); - capture_up_to_second_number_start_pattern.reset(RegExp::Create( + valid_start_char_pattern.reset( + regexp_factory->CreateRegExp(*valid_start_char)); + capture_up_to_second_number_start_pattern.reset(regexp_factory->CreateRegExp( kCaptureUpToSecondNumberStart)); - unwanted_end_char_pattern.reset(RegExp::Create(kUnwantedEndChar)); - separator_pattern.reset(RegExp::Create(StrCat("[", kValidPunctuation, "]+"))); + unwanted_end_char_pattern.reset( + regexp_factory->CreateRegExp(kUnwantedEndChar)); + separator_pattern.reset( + regexp_factory->CreateRegExp(StrCat("[", kValidPunctuation, "]+"))); valid_phone_number.reset(new string( StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kDigits, "]){3,}[", kValidAlpha, kValidPunctuation, kDigits, "]*"))); @@ -734,13 +718,14 @@ void PhoneNumberUtil::CreateRegularExpressions() const { "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?|[- ]+([", kDigits, "]{1,5})#"))); - extn_pattern.reset(RegExp::Create( + extn_pattern.reset(regexp_factory->CreateRegExp( StrCat("(?i)(?:", *known_extn_patterns, ")$"))); - valid_phone_number_pattern.reset(RegExp::Create( + valid_phone_number_pattern.reset(regexp_factory->CreateRegExp( StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, ")?"))); - valid_alpha_phone_pattern.reset(RegExp::Create( + valid_alpha_phone_pattern.reset(regexp_factory->CreateRegExp( StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))); - plus_chars_pattern.reset(RegExp::Create(StrCat("[", kPlusChars, "]+"))); + plus_chars_pattern.reset( + regexp_factory->CreateRegExp(StrCat("[", kPlusChars, "]+"))); } const string& PhoneNumberUtil::GetExtnPatterns() const { @@ -1288,7 +1273,7 @@ void PhoneNumberUtil::GetRegionCodeForNumberFromRegionList( const PhoneMetadata* metadata = GetMetadataForRegion(*it); if (metadata->has_leading_digits()) { const scoped_ptr number( - RegExpInput::Create(national_number)); + regexp_factory->CreateInput(national_number)); if (regexp_cache->GetRegExp(metadata->leading_digits()).Consume( number.get())) { *region_code = *it; @@ -1366,7 +1351,8 @@ bool PhoneNumberUtil::CheckRegionForParsing( const string& number_to_parse, const string& default_region) const { if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) { - const scoped_ptr number(RegExpInput::Create(number_to_parse)); + const scoped_ptr number( + regexp_factory->CreateInput(number_to_parse)); if (!plus_chars_pattern->Consume(number.get())) { return false; } @@ -1688,7 +1674,7 @@ int PhoneNumberUtil::GetLengthOfNationalDestinationCode( string formatted_number; Format(copied_proto, INTERNATIONAL, &formatted_number); const scoped_ptr i18n_number( - RegExpInput::Create(formatted_number)); + regexp_factory->CreateInput(formatted_number)); string digit_group; string ndc; string third_group; @@ -1717,13 +1703,12 @@ int PhoneNumberUtil::GetLengthOfNationalDestinationCode( return ndc.size(); } -// static -void PhoneNumberUtil::NormalizeDigitsOnly(string* number) { +void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const { DCHECK(number); - static const scoped_ptr non_digits_pattern(RegExp::Create( - StrCat("[^", kDigits, "]"))); + const RegExp& non_digits_pattern = regexp_cache->GetRegExp( + StrCat("[^", kDigits, "]")); // Delete everything that isn't valid digits. - non_digits_pattern->GlobalReplace(number, ""); + non_digits_pattern.GlobalReplace(number, ""); // Normalize all decimal digits to ASCII digits. number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number)); } @@ -1779,6 +1764,32 @@ bool PhoneNumberUtil::IsViablePhoneNumber(const string& number) const { return valid_phone_number_pattern->FullMatch(number); } +// Strips the IDD from the start of the number if present. Helper function used +// by MaybeStripInternationalPrefixAndNormalize. +bool PhoneNumberUtil::ParsePrefixAsIdd(const RegExp& idd_pattern, + string* number) const { + DCHECK(number); + const scoped_ptr number_copy( + regexp_factory->CreateInput(*number)); + // First attempt to strip the idd_pattern at the start, if present. We make a + // copy so that we can revert to the original string if necessary. + if (idd_pattern.Consume(number_copy.get())) { + // Only strip this if the first digit after the match is not a 0, since + // country calling codes cannot begin with 0. + string extracted_digit; + if (capturing_digit_pattern->PartialMatch(number_copy->ToString(), + &extracted_digit)) { + NormalizeDigitsOnly(&extracted_digit); + if (extracted_digit == "0") { + return false; + } + } + number->assign(number_copy->ToString()); + return true; + } + return false; +} + // Strips any international prefix (such as +, 00, 011) present in the number // provided, normalizes the resulting number, and indicates if an international // prefix was present. @@ -1797,7 +1808,7 @@ PhoneNumberUtil::MaybeStripInternationalPrefixAndNormalize( return PhoneNumber::FROM_DEFAULT_COUNTRY; } const scoped_ptr number_string_piece( - RegExpInput::Create(*number)); + regexp_factory->CreateInput(*number)); if (plus_chars_pattern->Consume(number_string_piece.get())) { number->assign(number_string_piece->ToString()); // Can now normalize the rest of the number since we've consumed the "+" @@ -1839,9 +1850,10 @@ void PhoneNumberUtil::MaybeStripNationalPrefixAndCarrierCode( } // We use two copies here since Consume modifies the phone number, and if the // first if-clause fails the number will already be changed. - const scoped_ptr number_copy(RegExpInput::Create(*number)); + const scoped_ptr number_copy( + regexp_factory->CreateInput(*number)); const scoped_ptr number_copy_without_transform( - RegExpInput::Create(*number)); + regexp_factory->CreateInput(*number)); string number_string_copy(*number); string captured_part_of_prefix; const RegExp& national_number_rule = regexp_cache->GetRegExp( @@ -1903,7 +1915,7 @@ bool PhoneNumberUtil::MaybeStripExtension(string* number, string* extension) string possible_extension_three; string number_copy(*number); const scoped_ptr number_copy_as_regexp_input( - RegExpInput::Create(number_copy)); + regexp_factory->CreateInput(number_copy)); if (extn_pattern->Consume(number_copy_as_regexp_input.get(), false, &possible_extension_one, diff --git a/cpp/src/phonenumbers/phonenumberutil.h b/cpp/src/phonenumbers/phonenumberutil.h index b23819504..f80d05b6b 100644 --- a/cpp/src/phonenumbers/phonenumberutil.h +++ b/cpp/src/phonenumbers/phonenumberutil.h @@ -51,6 +51,7 @@ class NumberFormat; class PhoneMetadata; class PhoneMetadataCollection; class PhoneNumber; +class RegExp; // NOTE: A lot of methods in this class require Region Code strings. These must // be provided using ISO 3166-1 two-letter country-code format. The list of the @@ -170,7 +171,7 @@ class PhoneNumberUtil : public Singleton { // Normalizes a string of characters representing a phone number. This // converts wide-ascii and arabic-indic numerals to European numerals, and // strips punctuation and alpha characters. - static void NormalizeDigitsOnly(string* number); + void NormalizeDigitsOnly(string* number) const; // Gets the national significant number of a phone number. Note a national // significant number doesn't contain a national prefix or any formatting. @@ -611,6 +612,10 @@ class PhoneNumberUtil : public Singleton { const list& region_codes, string* region_code) const; + // Strips the IDD from the start of the number if present. Helper function + // used by MaybeStripInternationalPrefixAndNormalize. + bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const; + void Normalize(string* number) const; PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize( const string& possible_idd_prefix, diff --git a/cpp/src/phonenumbers/regexp_adapter.h b/cpp/src/phonenumbers/regexp_adapter.h index 35681526f..38ec11a03 100644 --- a/cpp/src/phonenumbers/regexp_adapter.h +++ b/cpp/src/phonenumbers/regexp_adapter.h @@ -15,7 +15,7 @@ // Author: George Yakovlev // Philippe Liard // -// Regexp adapter to allow a pluggable regexp engine. It has been introduced +// RegExp adapter to allow a pluggable regexp engine. It has been introduced // during the integration of the open-source version of this library into // Chromium to be able to use the ICU Regex engine instead of RE2, which is not // officially supported on Windows. @@ -40,11 +40,6 @@ class RegExpInput { public: virtual ~RegExpInput() {} - // Creates a new instance of the default RegExpInput implementation. The - // deletion of the returned instance is under the responsibility of the - // caller. - static RegExpInput* Create(const string& utf8_input); - // Converts to a C++ string. virtual string ToString() const = 0; }; @@ -56,10 +51,6 @@ class RegExp { public: virtual ~RegExp() {} - // Creates a new instance of the default RegExp implementation. The deletion - // of the returned instance is under the responsibility of the caller. - static RegExp* Create(const string& utf8_regexp); - // Matches string to regular expression, returns true if expression was // matched, false otherwise, advances position in the match. // input_string - string to be searched. @@ -156,6 +147,21 @@ class RegExp { } }; +// Abstract factory class that lets its subclasses instantiate the classes +// implementing RegExp and RegExpInput. +class AbstractRegExpFactory { + public: + virtual ~AbstractRegExpFactory() {} + + // Creates a new instance of RegExpInput. The deletion of the returned + // instance is under the responsibility of the caller. + virtual RegExpInput* CreateInput(const string& utf8_input) const = 0; + + // Creates a new instance of RegExp. The deletion of the returned instance is + // under the responsibility of the caller. + virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0; +}; + } // namespace phonenumbers } // namespace i18n diff --git a/cpp/src/phonenumbers/regexp_adapter_icu.cc b/cpp/src/phonenumbers/regexp_adapter_icu.cc index bada8e39c..977bc7d9b 100644 --- a/cpp/src/phonenumbers/regexp_adapter_icu.cc +++ b/cpp/src/phonenumbers/regexp_adapter_icu.cc @@ -15,7 +15,7 @@ // Author: George Yakovlev // Philippe Liard -#include "phonenumbers/regexp_adapter.h" +#include "phonenumbers/regexp_adapter_icu.h" #include @@ -197,11 +197,11 @@ class IcuRegExp : public RegExp { DISALLOW_COPY_AND_ASSIGN(IcuRegExp); }; -RegExpInput* RegExpInput::Create(const string& utf8_input) { +RegExpInput* ICURegExpFactory::CreateInput(const string& utf8_input) const { return new IcuRegExpInput(utf8_input); } -RegExp* RegExp::Create(const string& utf8_regexp) { +RegExp* ICURegExpFactory::CreateRegExp(const string& utf8_regexp) const { return new IcuRegExp(utf8_regexp); } diff --git a/cpp/src/phonenumbers/regexp_adapter_icu.h b/cpp/src/phonenumbers/regexp_adapter_icu.h new file mode 100644 index 000000000..ad6a84792 --- /dev/null +++ b/cpp/src/phonenumbers/regexp_adapter_icu.h @@ -0,0 +1,39 @@ +// Copyright (C) 2011 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Philippe Liard + +#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_ +#define I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_ + +#include "phonenumbers/regexp_adapter.h" + +namespace i18n { +namespace phonenumbers { + +// ICU regexp factory that lets the user instantiate the underlying +// implementation of RegExp and RegExpInput classes based on the ICU regexp +// engine. +class ICURegExpFactory : public AbstractRegExpFactory { + public: + virtual ~ICURegExpFactory() {} + + virtual RegExpInput* CreateInput(const string& utf8_input) const; + virtual RegExp* CreateRegExp(const string& utf8_regexp) const; +}; + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_ diff --git a/cpp/src/phonenumbers/regexp_adapter_re2.cc b/cpp/src/phonenumbers/regexp_adapter_re2.cc index a0d459ef6..9b76cbc09 100644 --- a/cpp/src/phonenumbers/regexp_adapter_re2.cc +++ b/cpp/src/phonenumbers/regexp_adapter_re2.cc @@ -15,7 +15,7 @@ // Author: George Yakovlev // Philippe Liard -#include "phonenumbers/regexp_adapter.h" +#include "phonenumbers/regexp_adapter_re2.h" #include #include @@ -149,13 +149,11 @@ class RE2RegExp : public RegExp { RE2 utf8_regexp_; }; -// Implementation of the adapter static factory methods. -// RE2 RegExp engine is the default implementation. -RegExpInput* RegExpInput::Create(const string& utf8_input) { +RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const { return new RE2RegExpInput(utf8_input); } -RegExp* RegExp::Create(const string& utf8_regexp) { +RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const { return new RE2RegExp(utf8_regexp); } diff --git a/cpp/src/phonenumbers/regexp_adapter_re2.h b/cpp/src/phonenumbers/regexp_adapter_re2.h new file mode 100644 index 000000000..194983170 --- /dev/null +++ b/cpp/src/phonenumbers/regexp_adapter_re2.h @@ -0,0 +1,38 @@ +// Copyright (C) 2011 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Philippe Liard + +#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_ +#define I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_ + +#include "phonenumbers/regexp_adapter.h" + +namespace i18n { +namespace phonenumbers { + +// RE2 regexp factory that lets the user instantiate the underlying +// implementation of RegExp and RegExpInput classes based on RE2. +class RE2RegExpFactory : public AbstractRegExpFactory { + public: + virtual ~RE2RegExpFactory() {} + + virtual RegExpInput* CreateInput(const string& utf8_input) const; + virtual RegExp* CreateRegExp(const string& utf8_regexp) const; +}; + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_ diff --git a/cpp/src/phonenumbers/regexp_adapter_test.cc b/cpp/src/phonenumbers/regexp_adapter_test.cc index 2b0334224..ebc2c062e 100644 --- a/cpp/src/phonenumbers/regexp_adapter_test.cc +++ b/cpp/src/phonenumbers/regexp_adapter_test.cc @@ -18,186 +18,305 @@ #include "phonenumbers/regexp_adapter.h" #include +#include #include +#include "base/basictypes.h" #include "base/scoped_ptr.h" +#include "stl_util.h" +#include "stringutil.h" + +#ifdef USE_RE2 +#include "phonenumbers/regexp_adapter_re2.h" +#else +#include "phonenumbers/regexp_adapter_icu.h" +#endif // USE_RE2 namespace i18n { namespace phonenumbers { using std::string; +using std::vector; + +// Structure that contains the attributes used to test an implementation of the +// regexp adapter. +struct RegExpTestContext { + explicit RegExpTestContext(const string& name, + const AbstractRegExpFactory* factory) + : name(name), + factory(factory), + digits(factory->CreateRegExp("\\d+")), + parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")), + single_digit(factory->CreateRegExp("\\d")), + two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")) {} + + const string name; + const scoped_ptr factory; + const scoped_ptr digits; + const scoped_ptr parentheses_digits; + const scoped_ptr single_digit; + const scoped_ptr two_digit_groups; +}; class RegExpAdapterTest : public testing::Test { protected: - RegExpAdapterTest() - : digits_(RegExp::Create("\\d+")), - parentheses_digits_(RegExp::Create("\\((\\d+)\\)")), - single_digit_(RegExp::Create("\\d")), - two_digit_groups_(RegExp::Create("(\\d+)-(\\d+)")) {} - - const scoped_ptr digits_; - const scoped_ptr parentheses_digits_; - const scoped_ptr single_digit_; - const scoped_ptr two_digit_groups_; + RegExpAdapterTest() { +#ifdef USE_RE2 + contexts_.push_back( + new RegExpTestContext("RE2", new RE2RegExpFactory())); +#else + contexts_.push_back( + new RegExpTestContext("ICU Regex", new ICURegExpFactory())); +#endif // USE_RE2 + } + + ~RegExpAdapterTest() { + STLDeleteElements(&contexts_); + } + + static string ErrorMessage(const RegExpTestContext& context) { + return StrCat("Test failed with ", context.name, " implementation."); + } + + typedef vector::const_iterator TestContextIterator; + vector contexts_; }; TEST_F(RegExpAdapterTest, TestConsumeNoMatch) { - const scoped_ptr input(RegExpInput::Create("+1-123-456-789")); - - // When 'true' is passed to Consume(), the match occurs from the beginning of - // the input. - ASSERT_FALSE(digits_->Consume(input.get(), true, NULL, NULL, NULL)); - ASSERT_EQ("+1-123-456-789", input->ToString()); - - string res1; - ASSERT_FALSE(parentheses_digits_->Consume( - input.get(), true, &res1, NULL, NULL)); - ASSERT_EQ("+1-123-456-789", input->ToString()); - ASSERT_EQ("", res1); + for (vector::const_iterator it = contexts_.begin(); + it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + const scoped_ptr input( + context.factory->CreateInput("+1-123-456-789")); + + // When 'true' is passed to Consume(), the match occurs from the beginning + // of the input. + ASSERT_FALSE(context.digits->Consume(input.get(), true, NULL, NULL, NULL)) + << ErrorMessage(context); + ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context); + + string res1; + ASSERT_FALSE(context.parentheses_digits->Consume( + input.get(), true, &res1, NULL, NULL)) << ErrorMessage(context); + ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context); + ASSERT_EQ("", res1) << ErrorMessage(context); + } } TEST_F(RegExpAdapterTest, TestConsumeWithNull) { - const scoped_ptr input(RegExpInput::Create("+123")); - const scoped_ptr plus_sign(RegExp::Create("(\\+)")); - - ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL)); - ASSERT_EQ("123", input->ToString()); + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + const AbstractRegExpFactory& factory = *context.factory; + const scoped_ptr input(factory.CreateInput("+123")); + const scoped_ptr plus_sign(factory.CreateRegExp("(\\+)")); + + ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL)) + << ErrorMessage(context); + ASSERT_EQ("123", input->ToString()) << ErrorMessage(context); + } } TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) { - const scoped_ptr input(RegExpInput::Create("1-123-456-789")); - - string res1, res2; - ASSERT_TRUE(two_digit_groups_->Consume( - input.get(), true, &res1, &res2, NULL)); - ASSERT_EQ("-456-789", input->ToString()); - ASSERT_EQ("1", res1); - ASSERT_EQ("123", res2); + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + const scoped_ptr input( + context.factory->CreateInput("1-123-456-789")); + + string res1, res2; + ASSERT_TRUE(context.two_digit_groups->Consume( + input.get(), true, &res1, &res2, NULL)) << ErrorMessage(context); + ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context); + ASSERT_EQ("1", res1) << ErrorMessage(context); + ASSERT_EQ("123", res2) << ErrorMessage(context); + } } TEST_F(RegExpAdapterTest, TestFindAndConsume) { - const scoped_ptr input(RegExpInput::Create("+1-123-456-789")); - - // When 'false' is passed to Consume(), the match can occur from any place in - // the input. - ASSERT_TRUE(digits_->Consume(input.get(), false, NULL, NULL, NULL)); - ASSERT_EQ("-123-456-789", input->ToString()); - - ASSERT_TRUE(digits_->Consume(input.get(), false, NULL, NULL, NULL)); - ASSERT_EQ("-456-789", input->ToString()); - - ASSERT_FALSE(parentheses_digits_->Consume( - input.get(), false, NULL, NULL, NULL)); - ASSERT_EQ("-456-789", input->ToString()); - - string res1, res2; - ASSERT_TRUE(two_digit_groups_->Consume( - input.get(), false, &res1, &res2, NULL)); - ASSERT_EQ("", input->ToString()); - ASSERT_EQ("456", res1); - ASSERT_EQ("789", res2); + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + const scoped_ptr input( + context.factory->CreateInput("+1-123-456-789")); + + // When 'false' is passed to Consume(), the match can occur from any place + // in the input. + ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL)) + << ErrorMessage(context); + ASSERT_EQ("-123-456-789", input->ToString()) << ErrorMessage(context); + + ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL)) + << ErrorMessage(context); + ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context); + + ASSERT_FALSE(context.parentheses_digits->Consume( + input.get(), false, NULL, NULL, NULL)) << ErrorMessage(context); + ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context); + + string res1, res2; + ASSERT_TRUE(context.two_digit_groups->Consume( + input.get(), false, &res1, &res2, NULL)) << ErrorMessage(context); + ASSERT_EQ("", input->ToString()) << ErrorMessage(context); + ASSERT_EQ("456", res1) << ErrorMessage(context); + ASSERT_EQ("789", res2) << ErrorMessage(context); + } } -TEST(RegExpAdapter, TestPartialMatch) { - const scoped_ptr reg_exp(RegExp::Create("([\\da-z]+)")); - string matched; - - EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched)); - EXPECT_EQ("12345af", matched); - - EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL)); - - EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched)); - EXPECT_EQ("12", matched); - - matched.clear(); - EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched)); - EXPECT_EQ("", matched); +TEST_F(RegExpAdapterTest, TestPartialMatch) { + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + const AbstractRegExpFactory& factory = *context.factory; + const scoped_ptr reg_exp(factory.CreateRegExp("([\\da-z]+)")); + string matched; + + EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched)) + << ErrorMessage(context); + EXPECT_EQ("12345af", matched) << ErrorMessage(context); + + EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL)) + << ErrorMessage(context); + + EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched)) + << ErrorMessage(context); + EXPECT_EQ("12", matched) << ErrorMessage(context); + + matched.clear(); + EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched)) + << ErrorMessage(context); + EXPECT_EQ("", matched) << ErrorMessage(context); + } } -TEST(RegExpAdapter, TestFullMatch) { - const scoped_ptr reg_exp(RegExp::Create("([\\da-z]+)")); - string matched; +TEST_F(RegExpAdapterTest, TestFullMatch) { + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + const AbstractRegExpFactory& factory = *context.factory; + const scoped_ptr reg_exp(factory.CreateRegExp("([\\da-z]+)")); + string matched; - EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched)); - EXPECT_EQ("12345af", matched); + EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched)) + << ErrorMessage(context); + EXPECT_EQ("12345af", matched) << ErrorMessage(context); - EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL)); + EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL)) << ErrorMessage(context); - matched.clear(); - EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched)); - EXPECT_EQ("", matched); + matched.clear(); + EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched)) << ErrorMessage(context); + EXPECT_EQ("", matched) << ErrorMessage(context); - matched.clear(); - EXPECT_FALSE(reg_exp->FullMatch("[]", &matched)); - EXPECT_EQ("", matched); + matched.clear(); + EXPECT_FALSE(reg_exp->FullMatch("[]", &matched)) << ErrorMessage(context); + EXPECT_EQ("", matched) << ErrorMessage(context); + } } TEST_F(RegExpAdapterTest, TestReplace) { - string input("123-4567 "); - - ASSERT_TRUE(single_digit_->Replace(&input, "+")); - ASSERT_EQ("+23-4567 ", input); - - ASSERT_TRUE(single_digit_->Replace(&input, "+")); - ASSERT_EQ("++3-4567 ", input); - - const scoped_ptr single_letter(RegExp::Create("[a-z]")); - ASSERT_FALSE(single_letter->Replace(&input, "+")); - ASSERT_EQ("++3-4567 ", input); + for (vector::const_iterator it = contexts_.begin(); + it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + string input("123-4567 "); + + ASSERT_TRUE(context.single_digit->Replace(&input, "+")) + << ErrorMessage(context); + ASSERT_EQ("+23-4567 ", input) << ErrorMessage(context); + + ASSERT_TRUE(context.single_digit->Replace(&input, "+")) + << ErrorMessage(context); + ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context); + + const scoped_ptr single_letter( + context.factory->CreateRegExp("[a-z]")); + ASSERT_FALSE(single_letter->Replace(&input, "+")) << ErrorMessage(context); + ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context); + } } TEST_F(RegExpAdapterTest, TestReplaceWithGroup) { - // Make sure referencing groups in the regexp in the replacement string works. - // $[0-9] notation is used. - string input = "123-4567 abc"; - ASSERT_TRUE(two_digit_groups_->Replace(&input, "$2")); - ASSERT_EQ("4567 abc", input); - - input = "123-4567"; - ASSERT_TRUE(two_digit_groups_->Replace(&input, "$1")); - ASSERT_EQ("123", input); - - input = "123-4567"; - ASSERT_TRUE(two_digit_groups_->Replace(&input, "$2")); - ASSERT_EQ("4567", input); - - input = "123-4567"; - ASSERT_TRUE(two_digit_groups_->Replace(&input, "$1 $2")); - ASSERT_EQ("123 4567", input); + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + + // Make sure referencing groups in the regexp in the replacement string + // works. $[0-9] notation is used. + string input = "123-4567 abc"; + ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2")) + << ErrorMessage(context); + ASSERT_EQ("4567 abc", input) << ErrorMessage(context); + + input = "123-4567"; + ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1")) + << ErrorMessage(context); + ASSERT_EQ("123", input) << ErrorMessage(context); + + input = "123-4567"; + ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2")) + << ErrorMessage(context); + ASSERT_EQ("4567", input) << ErrorMessage(context); + + input = "123-4567"; + ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1 $2")) + << ErrorMessage(context); + ASSERT_EQ("123 4567", input) << ErrorMessage(context); + } } TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) { - // Make sure '$' can be used in the replacement string when escaped. - string input = "123-4567"; - ASSERT_TRUE(two_digit_groups_->Replace(&input, "\\$1 \\$2")); - ASSERT_EQ("$1 $2", input); + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + + // Make sure '$' can be used in the replacement string when escaped. + string input = "123-4567"; + ASSERT_TRUE(context.two_digit_groups->Replace(&input, "\\$1 \\$2")) + << ErrorMessage(context); + + ASSERT_EQ("$1 $2", input) << ErrorMessage(context); + } } TEST_F(RegExpAdapterTest, TestGlobalReplace) { - string input("123-4567 "); + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + + string input("123-4567 "); - ASSERT_TRUE(single_digit_->GlobalReplace(&input, "*")); - ASSERT_EQ("***-**** ", input); + ASSERT_TRUE(context.single_digit->GlobalReplace(&input, "*")) + << ErrorMessage(context); + ASSERT_EQ("***-**** ", input) << ErrorMessage(context); - ASSERT_FALSE(single_digit_->GlobalReplace(&input, "*")); - ASSERT_EQ("***-**** ", input); + ASSERT_FALSE(context.single_digit->GlobalReplace(&input, "*")) + << ErrorMessage(context); + ASSERT_EQ("***-**** ", input) << ErrorMessage(context); + } } -TEST(RegExpAdapter, TestUtf8) { - const scoped_ptr reg_exp(RegExp::Create( - "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90" - /* "℡⊏([α-ω]*)⊐" */)); - string matched; - - EXPECT_FALSE(reg_exp->Match( - "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true, - &matched)); - EXPECT_TRUE(reg_exp->Match( - "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90" - /* "℡⊏αβ⊐" */, true, &matched)); - EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched); +TEST_F(RegExpAdapterTest, TestUtf8) { + for (TestContextIterator it = contexts_.begin(); it != contexts_.end(); + ++it) { + const RegExpTestContext& context = **it; + const AbstractRegExpFactory& factory = *context.factory; + + const scoped_ptr reg_exp(factory.CreateRegExp( + "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90" + /* "℡⊏([α-ω]*)⊐" */)); + string matched; + + EXPECT_FALSE(reg_exp->Match( + "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true, + &matched)) << ErrorMessage(context); + EXPECT_TRUE(reg_exp->Match( + "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90" + /* "℡⊏αβ⊐" */, true, &matched)) << ErrorMessage(context); + + EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched) << ErrorMessage(context); + } } } // namespace phonenumbers diff --git a/cpp/src/phonenumbers/regexp_cache.cc b/cpp/src/phonenumbers/regexp_cache.cc index 7a0c9e3ac..36af60134 100644 --- a/cpp/src/phonenumbers/regexp_cache.cc +++ b/cpp/src/phonenumbers/regexp_cache.cc @@ -31,11 +31,13 @@ namespace phonenumbers { using base::AutoLock; -RegExpCache::RegExpCache(size_t min_items) +RegExpCache::RegExpCache(const AbstractRegExpFactory& regexp_factory, + size_t min_items) + : regexp_factory_(regexp_factory), #ifdef USE_TR1_UNORDERED_MAP - : cache_impl_(new CacheImpl(min_items)) + cache_impl_(new CacheImpl(min_items)) #else - : cache_impl_(new CacheImpl()) + cache_impl_(new CacheImpl()) #endif {} @@ -52,7 +54,7 @@ const RegExp& RegExpCache::GetRegExp(const string& pattern) { CacheImpl::const_iterator it = cache_impl_->find(pattern); if (it != cache_impl_->end()) return *it->second; - const RegExp* regexp = RegExp::Create(pattern); + const RegExp* regexp = regexp_factory_.CreateRegExp(pattern); cache_impl_->insert(make_pair(pattern, regexp)); return *regexp; } diff --git a/cpp/src/phonenumbers/regexp_cache.h b/cpp/src/phonenumbers/regexp_cache.h index 08e19127d..185a8b8b9 100644 --- a/cpp/src/phonenumbers/regexp_cache.h +++ b/cpp/src/phonenumbers/regexp_cache.h @@ -45,6 +45,7 @@ namespace phonenumbers { using std::string; +class AbstractRegExpFactory; class RegExp; class RegExpCache { @@ -56,12 +57,14 @@ class RegExpCache { #endif public: - explicit RegExpCache(size_t min_items); + explicit RegExpCache(const AbstractRegExpFactory& regexp_factory, + size_t min_items); ~RegExpCache(); const RegExp& GetRegExp(const string& pattern); private: + const AbstractRegExpFactory& regexp_factory_; base::Lock lock_; // protects cache_impl_ scoped_ptr cache_impl_; // protected by lock_ friend class RegExpCacheTest_CacheConstructor_Test; diff --git a/cpp/src/phonenumbers/regexp_cache_test.cc b/cpp/src/phonenumbers/regexp_cache_test.cc index 06039cdbe..ebc1f27a0 100644 --- a/cpp/src/phonenumbers/regexp_cache_test.cc +++ b/cpp/src/phonenumbers/regexp_cache_test.cc @@ -19,8 +19,8 @@ #include -#include "phonenumbers/regexp_adapter.h" #include "phonenumbers/regexp_cache.h" +#include "phonenumbers/regexp_factory.h" namespace i18n { namespace phonenumbers { @@ -31,9 +31,10 @@ class RegExpCacheTest : public testing::Test { protected: static const size_t min_items_ = 2; - RegExpCacheTest() : cache_(min_items_) {} + RegExpCacheTest() : cache_(regexp_factory_, min_items_) {} virtual ~RegExpCacheTest() {} + RegExpFactory regexp_factory_; RegExpCache cache_; }; diff --git a/cpp/src/phonenumbers/regexp_factory.h b/cpp/src/phonenumbers/regexp_factory.h new file mode 100644 index 000000000..b471b8109 --- /dev/null +++ b/cpp/src/phonenumbers/regexp_factory.h @@ -0,0 +1,44 @@ +// Copyright (C) 2011 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: Philippe Liard + +#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_ +#define I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_ + +// This file selects the right implementation of the abstract regexp factory at +// compile time depending on the compilation flags (USE_RE2). The default +// abstract regexp factory implementation can be obtained using the type +// RegExpFactory. This will be set to RE2RegExpFactory if RE2 is used or +// ICURegExpFactory otherwise. + +#ifdef USE_RE2 +#include "phonenumbers/regexp_adapter_re2.h" +#else +#include "phonenumbers/regexp_adapter_icu.h" +#endif // USE_RE2 + +namespace i18n { +namespace phonenumbers { + +#ifdef USE_RE2 +typedef RE2RegExpFactory RegExpFactory; +#else +typedef ICURegExpFactory RegExpFactory; +#endif // USE_RE2 + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_ diff --git a/cpp/src/phonenumbers/stl_util.h b/cpp/src/phonenumbers/stl_util.h index 29e86b39c..0f1c5b318 100644 --- a/cpp/src/phonenumbers/stl_util.h +++ b/cpp/src/phonenumbers/stl_util.h @@ -36,6 +36,15 @@ void STLDeleteContainerPairSecondPointers(const ForwardIterator& begin, } } +// Deletes the pointers contained in the provided container. +template +void STLDeleteElements(T* container) { + for (typename T::iterator it = container->begin(); it != container->end(); + ++it) { + delete *it; + } +} + } // namespace phonenumbers } // namespace i18n