CPP: Add phonenumbermatcher.

14 years ago · 7b30af6d59
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@ -67,8 +67,9 @@ function (find_required_program NAME FILENAME DESCRIPTION)
 endfunction (find_required_program)

 # Options that can be passed to CMake using 'cmake -DKEY=VALUE'.
 option ("USE_ICU_REGEXP" "Use ICU regexp engine" "ON")
 option ("USE_LITE_METADATA" "Use lite metadata" "OFF")
 option ("USE_RE2" "Use RE2 instead of ICU" "OFF")
 option ("USE_RE2" "Use RE2" "OFF")
 option ("USE_STD_MAP" "Force the use of std::map" "OFF")

 # Find all the required libraries and programs.
@ -94,7 +95,7 @@ check_library_version (PC_ICU_UC icu-uc>=4.4)
 set (ICU_INCLUDE_DIR ${ICU_UC_INCLUDE_DIR})
 set (ICU_LIB ${ICU_UC_LIB})
 # If ICU regexp engine is used, use icui18n as well.
 if (${USE_RE2} STREQUAL "OFF")
 if (${USE_ICU_REGEXP} STREQUAL "ON")
  find_required_library (ICU_I18N unicode/regex.h icui18n "ICU")
  check_library_version (PC_ICU_I18N icu-i18n>=4.4)
  list (APPEND ICU_INCLUDE_DIR ${ICU_I18N_INCLUDE_DIR})
@ -169,14 +170,22 @@ set (
  "src/phonenumbers/utf/unilib.cc"
 )

 # Add regexp engine sources. ICU is used by default.
 # Add regexp engine-dependent sources. ICU is used by default.
 # RE2 support: compiled in only when the USE_RE2 option is exactly "ON".
 # The expansion is quoted so that an empty value still yields a well-formed
 # comparison instead of a malformed if() call.
 if ("${USE_RE2}" STREQUAL "ON")
  # Add a flag to select the right regexp factory implementation used by
  # regexp_factory.h and regexp_adapter_test.cc.
  # When both ICU regexp and RE2 are defined, the regexp engine adapter defaults
  # to RE2 unless the ICU implementation is instantiated explicitly.
  add_definitions (-DUSE_RE2)
  list (APPEND SOURCES "src/phonenumbers/regexp_adapter_re2.cc")
 else ()
 endif ()

 # ICU regexp support: adds the ICU regexp adapter and, because it needs ICU,
 # the phone number matcher sources. The expansion is quoted so that an empty
 # value still yields a well-formed comparison.
 if ("${USE_ICU_REGEXP}" STREQUAL "ON")
  add_definitions (-DUSE_ICU_REGEXP)
  list (APPEND SOURCES "src/phonenumbers/regexp_adapter_icu.cc")
  # The phone number matcher needs ICU.
  list (APPEND SOURCES "src/phonenumbers/phonenumbermatch.cc")
  list (APPEND SOURCES "src/phonenumbers/phonenumbermatcher.cc")
 endif ()

 # Library sources excluding the metadata files, since special metadata is used
@ -300,9 +309,15 @@ set (TEST_SOURCES
  "test/phonenumbers/regexp_cache_test.cc"
  "test/phonenumbers/run_tests.cc"
  "test/phonenumbers/stringutil_test.cc"
  "test/phonenumbers/test_util.cc"
  "test/phonenumbers/unicodestring_test.cc"
  "test/phonenumbers/utf/unicodetext_test.cc"
 )
 # The matcher tests are only built when the matcher itself is, i.e. when the
 # USE_ICU_REGEXP option is "ON". The expansion is quoted so that an empty value
 # still yields a well-formed comparison.
 if ("${USE_ICU_REGEXP}" STREQUAL "ON")
  # Add the phone number matcher tests.
  list (APPEND TEST_SOURCES "test/phonenumbers/phonenumbermatch_test.cc")
  list (APPEND TEST_SOURCES "test/phonenumbers/phonenumbermatcher_test.cc")
 endif ()

 # Build the testing binary.
 include_directories ("test")
@ -329,8 +344,17 @@ install (FILES
 install (FILES "src/phonenumbers/utf/unicodetext.h"
         DESTINATION include/phonenumbers/utf/)

 install (FILES src/base/basictypes.h
         DESTINATION include/base/)
 # Matcher-related headers are installed only when USE_ICU_REGEXP is "ON".
 # The expansion is quoted so that an empty value still yields a well-formed
 # comparison.
 if ("${USE_ICU_REGEXP}" STREQUAL "ON")
  # Install the phone number matcher headers (and the regexp adapter header).
  install (FILES
    "src/phonenumbers/phonenumbermatch.h"
    "src/phonenumbers/phonenumbermatcher.h"
    "src/phonenumbers/regexp_adapter.h"
    DESTINATION include/phonenumbers/
  )
 endif ()

 install (FILES "src/base/basictypes.h" DESTINATION include/base/)

 install (FILES
  "src/base/memory/scoped_ptr.h"
@ -338,7 +362,7 @@ install (FILES
  DESTINATION include/base/memory/
 )

 install (FILES src/base/synchronization/lock.h
 install (FILES "src/base/synchronization/lock.h"
         DESTINATION include/base/synchronization/)

 install (TARGETS phonenumber LIBRARY DESTINATION lib/ ARCHIVE DESTINATION lib/)
--- a/cpp/README
+++ b/cpp/README
@ -80,12 +80,15 @@ How to build libphonenumber C++:
  $ cd libphonenumber
  $ mkdir build
  $ cd build
  $ cmake ../cpp/
  $ cmake ..
  $ make

 Supported build parameters:
  Build parameters can be specified invoking CMake with '-DKEY=VALUE' or using a
  CMake user interface (ccmake or cmake-gui).

  USE_ICU_REGEXP    = ON | OFF [ON]  -- Use ICU regexp engine.
  USE_LITE_METADATA = ON | OFF [OFF] -- Generates smaller metadata that doesn't
                                        include example numbers.
  USE_RE2           = ON | OFF [OFF] -- Use RE2.
  USE_STD_MAP       = ON | OFF [OFF] -- Force the use of std::map.
--- a/cpp/src/base/logging.h
+++ b/cpp/src/base/logging.h
@ -23,8 +23,11 @@

 #define CHECK_EQ(X, Y) assert((X) == (Y))

 // Debug-check macros. In this header they all expand to assert() (DCHECK_EQ
 // via CHECK_EQ), so they follow the standard assert() behaviour and are
 // compiled out when NDEBUG is defined.
 #define DCHECK(X) assert(X)
 #define DCHECK_EQ(X, Y) CHECK_EQ((X), (Y))
 # define DCHECK(X) assert(X)
 # define DCHECK_EQ(X, Y) CHECK_EQ((X), (Y))
 // Ordering checks: X >= Y, X > Y and X < Y respectively.
 # define DCHECK_GE(X, Y) assert((X) >= (Y))
 # define DCHECK_GT(X, Y) assert((X) > (Y))
 # define DCHECK_LT(X, Y) assert((X) < (Y))

 template <typename T> T* CHECK_NOTNULL(T* ptr) {
  assert(ptr);
--- a/cpp/src/phonenumbers/encoding_utils.h
+++ b/cpp/src/phonenumbers/encoding_utils.h
@ -16,6 +16,7 @@
 #define I18N_PHONENUMBERS_ENCODING_UTILS_H_

 #include "base/basictypes.h"
 #include "phonenumbers/utf/unilib.h"
 #include "phonenumbers/utf/utf.h"

 namespace i18n {
@ -32,6 +33,16 @@ class EncodingUtils {
    *out = r;
    return len;
  }

  // Returns buf_utf8 advanced by the length that UniLib::OneCharLen reports
  // for the character starting at buf_utf8 (presumably the byte length of one
  // UTF-8 encoded character; see UniLib for the exact contract).
  static const char* AdvanceOneUTF8Character(const char* buf_utf8) {
    return buf_utf8 + UniLib::OneCharLen(buf_utf8);
  }

  // Walks backwards from end, never going before start, and returns the first
  // position reached whose byte is not reported as a trail byte by
  // UniLib::IsTrailByte. If start is reached first, start is returned. When
  // start >= end on entry, end is returned unchanged.
  static const char* BackUpOneUTF8Character(const char* start,
                                            const char* end) {
    const char* cursor = end;
    while (start < cursor) {
      --cursor;
      if (!UniLib::IsTrailByte(*cursor)) {
        break;
      }
    }
    return cursor;
  }
 };

 }  // namespace phonenumbers
--- a/cpp/src/phonenumbers/phonenumbermatch.cc
+++ b/cpp/src/phonenumbers/phonenumbermatch.cc
@ -0,0 +1,91 @@
 // Copyright (C) 2011 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Tao Huang
 //
 // Implementation of a mutable match of a phone number within a piece of
 // text. Matches may be found using PhoneNumberUtil::FindNumbers.

 #include "phonenumbers/phonenumbermatch.h"

 #include <string>

 #include "phonenumbers/phonenumber.h"
 #include "phonenumbers/phonenumber.pb.h"
 #include "phonenumbers/stringutil.h"

 namespace i18n {
 namespace phonenumbers {

 // Builds a match from its three parts: the start offset in the searched text,
 // the matched text itself and the phone number it was parsed into.
 PhoneNumberMatch::PhoneNumberMatch(int start,
                                    const string& raw_string,
                                    const PhoneNumber& number)
    : start_(start),
      raw_string_(raw_string),
      number_(number) {}

 // Builds an empty match: start offset -1, empty raw string and a copy of the
 // default PhoneNumber instance.
 PhoneNumberMatch::PhoneNumberMatch()
    : start_(-1),
      raw_string_(),
      number_(PhoneNumber::default_instance()) {}

 // The phone number stored in this match.
 const PhoneNumber& PhoneNumberMatch::number() const { return number_; }

 // Offset at which the match starts.
 int PhoneNumberMatch::start() const { return start_; }

 // Exclusive end offset: the start offset plus the raw string's length.
 int PhoneNumberMatch::end() const {
  return start_ + static_cast<int>(raw_string_.length());
 }

 // Length of the raw matched string.
 int PhoneNumberMatch::length() const {
  return static_cast<int>(raw_string_.length());
 }

 // The raw matched string.
 const string& PhoneNumberMatch::raw_string() const { return raw_string_; }

 // Replaces the start offset.
 void PhoneNumberMatch::set_start(int start) { start_ = start; }

 // Replaces the raw matched string with a copy of raw_string.
 void PhoneNumberMatch::set_raw_string(const string& raw_string) {
  raw_string_.assign(raw_string);
 }

 // Replaces the stored phone number with a copy of number.
 void PhoneNumberMatch::set_number(const PhoneNumber& number) {
  number_.CopyFrom(number);
 }

 // Debug representation: "PhoneNumberMatch [<start>,<end>) <raw string>".
 string PhoneNumberMatch::ToString() const {
  const int range_begin = start();
  const int range_end = end();
  return StrCat("PhoneNumberMatch [", range_begin, ",", range_end, ") ",
                raw_string_.c_str());
 }

 // Two matches are equal when their phone numbers are exactly the same, their
 // raw strings compare equal and their start offsets are identical. The checks
 // run in that order and stop at the first difference.
 bool PhoneNumberMatch::Equals(const PhoneNumberMatch& match) const {
  if (!ExactlySameAs(match.number_, number_)) {
    return false;
  }
  if (match.raw_string_.compare(raw_string_) != 0) {
    return false;
  }
  return match.start_ == start_;
 }

 void PhoneNumberMatch::CopyFrom(const PhoneNumberMatch& match) {
  raw_string_ = match.raw_string();
  start_ = match.start();
  number_ = match.number();
 }

 }  // namespace phonenumbers
 }  // namespace i18n
--- a/cpp/src/phonenumbers/phonenumbermatch.h
+++ b/cpp/src/phonenumbers/phonenumbermatch.h
@ -0,0 +1,125 @@
 // Copyright (C) 2011 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Tao Huang
 //
 // A mutable match of a phone number within a piece of text.
 // Matches may be found using PhoneNumberUtil::FindNumbers.
 //
 // A match consists of the phone number as well as the start and end offsets of
 // the corresponding subsequence of the searched text. Use raw_string() to
 // obtain a copy of the matched subsequence.
 //
 // The following annotated example clarifies the relationship between the
 // searched text, the match offsets, and the parsed number:
 //
 // string text = "Call me at +1 425 882-8080 for details.";
 // const string country = "US";
 //
 // // Find the first phone number match:
 // PhoneNumberMatcher matcher(text, country);
 // PhoneNumberMatch match;
 // if (matcher.HasNext()) {
 //   matcher.Next(&match);
 // }
 //
 // // raw_string() contains the phone number as it appears in the text.
 // "+1 425 882-8080" == match.raw_string();
 //
 // // start() and end() define the range of the matched subsequence; end() is
 // // exclusive, so the subsequence has length() == end() - start() characters.
 // string subsequence = text.substr(match.start(), match.length());
 // "+1 425 882-8080" == subsequence;
 //
 // // number() returns the same result as PhoneNumberUtil::Parse()
 // // invoked on raw_string().
 // const PhoneNumberUtil& util = *PhoneNumberUtil::GetInstance();
 // util.Parse(match.raw_string(), country).Equals(match.number());
 //
 // This class is a port of PhoneNumberMatch.java

 #ifndef I18N_PHONENUMBERS_PHONENUMBERMATCH_H_
 #define I18N_PHONENUMBERS_PHONENUMBERMATCH_H_

 #include <string>

 #include "base/basictypes.h"
 #include "phonenumbers/phonenumber.pb.h"

 namespace i18n {
 namespace phonenumbers {

 using std::string;

 // Mutable record of one phone number found in a piece of text: the start
 // offset of the match, the raw matched string and the parsed phone number.
 // Copy construction and assignment are disabled; use CopyFrom() to duplicate
 // a match.
 class PhoneNumberMatch {
 public:
  // Creates a new match.
  // - start is the index into the target text.
  // - raw_string is the matched string of the target text.
  // - number is the matched phone number.
  PhoneNumberMatch(int start,
                   const string& raw_string,
                   const PhoneNumber& number);

  // Default constructor.
  PhoneNumberMatch();

  ~PhoneNumberMatch() {}

  // Returns the phone number matched by the receiver.
  const PhoneNumber& number() const;

  // Returns the start index of the matched phone number within the searched
  // text.
  int start() const;

  // Returns the exclusive end index of the matched phone number within the
  // searched text.
  int end() const;

  // Returns the length of the text matched in the searched text.
  int length() const;

  // Returns the raw string matched as a phone number in the searched text.
  const string& raw_string() const;

  // Returns a string containing debug information.
  string ToString() const;

  // Sets the start index of the match within the searched text.
  void set_start(int start);

  // Sets the raw string matched as a phone number.
  void set_raw_string(const string& raw_string);

  // Sets the matched phone number (the argument is copied).
  void set_number(const PhoneNumber& number);

  // Returns true if the given match has the same phone number, raw string and
  // start index as the receiver.
  bool Equals(const PhoneNumberMatch& match) const;

  // Copies the raw string, start index and phone number of the given match
  // into the receiver.
  void CopyFrom(const PhoneNumberMatch& match);

 private:
  // The start index into the text.
  int start_;

  // The raw substring matched.
  string raw_string_;

  // The matched phone number.
  PhoneNumber number_;

  DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatch);
 };

 }  // namespace phonenumbers
 }  // namespace i18n

 #endif  // I18N_PHONENUMBERS_PHONENUMBERMATCH_H_
--- a/cpp/src/phonenumbers/phonenumbermatcher.cc
+++ b/cpp/src/phonenumbers/phonenumbermatcher.cc
@ -0,0 +1,626 @@
 // Copyright (C) 2011 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Lara Rennie
 // Author: Tao Huang
 //
 // Implementation of a stateful class that finds and extracts telephone numbers
 // from text.

 #include "phonenumbers/phonenumbermatcher.h"

 #ifndef USE_ICU_REGEXP
 #error phonenumbermatcher depends on ICU (i.e. USE_ICU_REGEXP must be set)
 #endif  // USE_ICU_REGEXP

 #include <limits>
 #include <string>
 #include <unicode/uchar.h>

 #include "base/logging.h"
 #include "base/memory/scoped_ptr.h"
 #include "base/memory/singleton.h"
 #include "phonenumbers/default_logger.h"
 #include "phonenumbers/encoding_utils.h"
 #include "phonenumbers/normalize_utf8.h"
 #include "phonenumbers/phonenumber.pb.h"
 #include "phonenumbers/phonenumbermatch.h"
 #include "phonenumbers/phonenumberutil.h"
 #include "phonenumbers/regexp_adapter.h"
 #include "phonenumbers/regexp_adapter_icu.h"
 #include "phonenumbers/stringutil.h"

 #ifdef USE_RE2
 #include "phonenumbers/regexp_adapter_re2.h"
 #endif  // USE_RE2

 using std::numeric_limits;
 using std::string;
 using std::vector;

 namespace i18n {
 namespace phonenumbers {

 namespace {
 // Builds the regular expression quantifier "{lower,upper}". In debug builds
 // the bounds are checked: lower >= 0, upper > 0 and lower < upper.
 string Limit(int lower, int upper) {
  DCHECK_GE(lower, 0);
  DCHECK_GT(upper, 0);
  DCHECK_LT(lower, upper);
  const string quantifier = StrCat("{", lower, ",", upper, "}");
  return quantifier;
 }

 // Returns true if ICU's u_charType() classifies the character as
 // U_CURRENCY_SYMBOL.
 bool IsCurrencySymbol(char32 character) {
  const int category = u_charType(character);
  return category == U_CURRENCY_SYMBOL;
 }

 // Helper method to get the national-number part of a number, formatted without
 // any national prefix, and return it as a set of digit blocks that would be
 // formatted together.
 void GetNationalNumberGroups(const PhoneNumberUtil& util,
                             const PhoneNumber& number,
                             vector<string>* digit_blocks) {
  // This will be in the format +CC-DG;ext=EXT where DG represents groups of
  // digits.
  string rfc3966_format;
  util.Format(number, PhoneNumberUtil::RFC3966, &rfc3966_format);
  // We remove the extension part from the formatted string before splitting it
  // into different groups.
  size_t end_index = rfc3966_format.find(';');
  if (end_index == string::npos) {
    end_index = rfc3966_format.length();
  }
  // The country-code will have a '-' following it.
  size_t start_index = rfc3966_format.find('-') + 1;
  SplitStringUsing(rfc3966_format.substr(start_index, end_index - start_index),
                   "-", digit_blocks);
 }

 // Checks every ASCII 'x'/'X' in candidate (other than one in the final
 // position) against the parsed number. Returns false as soon as one of them
 // can be explained neither as part of a carrier code nor as an extension
 // sign; returns true otherwise.
 bool ContainsOnlyValidXChars(const PhoneNumber& number, const string& candidate,
                              const PhoneNumberUtil& util) {
  // The characters 'x' and 'X' can be (1) a carrier code, in which case they
  // always precede the national significant number or (2) an extension sign,
  // in which case they always precede the extension number. We assume a
  // carrier code is more than 1 digit, so the first case has to have more than
  // 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1
  // 'x' or 'X'.
  //
  // An 'x' or 'X' that is the last character of the string is ignored, hence
  // the "x_pos + 1 < length" bound.
  for (size_t x_pos = candidate.find_first_of("xX");
       x_pos != string::npos && x_pos + 1 < candidate.length();
       x_pos = candidate.find_first_of("xX", x_pos + 1)) {
    // Only the ASCII forms of 'x' and 'X' are considered.
    const char following = candidate[x_pos + 1];
    if (following == 'x' || following == 'X') {
      // Carrier code case: the 'X's always precede the national significant
      // number, so the text from the second 'x' onwards must match it.
      ++x_pos;
      if (util.IsNumberMatchWithOneString(number, candidate.substr(x_pos))
          != PhoneNumberUtil::NSN_MATCH) {
        return false;
      }
    } else {
      // Extension case: the digits from the 'x' onwards must be the extension.
      string normalized_extension(candidate.substr(x_pos));
      util.NormalizeDigitsOnly(&normalized_extension);
      if (normalized_extension != number.extension()) {
        return false;
      }
    }
  }
  return true;
 }
 }  // namespace

 // Singleton holding the pattern fragments and compiled regular expressions
 // used by PhoneNumberMatcher. The string members are building blocks that the
 // constructor combines into the compiled RegExp members; since members are
 // initialised in declaration order, the declaration order below matters.
 #ifdef USE_GOOGLE_BASE
 class PhoneNumberMatcherRegExps {
  friend struct DefaultSingletonTraits<PhoneNumberMatcherRegExps>;
 #else
 class PhoneNumberMatcherRegExps : public Singleton<PhoneNumberMatcherRegExps> {
  friend class Singleton<PhoneNumberMatcherRegExps>;
 #endif  // USE_GOOGLE_BASE
 private:
  string opening_parens_;
  string closing_parens_;
  string non_parens_;
  // Limit on the number of pairs of brackets in a phone number.
  string bracket_pair_limit_;
  // Helper strings for the matching_brackets_ pattern.
  // An opening bracket at the beginning may not be closed, but subsequent ones
  // should be. It's also possible that the leading bracket was dropped, so we
  // shouldn't be surprised if we see a closing bracket first.
  string leading_maybe_matched_bracket_;
  string bracket_pairs_;
  // Limit on the number of leading (plus) characters.
  string lead_limit_;
  // Limit on the number of consecutive punctuation characters.
  string punctuation_limit_;
  // The maximum number of digits allowed in a digit-separated block. As we
  // allow all digits in a single block, this should be set high enough to
  // accommodate the entire national number and the international country code.
  int digit_block_limit_;
  // Limit on the number of blocks separated by punctuation. Uses
  // digit_block_limit_ since some formats use spaces to separate each digit.
  string block_limit_;
  // A punctuation sequence allowing white space.
  string punctuation_;
  // A digits block without punctuation.
  string digit_sequence_;
  // Punctuation that may be at the start of a phone number - brackets and plus
  // signs.
  string lead_class_chars_;
  // Same as lead_class_chars_, but enclosed as a character class.
  string lead_class_;
  // Extra helper strings that form part of pattern_. These are stored
  // separately since StrCat has a limit of 12 args.
  string opening_punctuation_;
  string optional_extn_pattern_;

 public:
  // We use two different reg-ex factories here for performance reasons. RE2 is
  // much faster for smaller reg-ex patterns, but the main pattern cannot be
  // handled by RE2 in an efficient way.
  scoped_ptr<const AbstractRegExpFactory> regexp_factory_for_pattern_;
  scoped_ptr<const AbstractRegExpFactory> regexp_factory_;

  // Matches strings that look like publication pages. Example:
  // Computing Complete Answers to Queries in the Presence of Limited Access
  // Patterns. Chen Li. VLDB J. 12(3): 211-227 (2003).
  //
  // The string "211-227 (2003)" is not a telephone number.
  scoped_ptr<const RegExp> pub_pages_;
  // Matches strings that look like dates using "/" as a separator. Examples:
  // 3/10/2011, 31/10/96 or 08/31/95.
  scoped_ptr<const RegExp> slash_separated_dates_;
  // Pattern to check that brackets match. Opening brackets should be closed
  // within a phone number. This also checks that there is something inside the
  // brackets. Having no brackets at all is also fine.
  scoped_ptr<const RegExp> matching_brackets_;
  // Matches white-space, which may indicate the end of a phone number and the
  // start of something else (such as a neighbouring zip-code). If white-space
  // is found, continues to match all characters that are not typically used to
  // start a phone number.
  scoped_ptr<const RegExp> group_separator_;
  scoped_ptr<const RegExp> capture_up_to_second_number_start_pattern_;
  scoped_ptr<const RegExp> capturing_ascii_digits_pattern_;
  // Compiled reg-ex representing lead_class_;
  scoped_ptr<const RegExp> lead_class_pattern_;
  // Phone number pattern allowing optional punctuation.
  scoped_ptr<const RegExp> pattern_;

 #ifdef USE_GOOGLE_BASE
  // Returns the process-wide instance. This must be a static member because it
  // is invoked as PhoneNumberMatcherRegExps::GetInstance() without an object,
  // and an in-class declaration must not be qualified with the class name.
  static PhoneNumberMatcherRegExps* GetInstance() {
    return Singleton<PhoneNumberMatcherRegExps>::get();
  }
 #endif  // USE_GOOGLE_BASE

  // Builds every pattern fragment and compiles all regular expressions. The
  // main pattern_ is always compiled with the ICU factory; the smaller helper
  // patterns use RE2 when USE_RE2 is defined and ICU otherwise.
  PhoneNumberMatcherRegExps()
      : opening_parens_("(\\[\xEF\xBC\x88\xEF\xBC\xBB" /* "(\\[（［" */),
        closing_parens_(")\\]\xEF\xBC\x89\xEF\xBC\xBD" /* ")\\]）］" */),
        non_parens_(StrCat("[^", opening_parens_, closing_parens_, "]")),
        bracket_pair_limit_(Limit(0, 3)),
        leading_maybe_matched_bracket_(StrCat(
            "(?:[", opening_parens_, "])?",
            "(?:", non_parens_, "+[", closing_parens_, "])?")),
        bracket_pairs_(StrCat(
            "(?:[", opening_parens_, "]", non_parens_, "+",
            "[", closing_parens_, "])", bracket_pair_limit_)),
        lead_limit_(Limit(0, 2)),
        punctuation_limit_(Limit(0, 4)),
        digit_block_limit_(PhoneNumberUtil::kMaxLengthForNsn +
                           PhoneNumberUtil::kMaxLengthCountryCode),
        block_limit_(Limit(0, digit_block_limit_)),
        punctuation_(StrCat("[", PhoneNumberUtil::kValidPunctuation, "]",
                            punctuation_limit_)),
        digit_sequence_(StrCat("\\p{Nd}", Limit(1, digit_block_limit_))),
        lead_class_chars_(StrCat(opening_parens_, PhoneNumberUtil::kPlusChars)),
        lead_class_(StrCat("[", lead_class_chars_, "]")),
        opening_punctuation_(StrCat("(?:", lead_class_, punctuation_, ")")),
        optional_extn_pattern_(StrCat(
            "(?i)(?:",
            PhoneNumberUtil::GetInstance()->GetExtnPatternsForMatching(),
            ")?")),
        regexp_factory_for_pattern_(new ICURegExpFactory()),
 #ifdef USE_RE2
        regexp_factory_(new RE2RegExpFactory()),
 #else
        regexp_factory_(new ICURegExpFactory()),
 #endif  // USE_RE2
        pub_pages_(regexp_factory_->CreateRegExp(
            "\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}")),
        slash_separated_dates_(regexp_factory_->CreateRegExp(
            "(?:(?:[0-3]?\\d/[01]?\\d)|"
            "(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}")),
        matching_brackets_(regexp_factory_->CreateRegExp(
            StrCat(leading_maybe_matched_bracket_, non_parens_, "+",
                   bracket_pairs_, non_parens_, "*"))),
        group_separator_(regexp_factory_->CreateRegExp(
            StrCat("\\p{Z}", "[^", lead_class_chars_, "\\p{Nd}]*"))),
        capture_up_to_second_number_start_pattern_(
            regexp_factory_->CreateRegExp(
                PhoneNumberUtil::kCaptureUpToSecondNumberStart)),
        capturing_ascii_digits_pattern_(
            regexp_factory_->CreateRegExp("(\\d+)")),
        lead_class_pattern_(regexp_factory_->CreateRegExp(lead_class_)),
        pattern_(regexp_factory_for_pattern_->CreateRegExp(
            StrCat("(", opening_punctuation_, lead_limit_,
                   digit_sequence_, "(?:", punctuation_, digit_sequence_, ")",
                   block_limit_, optional_extn_pattern_, ")"))) {
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcherRegExps);
 };

 // Creates a matcher over text with every setting supplied by the caller.
 // - util is the PhoneNumberUtil used for parsing and validation.
 // - text is the text to search.
 // - region_code is stored as the preferred region and passed to Parse().
 // - leniency selects the verification applied to each candidate.
 // - max_tries is stored in max_tries_.
 // The matcher starts in the NOT_READY state with no last match and a search
 // index of 0; the shared regular expressions come from the
 // PhoneNumberMatcherRegExps singleton.
 PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util,
                                        const string& text,
                                        const string& region_code,
                                        PhoneNumberMatcher::Leniency leniency,
                                        int max_tries)
    : reg_exps_(PhoneNumberMatcherRegExps::GetInstance()),
      phone_util_(util),
      text_(text),
      preferred_region_(region_code),
      leniency_(leniency),
      max_tries_(max_tries),
      state_(NOT_READY),
      last_match_(NULL),
      search_index_(0) {
 }

 // Convenience constructor: searches text using the global PhoneNumberUtil
 // instance, VALID leniency and the largest int value as max_tries_.
 // region_code is stored as the preferred region. The remaining state is
 // initialised exactly as in the five-argument constructor.
 PhoneNumberMatcher::PhoneNumberMatcher(const string& text,
                                        const string& region_code)
    : reg_exps_(PhoneNumberMatcherRegExps::GetInstance()),
      phone_util_(*PhoneNumberUtil::GetInstance()),
      text_(text),
      preferred_region_(region_code),
      leniency_(VALID),
      max_tries_(numeric_limits<int>::max()),
      state_(NOT_READY),
      last_match_(NULL),
      search_index_(0) {
 }

 // Empty body: no explicit cleanup is performed here.
 PhoneNumberMatcher::~PhoneNumberMatcher() {
 }

 // static
 // Returns true if the character is alphabetic or a non-spacing mark, and it
 // belongs to one of the Latin Unicode blocks listed below or to the combining
 // diacritical marks block.
 bool PhoneNumberMatcher::IsLatinLetter(char32 letter) {
  // Combining marks are a subset of non-spacing-mark.
  const bool is_letter_or_mark =
      u_isalpha(letter) || (u_charType(letter) == U_NON_SPACING_MARK);
  if (!is_letter_or_mark) {
    return false;
  }
  switch (ublock_getCode(letter)) {
    case UBLOCK_BASIC_LATIN:
    case UBLOCK_LATIN_1_SUPPLEMENT:
    case UBLOCK_LATIN_EXTENDED_A:
    case UBLOCK_LATIN_EXTENDED_ADDITIONAL:
    case UBLOCK_LATIN_EXTENDED_B:
    case UBLOCK_COMBINING_DIACRITICAL_MARKS:
      return true;
    default:
      return false;
  }
 }

 // Decides whether candidate, found at byte offset `offset` within text_, is an
 // acceptable phone number for the configured leniency. On success fills match
 // with the offset, the candidate string and the parsed number and returns
 // true; otherwise returns false and leaves match untouched.
 bool PhoneNumberMatcher::ParseAndVerify(const string& candidate, int offset,
                                         PhoneNumberMatch* match) {
  DCHECK(match);
  // Check the candidate doesn't contain any formatting which would indicate
  // that it really isn't a phone number.
  if (!reg_exps_->matching_brackets_->FullMatch(candidate)) {
    return false;
  }

  // If leniency is set to VALID or stricter, we also want to skip numbers that
  // are surrounded by Latin alphabetic characters, to skip cases like
  // abc8005001234 or 8005001234def.
  if (leniency_ >= VALID) {
    // If the candidate is not at the start of the text, and does not start with
    // phone-number punctuation, check the previous character.
    scoped_ptr<RegExpInput> candidate_input(
        reg_exps_->regexp_factory_->CreateInput(candidate));
    if (offset > 0 &&
        !reg_exps_->lead_class_pattern_->Consume(candidate_input.get())) {
      char32 previous_char;
      // Step back from the candidate's first byte to the start of the
      // character that precedes it in text_.
      const char* previous_char_ptr =
          EncodingUtils::BackUpOneUTF8Character(text_.c_str(),
                                                text_.c_str() + offset);
      EncodingUtils::DecodeUTF8Char(previous_char_ptr, &previous_char);
      // We return false if it is a latin letter or a currency symbol.
      if (IsCurrencySymbol(previous_char) || IsLatinLetter(previous_char)) {
        return false;
      }
    }
    // Byte index just past the candidate; if text follows, apply the same
    // check to the character after the candidate.
    size_t lastCharIndex = offset + candidate.length();
    if (lastCharIndex < text_.length()) {
      char32 next_char;
      // NOTE(review): this advances one character starting from the
      // candidate's last byte; it presumably relies on that byte being the
      // start of a character (or on OneCharLen coping with a trail byte) --
      // confirm for candidates ending in a multi-byte character.
      const char* next_char_ptr =
          EncodingUtils::AdvanceOneUTF8Character(
              text_.c_str() + lastCharIndex - 1);
      EncodingUtils::DecodeUTF8Char(next_char_ptr, &next_char);
      if (IsCurrencySymbol(next_char) || IsLatinLetter(next_char)) {
        return false;
      }
    }
  }

  // The candidate must parse cleanly for the preferred region...
  PhoneNumber number;
  if (phone_util_.Parse(candidate, preferred_region_, &number) !=
      PhoneNumberUtil::NO_PARSING_ERROR) {
    return false;
  }
  // ...and pass the leniency-specific verification before match is filled in.
  if (VerifyAccordingToLeniency(leniency_, number, candidate)) {
    match->set_start(offset);
    match->set_raw_string(candidate);
    match->set_number(number);
    return true;
  }
  return false;
 }

 // Helper method to replace the verification method for each enum in the Java
 // version.
 //
 // Returns true if number (parsed from candidate) satisfies the given leniency:
 // - POSSIBLE: the number is a possible number.
 // - VALID: the number is valid and every 'x'/'X' in candidate is accounted
 //   for.
 // - STRICT_GROUPING: as VALID, with fewer than two '/' in candidate, and the
 //   digit groups of the formatted number must each appear unbroken, in order,
 //   in the candidate.
 // - EXACT_GROUPING: as VALID, with fewer than two '/' in candidate, and the
 //   candidate's digit groups must equal the formatted number's groups (the
 //   first group only has to match as a suffix).
 // Any other leniency value is logged as an error and rejected.
 bool PhoneNumberMatcher::VerifyAccordingToLeniency(
    Leniency leniency, const PhoneNumber& number,
    const string& candidate) const {
  switch (leniency) {
    case PhoneNumberMatcher::POSSIBLE:
      return phone_util_.IsPossibleNumber(number);
    case PhoneNumberMatcher::VALID:
      if (!phone_util_.IsValidNumber(number)) {
        return false;
      }
      return ContainsOnlyValidXChars(number, candidate, phone_util_);
    case PhoneNumberMatcher::STRICT_GROUPING: {
      if (!phone_util_.IsValidNumber(number) ||
          !ContainsOnlyValidXChars(number, candidate, phone_util_) ||
          // Two or more slashes were present.
          FindNth(candidate, '/', 2) != string::npos) {
        return false;
      }
      // TODO(lararennie,shaopengjia): Evaluate how this works for other locales
      // (testing has been limited to NANPA regions) and optimise if necessary.
      string normalized_candidate =
          NormalizeUTF8::NormalizeDecimalDigits(candidate);
      vector<string> formatted_number_groups;
      GetNationalNumberGroups(phone_util_, number, &formatted_number_groups);
      // Position in normalized_candidate from which the next group is searched.
      size_t from_index = 0;
      // Check each group of consecutive digits are not broken into separate
      // groups in the normalized_candidate string.
      for (size_t i = 0; i < formatted_number_groups.size(); ++i) {
        // Fails if the substring of normalized_candidate starting from
        // from_index doesn't contain the consecutive digits in digit_group.
        from_index = normalized_candidate.find(formatted_number_groups.at(i),
                                               from_index);
        if (from_index == string::npos) {
          return false;
        }
        // Moves from_index forward.
        from_index += formatted_number_groups.at(i).length();
        if (i == 0 && from_index < normalized_candidate.length()) {
          // We are at the position right after the NDC. Note although
          // normalized_candidate might contain non-ASCII formatting characters,
          // they won't be treated as ASCII digits when converted to a char.
          if (isdigit(normalized_candidate.at(from_index))) {
            // This means there is no formatting symbol after the NDC. In this
            // case, we only accept the number if there is no formatting
            // symbol at all in the number, except for extensions.
            string national_significant_number;
            phone_util_.GetNationalSignificantNumber(
                number, &national_significant_number);
            // Compare from the start of the first group, hence stepping back
            // by that group's length.
            return HasPrefixString(
                normalized_candidate.substr(
                    from_index - formatted_number_groups.at(i).length()),
                national_significant_number);
          }
        }
      }
      // The check here makes sure that we haven't mistakenly already used the
      // extension to match the last group of the subscriber number. Note the
      // extension cannot have formatting in-between digits.
      return
          normalized_candidate.substr(from_index).find(number.extension()) !=
          string::npos;
    }
    case PhoneNumberMatcher::EXACT_GROUPING: {
      if (!phone_util_.IsValidNumber(number) ||
          !ContainsOnlyValidXChars(number, candidate, phone_util_) ||
          // Two or more slashes were present.
          FindNth(candidate, '/', 2) != string::npos) {
        return false;
      }
      // TODO(lararennie,shaopengjia): Evaluate how this works for other locales
      // (testing has been limited to NANPA regions) and optimise if necessary.
      vector<string> candidate_groups;
      string normalized_candidate =
          NormalizeUTF8::NormalizeDecimalDigits(candidate);
      const scoped_ptr<RegExpInput> candidate_number(
          reg_exps_->regexp_factory_->CreateInput(normalized_candidate));
      // Collect every run of ASCII digits in the normalized candidate.
      string digit_block;
      while (reg_exps_->capturing_ascii_digits_pattern_->FindAndConsume(
                 candidate_number.get(),
                 &digit_block)) {
        candidate_groups.push_back(digit_block);
      }

      // Set this to the last group, skipping it if the number has an extension.
      int candidate_number_group_index =
          number.has_extension() ? candidate_groups.size() - 2
                                 : candidate_groups.size() - 1;
      // First we check if the national significant number is formatted as a
      // block. We use contains and not equals, since the national significant
      // number may be present with a prefix such as a national number prefix,
      // or the country code itself.
      string national_significant_number;
      phone_util_.GetNationalSignificantNumber(number,
                                               &national_significant_number);
      // The size() == 1 test comes first so that at() is not reached with a
      // single group (where the index above may be negative).
      if (candidate_groups.size() == 1 ||
          candidate_groups.at(candidate_number_group_index).find(
              national_significant_number) != string::npos) {
        return true;
      }
      vector<string> formatted_number_groups;
      GetNationalNumberGroups(phone_util_, number, &formatted_number_groups);
      // Starting from the end, go through in reverse, excluding the first
      // group, and check the candidate and number groups are the same.
      for (int formatted_number_group_index =
               (formatted_number_groups.size() - 1);
           formatted_number_group_index > 0 &&
           candidate_number_group_index >= 0;
           --formatted_number_group_index, --candidate_number_group_index) {
        if (candidate_groups.at(candidate_number_group_index) !=
            formatted_number_groups.at(formatted_number_group_index)) {
          return false;
        }
      }
      // Now check the first group. There may be a national prefix at the start,
      // so we only check that the candidate group ends with the formatted
      // number group.
      return (candidate_number_group_index >= 0 &&
              HasSuffixString(candidate_groups.at(candidate_number_group_index),
                              formatted_number_groups.at(0)));
    }
    default:
      LOG(ERROR) << "No implementation defined for verification for leniency "
                 << static_cast<int>(leniency);
      return false;
  }
 }

 // Looks for a phone number nested inside a candidate that did not verify as a
 // whole. Up to three sub-strings are tried, in this order: the first group
 // alone, everything after the first group, and everything before the last
 // group. Each failed attempt decrements max_tries_. Returns true as soon as
 // one attempt passes ParseAndVerify(), false otherwise.
 bool PhoneNumberMatcher::ExtractInnerMatch(const string& candidate, int offset,
                                           PhoneNumberMatch* match) {
  DCHECK(match);
  // Try removing either the first or last "group" in the number and see if this
  // gives a result. We consider white space to be a possible indication of
  // the start or end of the phone number.
  scoped_ptr<RegExpInput> candidate_input(
      reg_exps_->regexp_factory_->CreateInput(candidate));
  if (reg_exps_->group_separator_->FindAndConsume(candidate_input.get(),
                                                  NULL)) {
    // Try the first group by itself.
    // NOTE(review): this assumes candidate_input->ToString() yields the part of
    // the input that has not been consumed yet, so the difference in lengths is
    // the index just past the first separator - confirm against RegExpInput.
    int group_start_index =
        candidate.length() - candidate_input->ToString().length();
    string first_group_only = candidate.substr(0, group_start_index);
    // The substring still ends with the separator; trim it (and any other
    // unwanted trailing characters) before parsing.
    phone_util_.TrimUnwantedEndChars(&first_group_only);
    bool success = ParseAndVerify(first_group_only, offset, match);
    if (success) {
      return true;
    }
    --max_tries_;

    // Try the rest of the candidate without the first group.
    string without_first_group(candidate_input->ToString());
    phone_util_.TrimUnwantedEndChars(&without_first_group);
    // This sub-candidate starts group_start_index characters into the
    // candidate, so the offset is shifted accordingly.
    success =
        ParseAndVerify(without_first_group, offset + group_start_index, match);
    if (success) {
      return true;
    }
    --max_tries_;

    // Only spend a third attempt if the budget has not been used up by the two
    // attempts above.
    if (max_tries_ > 0) {
      while (reg_exps_->group_separator_->FindAndConsume(candidate_input.get(),
                                                         NULL)) {
        // Find the last group.
      }
      // The input has now been consumed up to the start of the last group.
      int last_group_start =
          candidate.length() - candidate_input->ToString().length();
      string without_last_group = candidate.substr(0, last_group_start);
      phone_util_.TrimUnwantedEndChars(&without_last_group);
      if (without_last_group == first_group_only) {
        // If there are only two groups, then the group "without the last group"
        // is the same as the first group. In these cases, we don't want to
        // re-check the number group, so we exit already.
        return false;
      }
      success = ParseAndVerify(without_last_group, offset, match);
      if (success) {
        return true;
      }
      --max_tries_;
    }
  }
  return false;
 }

 // Tries to turn a candidate into a match. Candidates that look like a
 // publication page reference or a slash-separated date are rejected outright.
 // Otherwise the whole candidate is verified first and, if that fails, an inner
 // match is searched for. Returns true if either attempt succeeds.
 bool PhoneNumberMatcher::ExtractMatch(const string& candidate, int offset,
                                      PhoneNumberMatch* match) {
  DCHECK(match);

  // More likely a publication page reference than a phone number.
  if (reg_exps_->pub_pages_->PartialMatch(candidate)) {
    return false;
  }
  // More likely a date than a phone number.
  if (reg_exps_->slash_separated_dates_->PartialMatch(candidate)) {
    return false;
  }

  // The entire candidate is tried first; only when it fails do we look for a
  // phone number contained within it.
  return ParseAndVerify(candidate, offset, match) ||
         ExtractInnerMatch(candidate, offset, match);
 }

 bool PhoneNumberMatcher::HasNext() {
  if (state_ == NOT_READY) {
    PhoneNumberMatch temp_match;
    if (!Find(search_index_, &temp_match)) {
      state_ = DONE;
    } else {
      last_match_.reset(new PhoneNumberMatch(temp_match.start(),
                                             temp_match.raw_string(),
                                             temp_match.number()));
      search_index_ = last_match_->end();
      state_ = READY;
    }
  }
  return state_ == READY;
 }

 // Copies the next match into |match| and advances the iterator. Returns false,
 // leaving |match| untouched, when there is no further match.
 bool PhoneNumberMatcher::Next(PhoneNumberMatch* match) {
  DCHECK(match);
  // HasNext() performs the actual search when no match is cached yet.
  if (HasNext()) {
    match->CopyFrom(*last_match_);
    // Hand the cached match over and force a new search on the next call.
    state_ = NOT_READY;
    last_match_.reset(NULL);
    return true;
  }
  return false;
 }

 // Scans text_ from |index| onwards for candidates matching pattern_ and hands
 // each one to ExtractMatch() together with |match|. Returns true as soon as
 // ExtractMatch() succeeds. Every rejected candidate costs at least one of the
 // remaining max_tries_; the scan stops and false is returned when the pattern
 // no longer matches or max_tries_ reaches zero.
 bool PhoneNumberMatcher::Find(int index, PhoneNumberMatch* match) {
  DCHECK(match);

  scoped_ptr<RegExpInput> text(
      reg_exps_->regexp_factory_for_pattern_->CreateInput(text_.substr(index)));
  string candidate;
  while ((max_tries_ > 0) &&
         reg_exps_->pattern_->FindAndConsume(text.get(), &candidate)) {
    // Position of the candidate within text_.
    // NOTE(review): assumes text->ToString() returns the not-yet-consumed
    // remainder of the input, so that the consumed length is the difference of
    // the two lengths - confirm against RegExpInput.
    int start = text_.length() - text->ToString().length() - candidate.length();
    // Check for extra numbers at the end.
    reg_exps_->capture_up_to_second_number_start_pattern_->
        PartialMatch(candidate, &candidate);
    if (ExtractMatch(candidate, start, match)) {
      return true;
    }

    // NOTE(review): |index| is a by-value parameter that is not read again
    // after this assignment, so this store has no effect on the scan, which
    // continues from wherever |text| has been consumed to.
    index = start + candidate.length();
    --max_tries_;
  }
  return false;
 }

 }  // namespace phonenumbers
 }  // namespace i18n
--- a/cpp/src/phonenumbers/phonenumbermatcher.h
+++ b/cpp/src/phonenumbers/phonenumbermatcher.h
@ -0,0 +1,158 @@
 // Copyright (C) 2011 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Lara Rennie
 // Author: Tao Huang
 //
 // This is a direct port from PhoneNumberMatcher.java.
 // Changes to this class should also happen to the Java version, whenever it
 // makes sense.

 #ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
 #define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_

 #include <string>

 #include "base/basictypes.h"
 #include "base/memory/scoped_ptr.h"
 #include "phonenumbers/regexp_adapter.h"

 namespace i18n {
 namespace phonenumbers {

 using std::string;

 class PhoneNumber;
 class PhoneNumberMatch;
 class PhoneNumberMatcherRegExps;
 class PhoneNumberUtil;

 // Iterator-style finder of phone numbers in a piece of text. Callers
 // alternate HasNext() and Next() to walk through the matches; the search
 // itself is performed lazily by HasNext().
 class PhoneNumberMatcher {
  friend class PhoneNumberMatcherTest;
 public:
  // Leniency when finding potential phone numbers in text segments. The levels
  // here are ordered in increasing strictness.
  enum Leniency {
    // Phone numbers accepted are possible, but not necessarily valid.
    POSSIBLE,
    // Phone numbers accepted are possible and valid.
    VALID,
    // Phone numbers accepted are valid and are grouped in a possible way for
    // this locale. For example, a US number written as "65 02 53 00 00" is not
    // accepted at this leniency level, whereas "650 253 0000" or "6502530000"
    // are. Numbers with more than one '/' symbol are also dropped at this
    // level.
    // Warning: This and the next level might result in lower coverage
    // especially for regions outside of country code "+1".
    STRICT_GROUPING,
    // Phone numbers accepted are valid and are grouped in the same way that we
    // would have formatted it, or as a single block. For example, a US number
    // written as "650 2530000" is not accepted at this leniency level, whereas
    // "650 253 0000" or "6502530000" are.
    EXACT_GROUPING,
  };

  // Constructs a phone number matcher.
  PhoneNumberMatcher(const PhoneNumberUtil& util,
                     const string& text,
                     const string& region_code,
                     Leniency leniency,
                     int max_tries);

  // Wrapper to construct a phone number matcher, with no limitation on the
  // number of retries and VALID Leniency.
  PhoneNumberMatcher(const string& text,
                     const string& region_code);

  ~PhoneNumberMatcher();

  // Returns true if the text sequence has another match. The search for the
  // next match is performed by this call when no result is cached yet.
  bool HasNext();

  // Gets the next match from the text sequence. Returns true and copies the
  // match into |match| if there is one, otherwise returns false.
  bool Next(PhoneNumberMatch* match);

 private:
  // The potential states of a PhoneNumberMatcher.
  enum State {
    NOT_READY,
    READY,
    DONE,
  };

  // Attempts to find the next phone number in the searched text, starting at
  // position "index". Returns true if a match is found, otherwise returns
  // false. Each rejected candidate uses up some of the remaining tries.
  bool Find(int index, PhoneNumberMatch* match);

  // Attempts to extract a match from candidate. Returns true if the match was
  // found, otherwise returns false. The value "offset" refers to the start
  // index of the candidate string within the overall text.
  bool ExtractMatch(const string& candidate, int offset,
                    PhoneNumberMatch* match);

  // Attempts to extract a match from a candidate string if the whole candidate
  // does not qualify as a match. Returns true if a match is found, otherwise
  // returns false.
  bool ExtractInnerMatch(const string& candidate, int offset,
                         PhoneNumberMatch* match);

  // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and
  // verifies it matches the requested leniency. If parsing and verification
  // succeed, returns true, otherwise this method returns false.
  bool ParseAndVerify(const string& candidate, int offset,
                      PhoneNumberMatch* match);

  // Returns whether the number, as it was written in candidate, passes the
  // checks required by the given leniency level.
  bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number,
                                 const string& candidate) const;

  // Helper method to determine if a character is a Latin-script letter or not.
  // For our purposes, combining marks should also return true since we assume
  // they have been added to a preceding Latin character.
  static bool IsLatinLetter(char32 letter);

  // Helper class holding useful regular expressions.
  const PhoneNumberMatcherRegExps* reg_exps_;

  // The phone number utility.
  const PhoneNumberUtil& phone_util_;

  // The text searched for phone numbers.
  const string text_;

  // The region (country) to assume for phone numbers without an international
  // prefix.
  const string preferred_region_;

  // The degree of validation requested.
  Leniency leniency_;

  // The maximum number of retries after matching an invalid number.
  int max_tries_;

  // The iteration tristate.
  State state_;

  // The last successful match, NULL unless state_ is READY.
  scoped_ptr<PhoneNumberMatch> last_match_;

  // The next index to start searching at. Undefined when state_ is DONE.
  int search_index_;

  DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher);
 };

 }  // namespace phonenumbers
 }  // namespace i18n

 #endif  // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
--- a/cpp/src/phonenumbers/phonenumberutil.h
+++ b/cpp/src/phonenumbers/phonenumberutil.h
@ -67,6 +67,9 @@ class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
  friend class Singleton<PhoneNumberUtil>;
 #endif
  friend class AsYouTypeFormatter;
  friend class PhoneNumberMatcher;
  friend class PhoneNumberMatcherRegExps;
  friend class PhoneNumberMatcherTest;
  friend class PhoneNumberUtilTest;
 public:
  ~PhoneNumberUtil();
--- a/cpp/src/phonenumbers/stringutil.cc
+++ b/cpp/src/phonenumbers/stringutil.cc
@ -14,6 +14,7 @@

 // Author: Philippe Liard

 #include <algorithm>
 #include <cassert>
 #include <cstring>
 #include <sstream>
@ -23,6 +24,7 @@
 namespace i18n {
 namespace phonenumbers {

 using std::equal;
 using std::stringstream;

 string operator+(const string& s, int n) {
@ -54,6 +56,43 @@ string SimpleItoa(uint64 n) {
  return GenericSimpleItoa(n);
 }

 // Returns true when |s| begins with |prefix|. An empty prefix always matches.
 bool HasPrefixString(const string& s, const string& prefix) {
  // A string shorter than the prefix cannot start with it.
  if (s.size() < prefix.size()) {
    return false;
  }
  return s.compare(0, prefix.size(), prefix) == 0;
 }

 // Returns the index of the n-th occurrence of |c| in |s| (n is 1-based), or
 // string::npos when |s| holds fewer than n occurrences or n is not positive.
 size_t FindNth(const string& s, char c, int n) {
  size_t found_at = string::npos;
  size_t search_from = 0;

  for (int remaining = n; remaining > 0; --remaining) {
    found_at = s.find(c, search_from);
    if (found_at == string::npos) {
      // Ran out of occurrences before reaching the n-th one.
      return string::npos;
    }
    search_from = found_at + 1;
  }
  return found_at;
 }

 // Splits |s| on every occurrence of |delimiter| and appends the non-empty
 // pieces to |result|. Nothing is appended when |delimiter| is empty.
 void SplitStringUsing(const string& s, const string& delimiter,
                      vector<string>* result) {
  assert(result);
  if (delimiter.empty()) {
    return;
  }

  size_t token_begin = 0;
  for (;;) {
    const size_t token_end = s.find(delimiter, token_begin);
    if (token_end == string::npos) {
      break;
    }
    // Adjacent delimiters produce an empty token, which is dropped.
    if (token_end != token_begin) {
      result->push_back(s.substr(token_begin, token_end - token_begin));
    }
    token_begin = token_end + delimiter.length();
  }

  // Whatever follows the last delimiter is the final token, unless empty.
  if (token_begin != s.length()) {
    result->push_back(s.substr(token_begin));
  }
 }

 void StripString(string* s, const char* remove, char replacewith) {
  const char* str_start = s->c_str();
  const char* str = str_start;
@ -252,6 +291,25 @@ string StrCat(const StringHolder& s1, const StringHolder& s2,
  return result;
 }

 // Concatenates eight string holders into a single string, in argument order.
 string StrCat(const StringHolder& s1, const StringHolder& s2,
              const StringHolder& s3, const StringHolder& s4,
              const StringHolder& s5, const StringHolder& s6,
              const StringHolder& s7, const StringHolder& s8) {
  string joined;
  // Reserve the final size once so the appends below do not reallocate.
  joined.reserve(s1.Length() + s2.Length() + s3.Length() + s4.Length() +
                 s5.Length() + s6.Length() + s7.Length() + s8.Length() + 1);

  joined += s1;
  joined += s2;
  joined += s3;
  joined += s4;
  joined += s5;
  joined += s6;
  joined += s7;
  joined += s8;
  return joined;
 }

 string StrCat(const StringHolder& s1, const StringHolder& s2,
              const StringHolder& s3, const StringHolder& s4,
              const StringHolder& s5, const StringHolder& s6,
--- a/cpp/src/phonenumbers/stringutil.h
+++ b/cpp/src/phonenumbers/stringutil.h
@ -19,6 +19,7 @@

 #include <cstddef>
 #include <string>
 #include <vector>

 #include "base/basictypes.h"

@ -26,6 +27,7 @@ namespace i18n {
 namespace phonenumbers {

 using std::string;
 using std::vector;

 // Supports string("hello") + 10.
 string operator+(const string& s, int n);
@ -34,6 +36,18 @@ string operator+(const string& s, int n);
 string SimpleItoa(uint64 n);
 string SimpleItoa(int n);

 // Returns whether the provided string starts with the supplied prefix.
 bool HasPrefixString(const string& s, const string& prefix);

 // Returns the index of the nth occurrence of c in s, or string::npos if fewer
 // than n occurrences are present.
 size_t FindNth(const string& s, char c, int n);

 // Splits a string using a string delimiter. Appends the components to the
 // provided vector. Note that empty tokens are ignored.
 void SplitStringUsing(const string& s, const string& delimiter,
                      vector<string>* result);

 // Replaces any occurrence of the character 'remove' (or the characters
 // in 'remove') with the character 'replacewith'.
 void StripString(string* s, const char* remove, char replacewith);
@ -113,6 +127,11 @@ string StrCat(const StringHolder& s1, const StringHolder& s2,
              const StringHolder& s5, const StringHolder& s6,
              const StringHolder& s7);

 string StrCat(const StringHolder& s1, const StringHolder& s2,
              const StringHolder& s3, const StringHolder& s4,
              const StringHolder& s5, const StringHolder& s6,
              const StringHolder& s7, const StringHolder& s8);

 string StrCat(const StringHolder& s1, const StringHolder& s2,
              const StringHolder& s3, const StringHolder& s4,
              const StringHolder& s5, const StringHolder& s6,
--- a/cpp/test/phonenumbers/asyoutypeformatter_test.cc
+++ b/cpp/test/phonenumbers/asyoutypeformatter_test.cc
@ -21,7 +21,7 @@
 #include "base/logging.h"
 #include "base/memory/scoped_ptr.h"
 #include "phonenumbers/phonenumberutil.h"
 #include "phonenumbers/region_code.h"
 #include "phonenumbers/test_util.h"

 namespace i18n {
 namespace phonenumbers {
@ -61,7 +61,7 @@ TEST_F(AsYouTypeFormatterTest, ConvertUnicodeStringPosition) {
 }

 TEST_F(AsYouTypeFormatterTest, Constructor) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("US"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::US()));

  EXPECT_TRUE(GetCurrentMetadata() != NULL);
 }
@ -107,7 +107,7 @@ TEST_F(AsYouTypeFormatterTest, TooLongNumberMatchingMultipleLeadingDigits) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_US) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("US"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::US()));

  EXPECT_EQ("6", formatter_->InputDigit('6', &result_));
  EXPECT_EQ("65", formatter_->InputDigit('5', &result_));
@ -203,7 +203,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_US) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_USFullWidthCharacters) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("US"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::US()));

  EXPECT_EQ("\xEF\xBC\x96" /* "６" */,
            formatter_->InputDigit(UnicodeString("\xEF\xBC\x96" /* "６" */)[0],
@ -238,7 +238,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_USFullWidthCharacters) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_USMobileShortCode) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("US"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::US()));

  EXPECT_EQ("*", formatter_->InputDigit('*', &result_));
  EXPECT_EQ("*1", formatter_->InputDigit('1', &result_));
@ -248,7 +248,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_USMobileShortCode) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_USVanityNumber) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("US"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::US()));

  EXPECT_EQ("8", formatter_->InputDigit('8', &result_));
  EXPECT_EQ("80", formatter_->InputDigit('0', &result_));
@ -265,7 +265,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_USVanityNumber) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTFAndRememberPositionUS) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("US"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::US()));

  EXPECT_EQ("1", formatter_->InputDigitAndRememberPosition('1', &result_));
  EXPECT_EQ(1, formatter_->GetRememberedPosition());
@ -407,7 +407,7 @@ TEST_F(AsYouTypeFormatterTest, AYTFAndRememberPositionUS) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_GBFixedLine) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("GB"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::GB()));

  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("02", formatter_->InputDigit('2', &result_));
@ -425,7 +425,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_GBFixedLine) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_GBTollFree) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("GB"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::GB()));

  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("08", formatter_->InputDigit('8', &result_));
@ -441,7 +441,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_GBTollFree) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_GBPremiumRate) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("GB"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::GB()));

  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("09", formatter_->InputDigit('9', &result_));
@ -457,7 +457,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_GBPremiumRate) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_NZMobile) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("NZ"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::NZ()));

  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("02", formatter_->InputDigit('2', &result_));
@ -473,7 +473,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_NZMobile) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_DE) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("DE"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::DE()));

  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("03", formatter_->InputDigit('3', &result_));
@ -513,7 +513,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_DE) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_AR) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("AR"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::AR()));

  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("01", formatter_->InputDigit('1', &result_));
@ -529,7 +529,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_AR) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_ARMobile) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("AR"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::AR()));

  EXPECT_EQ("+", formatter_->InputDigit('+', &result_));
  EXPECT_EQ("+5", formatter_->InputDigit('5', &result_));
@ -548,7 +548,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_ARMobile) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_KR) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("KR"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::KR()));

  // +82 51 234 5678
  EXPECT_EQ("+", formatter_->InputDigit('+', &result_));
@ -639,7 +639,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_KR) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_MX) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("MX"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::MX()));

  // +52 800 123 4567
  EXPECT_EQ("+", formatter_->InputDigit('+', &result_));
@ -724,7 +724,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_MX) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_MultipleLeadingDigitPatterns) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("JP"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::JP()));

  // +81 50 2345 6789
  EXPECT_EQ("+", formatter_->InputDigit('+', &result_));
@ -773,7 +773,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_MultipleLeadingDigitPatterns) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_LongIDD_AU) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("AU"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::AU()));
  // 0011 1 650 253 2250
  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("00", formatter_->InputDigit('0', &result_));
@ -830,7 +830,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_LongIDD_AU) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_LongIDD_KR) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("KR"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::KR()));
  // 00300 1 650 253 2250
  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("00", formatter_->InputDigit('0', &result_));
@ -851,7 +851,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_LongIDD_KR) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_LongNDD_KR) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("KR"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::KR()));
  // 08811-9876-7890
  EXPECT_EQ("0", formatter_->InputDigit('0', &result_));
  EXPECT_EQ("08", formatter_->InputDigit('8', &result_));
@ -887,7 +887,7 @@ TEST_F(AsYouTypeFormatterTest, AYTF_LongNDD_KR) {
 }

 TEST_F(AsYouTypeFormatterTest, AYTF_LongNDD_SG) {
  formatter_.reset(phone_util_.GetAsYouTypeFormatter("SG"));
  formatter_.reset(phone_util_.GetAsYouTypeFormatter(RegionCode::SG()));
  // 777777 9876 7890
  EXPECT_EQ("7", formatter_->InputDigit('7', &result_));
  EXPECT_EQ("77", formatter_->InputDigit('7', &result_));
--- a/cpp/test/phonenumbers/phonenumbermatch_test.cc
+++ b/cpp/test/phonenumbers/phonenumbermatch_test.cc
@ -0,0 +1,91 @@
 // Copyright (C) 2011 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Tao Huang
 //
 // Basic test cases for PhoneNumberMatch.

 #include "phonenumbers/phonenumber.h"
 #include "phonenumbers/phonenumbermatch.h"

 #include <gtest/gtest.h>

 #include "phonenumbers/phonenumber.pb.h"

 namespace i18n {
 namespace phonenumbers {

 // Checks that the accessors of a PhoneNumberMatch report the values it was
 // constructed with, and that end() and length() are derived from the start
 // index and the raw string.
 TEST(PhoneNumberMatch, TestGetterMethods) {
  PhoneNumber number;
  const int start_index = 10;
  const string raw_phone_number("1 800 234 45 67");
  PhoneNumberMatch match1(start_index, raw_phone_number, number);

  EXPECT_EQ(start_index, match1.start());
  EXPECT_EQ(start_index + raw_phone_number.length(), match1.end());
  EXPECT_EQ(raw_phone_number.length(), match1.length());
  EXPECT_EQ(raw_phone_number, match1.raw_string());

  // The raw string is 15 characters long, hence the half-open range [10,25).
  EXPECT_EQ("PhoneNumberMatch [10,25) 1 800 234 45 67", match1.ToString());
 }

 // Checks that Equals() takes the start index, the number and the raw string
 // into account: each one is changed in turn on an otherwise identical match.
 TEST(PhoneNumberMatch, TestEquals) {
  PhoneNumber number;
  PhoneNumberMatch match1(10, "1 800 234 45 67", number);
  PhoneNumberMatch match2(10, "1 800 234 45 67", number);

  // A different start index makes the matches unequal; restoring it makes them
  // equal again.
  match2.set_start(11);
  ASSERT_FALSE(match1.Equals(match2));
  match2.set_start(match1.start());
  EXPECT_TRUE(match1.Equals(match2));

  // Same check for the phone number.
  PhoneNumber number2;
  number2.set_raw_input("123");
  match2.set_number(number2);
  ASSERT_FALSE(match1.Equals(match2));
  match2.set_number(match1.number());
  EXPECT_TRUE(ExactlySameAs(match1.number(), match2.number()));
  EXPECT_TRUE(match1.Equals(match2));

  // A different raw string also makes the matches unequal.
  match2.set_raw_string("123");
  ASSERT_FALSE(match1.Equals(match2));
 }

 // Checks that CopyFrom() makes the destination equal to its source, including
 // when the same source is copied into several destinations.
 TEST(PhoneNumberMatch, TestAssignmentOverload) {
  PhoneNumber number;
  PhoneNumberMatch match1(10, "1 800 234 45 67", number);
  PhoneNumberMatch match2;
  // A default-constructed match differs from a populated one.
  ASSERT_FALSE(match1.Equals(match2));

  match2.CopyFrom(match1);
  ASSERT_TRUE(match1.Equals(match2));

  // Copying from the same source twice yields two equal matches.
  PhoneNumberMatch match3;
  PhoneNumberMatch match4;
  match4.CopyFrom(match2);
  match3.CopyFrom(match2);
  ASSERT_TRUE(match3.Equals(match4));
  ASSERT_TRUE(match4.Equals(match2));
 }

 // Checks that a match populated through CopyFrom() equals its source.
 // NOTE(review): despite its name, this test default-constructs match2 and
 // then calls CopyFrom(); no copy constructor is invoked here.
 TEST(PhoneNumberMatch, TestCopyConstructor) {
  PhoneNumber number;
  PhoneNumberMatch match1(10, "1 800 234 45 67", number);
  PhoneNumberMatch match2;
  match2.CopyFrom(match1);
  ASSERT_TRUE(match1.Equals(match2));
 }

 }  // namespace phonenumbers
 }  // namespace i18n
--- a/cpp/test/phonenumbers/phonenumbermatcher_test.cc
+++ b/cpp/test/phonenumbers/phonenumbermatcher_test.cc
--- a/cpp/test/phonenumbers/phonenumberutil_test.cc
+++ b/cpp/test/phonenumbers/phonenumberutil_test.cc
@ -26,6 +26,7 @@
 #include "phonenumbers/phonenumber.h"
 #include "phonenumbers/phonenumber.pb.h"
 #include "phonenumbers/phonenumberutil.h"
 #include "phonenumbers/test_util.h"

 namespace i18n {
 namespace phonenumbers {
@ -36,112 +37,6 @@ using std::ostream;

 using google::protobuf::RepeatedPtrField;

 namespace {

 // Class containing string constants of region codes for easier testing. This is
 // intended to replace region_code.h for testing in this file, with more
 // constants defined.
 class RegionCode {
 public:
  // Each accessor returns a reference to a function-local static string
  // holding the two-letter region code named by the accessor.
  static const string& AD() {
    static const string kCode("AD");
    return kCode;
  }

  static const string& AO() {
    static const string kCode("AO");
    return kCode;
  }

  static const string& AR() {
    static const string kCode("AR");
    return kCode;
  }

  static const string& AU() {
    static const string kCode("AU");
    return kCode;
  }

  static const string& BS() {
    static const string kCode("BS");
    return kCode;
  }

  static const string& CN() {
    static const string kCode("CN");
    return kCode;
  }

  static const string& CS() {
    static const string kCode("CS");
    return kCode;
  }

  static const string& DE() {
    static const string kCode("DE");
    return kCode;
  }

  static const string& GB() {
    static const string kCode("GB");
    return kCode;
  }

  static const string& IT() {
    static const string kCode("IT");
    return kCode;
  }

  static const string& KR() {
    static const string kCode("KR");
    return kCode;
  }

  static const string& MX() {
    static const string kCode("MX");
    return kCode;
  }

  static const string& NZ() {
    static const string kCode("NZ");
    return kCode;
  }

  static const string& PL() {
    static const string kCode("PL");
    return kCode;
  }

  static const string& RE() {
    static const string kCode("RE");
    return kCode;
  }

  static const string& SG() {
    static const string kCode("SG");
    return kCode;
  }

  static const string& US() {
    static const string kCode("US");
    return kCode;
  }

  static const string& YT() {
    static const string kCode("YT");
    return kCode;
  }

  // Returns a region code string representing the "unknown" region.
  static const string& GetUnknown() {
    static const string kCode("ZZ");
    return kCode;
  }
 };

 }  // namespace

 class PhoneNumberUtilTest : public testing::Test {
 protected:
  PhoneNumberUtilTest() : phone_util_(*PhoneNumberUtil::GetInstance()) {
@ -219,40 +114,6 @@ class PhoneNumberUtilTest : public testing::Test {
  const PhoneNumberUtil& phone_util_;
 };

 // PhoneNumber comparison operators, so that EXPECT_EQ and EXPECT_NE can be
 // used on phone numbers in the unittests. Equality means ExactlySameAs().
 bool operator==(const PhoneNumber& number1, const PhoneNumber& number2) {
  const bool identical = ExactlySameAs(number1, number2);
  return identical;
 }

 // Inequality is defined as the negation of operator==.
 bool operator!=(const PhoneNumber& number1, const PhoneNumber& number2) {
  const bool identical = (number1 == number2);
  return !identical;
 }

 // Streams a PhoneNumber one field per line so that Google Test can display it
 // in failure messages. The country code and national number are always
 // written; the remaining fields only when they are set.
 ostream& operator<<(ostream& os, const PhoneNumber& number) {
  os << endl;
  os << "country_code: " << number.country_code() << endl;
  os << "national_number: " << number.national_number() << endl;

  if (number.has_extension()) {
    os << "extension: " << number.extension() << endl;
  }
  if (number.has_italian_leading_zero()) {
    os << "italian_leading_zero: " << number.italian_leading_zero() << endl;
  }
  if (number.has_raw_input()) {
    os << "raw_input: " << number.raw_input() << endl;
  }
  if (number.has_country_code_source()) {
    os << "country_code_source: " << number.country_code_source() << endl;
  }
  if (number.has_preferred_domestic_carrier_code()) {
    os << "preferred_domestic_carrier_code: "
       << number.preferred_domestic_carrier_code() << endl;
  }
  return os;
 }

 TEST_F(PhoneNumberUtilTest, GetSupportedRegions) {
  set<string> regions;

--- a/cpp/test/phonenumbers/stringutil_test.cc
+++ b/cpp/test/phonenumbers/stringutil_test.cc
@ -14,9 +14,15 @@

 // Author: Philippe Liard

 #include "phonenumbers/stringutil.h"

 #include <string>
 #include <vector>

 #include <gtest/gtest.h>

 #include "phonenumbers/stringutil.h"
 using std::string;
 using std::vector;

 namespace i18n {
 namespace phonenumbers {
@ -31,6 +37,55 @@ TEST(StringUtilTest, SimpleItoa) {
  EXPECT_EQ("10", SimpleItoa(10));
 }

 // Checks HasPrefixString() against a matching prefix and a near-miss.
 TEST(StringUtilTest, HasPrefixString) {
  const string text("hello world");
  EXPECT_TRUE(HasPrefixString(text, "hello"));
  // "hellO" differs from the start of the text only by the case of one letter.
  EXPECT_FALSE(HasPrefixString(text, "hellO"));
 }

 // Searching an empty string is expected to report "not found".
 TEST(StringUtilTest, FindNthWithEmptyString) {
  const string empty;
  EXPECT_EQ(string::npos, FindNth(empty, 'a', 1));
 }

 // A negative occurrence index is expected to report "not found".
 TEST(StringUtilTest, FindNthWithNNegative) {
  const string haystack("hello world");
  const int negative_n = -1;
  EXPECT_EQ(string::npos, FindNth(haystack, 'o', negative_n));
 }

 // "hello world" contains 'o' only twice, so asking for the third occurrence is
 // expected to report "not found".
 TEST(StringUtilTest, FindNthWithNTooHigh) {
  const string haystack("hello world");
  const int too_high_n = 3;
  EXPECT_EQ(string::npos, FindNth(haystack, 'o', too_high_n));
 }

 // The second 'o' of "hello world" sits at index 7.
 TEST(StringUtilTest, FindNth) {
  // The sibling tests compare FindNth() against string::npos, so its result is
  // presumably an unsigned size type (confirm against stringutil.h). An
  // unsigned literal keeps the comparison free of signed/unsigned mismatches.
  EXPECT_EQ(7U, FindNth("hello world", 'o', 2));
 }

 // Splitting an empty input is expected to produce no tokens.
 TEST(StringUtilTest, SplitStringUsingWithEmptyString) {
  vector<string> result;
  SplitStringUsing("", ":", &result);
  // vector::size() is unsigned; compare against an unsigned literal to avoid a
  // signed/unsigned comparison inside the assertion.
  EXPECT_EQ(0U, result.size());
 }

 // Splitting with an empty delimiter is expected to produce no tokens.
 TEST(StringUtilTest, SplitStringUsingWithEmptyDelimiter) {
  vector<string> result;
  SplitStringUsing("hello", "", &result);
  // vector::size() is unsigned; compare against an unsigned literal to avoid a
  // signed/unsigned comparison inside the assertion.
  EXPECT_EQ(0U, result.size());
 }

 // Leading and trailing delimiters are expected not to produce empty tokens.
 TEST(StringUtilTest, SplitStringUsing) {
  vector<string> result;
  SplitStringUsing(":hello:world:", ":", &result);
  // Abort the test if the size is wrong: the element checks below index into
  // |result| and would read out of bounds otherwise. The unsigned literal
  // matches the unsigned type of vector::size().
  ASSERT_EQ(2U, result.size());
  EXPECT_EQ("hello", result[0]);
  EXPECT_EQ("world", result[1]);
 }

 // Two consecutive delimiters are expected not to produce an empty token.
 TEST(StringUtilTest, SplitStringUsingIgnoresEmptyToken) {
  vector<string> result;
  SplitStringUsing("hello::world", ":", &result);
  // Abort the test if the size is wrong: the element checks below index into
  // |result| and would read out of bounds otherwise. The unsigned literal
  // matches the unsigned type of vector::size().
  ASSERT_EQ(2U, result.size());
  EXPECT_EQ("hello", result[0]);
  EXPECT_EQ("world", result[1]);
 }

 // Test TryStripPrefixString.
 TEST(StringUtilTest, TryStripPrefixString) {
  string s;
@ -205,6 +260,10 @@ TEST(StringUtilTest, StrCat) {
  s = StrCat("a", "b", "c", "d", "e", "f", "g");
  EXPECT_EQ("abcdefg", s);

  // Test with 8 arguments.
  s = StrCat("a", "b", "c", "d", "e", "f", "g", "h");
  EXPECT_EQ("abcdefgh", s);

  // Test with 9 arguments.
  s = StrCat("a", "b", "c", "d", "e", "f", "g", "h", "i");
  EXPECT_EQ("abcdefghi", s);
--- a/cpp/test/phonenumbers/test_util.cc
+++ b/cpp/test/phonenumbers/test_util.cc
@ -0,0 +1,63 @@
 // Copyright (C) 2011 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // Author: Philippe Liard

 #include <iostream>
 #include <vector>

 #include "phonenumbers/phonenumber.pb.h"
 #include "phonenumbers/test_util.h"

 using std::cout;
 using std::endl;

 namespace i18n {
 namespace phonenumbers {

 // Writes a multi-line dump of |number| to |os| and returns |os|. The country
 // code and national number are always written; every other field is written
 // only when the corresponding has_*() accessor reports it as set.
 ostream& operator<<(ostream& os, const PhoneNumber& number) {
  // Start on a fresh line, then write the two fields that are always printed.
  os << endl;
  os << "country_code: " << number.country_code() << endl;
  os << "national_number: " << number.national_number() << endl;

  // Optional fields, one per line, in a fixed order.
  if (number.has_extension()) {
    os << "extension: " << number.extension() << endl;
  }
  if (number.has_italian_leading_zero()) {
    os << "italian_leading_zero: " << number.italian_leading_zero() << endl;
  }
  if (number.has_raw_input()) {
    os << "raw_input: " << number.raw_input() << endl;
  }
  if (number.has_country_code_source()) {
    os << "country_code_source: " << number.country_code_source() << endl;
  }
  if (number.has_preferred_domestic_carrier_code()) {
    os << "preferred_domestic_carrier_code: ";
    os << number.preferred_domestic_carrier_code() << endl;
  }
  return os;
 }

 // Writes every element of |numbers| to |os| between a "[" line and a "]" line,
 // using the PhoneNumber stream operator for each element, and returns |os|.
 ostream& operator<<(ostream& os, const vector<PhoneNumber>& numbers) {
  typedef vector<PhoneNumber>::size_type Index;

  os << "[" << endl;
  for (Index i = 0; i < numbers.size(); ++i) {
    os << numbers[i];
  }
  os << endl;
  os << "]" << endl;
  return os;
 }

 }  // namespace phonenumbers
 }  // namespace i18n
--- a/cpp/test/phonenumbers/test_util.h
+++ b/cpp/test/phonenumbers/test_util.h
@ -0,0 +1,162 @@
 // Copyright (C) 2011 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // Author: Philippe Liard

 #include <string>
 #include <ostream>
 #include <vector>

 #include "phonenumbers/phonenumber.h"

 namespace i18n {
 namespace phonenumbers {

 using std::string;
 using std::ostream;
 using std::vector;

 class PhoneNumber;

 // Equality operator for PhoneNumber so that EXPECT_EQ (and, through
 // operator!=, EXPECT_NE) can be used on PhoneNumber values in the unittests.
 // The comparison itself is delegated to ExactlySameAs().
 inline bool operator==(const PhoneNumber& number1, const PhoneNumber& number2) {
  const bool same_contents = ExactlySameAs(number1, number2);
  return same_contents;
 }

 // Inequality operator for PhoneNumber, defined as the negation of operator==.
 inline bool operator!=(const PhoneNumber& number1, const PhoneNumber& number2) {
  if (number1 == number2) {
    return false;
  }
  return true;
 }

 // Needed by Google Test to display errors.
 ostream& operator<<(ostream& os, const PhoneNumber& number);

 ostream& operator<<(ostream& os, const vector<PhoneNumber>& numbers);

 // Test helper exposing region-code strings as static accessors, for easier
 // testing. Each accessor returns a reference to a function-local static
 // string, so repeated calls yield the same object.
 class RegionCode {
 public:
  static const string& AD() { static const string kCode("AD"); return kCode; }
  static const string& AO() { static const string kCode("AO"); return kCode; }
  static const string& AR() { static const string kCode("AR"); return kCode; }
  static const string& AU() { static const string kCode("AU"); return kCode; }
  static const string& BS() { static const string kCode("BS"); return kCode; }
  static const string& CA() { static const string kCode("CA"); return kCode; }
  static const string& CN() { static const string kCode("CN"); return kCode; }
  static const string& CS() { static const string kCode("CS"); return kCode; }
  static const string& DE() { static const string kCode("DE"); return kCode; }
  static const string& GB() { static const string kCode("GB"); return kCode; }
  static const string& IT() { static const string kCode("IT"); return kCode; }
  static const string& JP() { static const string kCode("JP"); return kCode; }
  static const string& KR() { static const string kCode("KR"); return kCode; }
  static const string& MX() { static const string kCode("MX"); return kCode; }
  static const string& NZ() { static const string kCode("NZ"); return kCode; }
  static const string& PL() { static const string kCode("PL"); return kCode; }
  static const string& RE() { static const string kCode("RE"); return kCode; }
  static const string& SG() { static const string kCode("SG"); return kCode; }
  static const string& US() { static const string kCode("US"); return kCode; }
  static const string& YT() { static const string kCode("YT"); return kCode; }

  // Returns a region code string representing the "unknown" region.
  static const string& GetUnknown() {
    static const string kUnknownRegion("ZZ");
    return kUnknownRegion;
  }

  // Alias of GetUnknown() named after the code itself, like the accessors
  // above.
  static const string& ZZ() { return GetUnknown(); }
 };

 }  // namespace phonenumbers
 }  // namespace i18n
--- a/tools/script/continuous-integration.sh
+++ b/tools/script/continuous-integration.sh
@ -30,11 +30,19 @@ test_cpp_version() {
  CC_TEST_FILE=`mktemp`.cc
  CC_TEST_BINARY=`mktemp`
  CMAKE_FLAGS="$1"

  # Write the program that tests the installation of the library to a temporary
  # source file.
  > $CC_TEST_FILE echo '
    #include <cassert>

    #include <base/memory/scoped_ptr.h>

    // Include all the public headers.
    #include <phonenumbers/asyoutypeformatter.h>
    #include <phonenumbers/phonenumber.pb.h>
    #include <phonenumbers/phonenumbermatch.h>
    #include <phonenumbers/phonenumbermatcher.h>
    #include <phonenumbers/phonenumberutil.h>

    using i18n::phonenumbers::AsYouTypeFormatter;
@ -44,8 +52,11 @@ test_cpp_version() {
      PhoneNumberUtil* const phone_util = PhoneNumberUtil::GetInstance();
      const scoped_ptr<AsYouTypeFormatter> asytf(
          phone_util->GetAsYouTypeFormatter("US"));
      return !(phone_util != NULL && asytf != NULL);

      assert(phone_util != NULL);
      assert(asytf != NULL);
    }'

  # Run the build and tests.
  (
    rm -rf cpp/build /tmp/libphonenumber &&
@ -64,8 +75,9 @@ test_cpp_version() {
  [ $STATUS -ne 0 ] && exit $STATUS
 }
 # Run test_cpp_version once per CMake flag set. The single argument is stored
 # in CMAKE_FLAGS by the function; '' passes an empty flag string.
 test_cpp_version ''
 test_cpp_version '-DUSE_RE2=ON'
 test_cpp_version '-DUSE_ICU_REGEXP=ON'
 test_cpp_version '-DUSE_LITE_METADATA=ON'
 test_cpp_version '-DUSE_RE2=ON'
 test_cpp_version '-DUSE_STD_MAP=ON'

 # Test Java version using Ant.