From 11db4344d1925b0ea7bd7a6f11fd0ed747a93c40 Mon Sep 17 00:00:00 2001
From: Lara Scheidegger
Date: Fri, 17 Jun 2011 11:41:28 +0000
Subject: [PATCH] CPP: Moving a utf8 normalisation function out of the main code. Contributed by philip.liard.

---
 cpp/src/normalize_utf8.h   | 61 ++++++++++++++++++++++++++++++++++++++
 cpp/src/phonenumberutil.cc | 19 ++----------
 2 files changed, 63 insertions(+), 17 deletions(-)
 create mode 100644 cpp/src/normalize_utf8.h

Note: normalize_utf8.h below differs from the blob named on its index line
(include guard and <unicode/uchar.h> include added, comments expanded).

diff --git a/cpp/src/normalize_utf8.h b/cpp/src/normalize_utf8.h
new file mode 100644
index 000000000..36676cdd3
--- /dev/null
+++ b/cpp/src/normalize_utf8.h
@@ -0,0 +1,61 @@
+// Copyright (C) 2011 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef I18N_PHONENUMBERS_NORMALIZE_UTF8_H_
+#define I18N_PHONENUMBERS_NORMALIZE_UTF8_H_
+
+#include <string>
+#include <unicode/uchar.h>
+
+#include "utf/unicodetext.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+using std::string;
+
+// Groups the UTF-8 normalisation helpers; it only has static members.
+struct NormalizeUTF8 {
+  // Put a UTF-8 string in ASCII digits: All decimal digits (Nd) replaced by
+  // their ASCII counterparts; all other characters are copied from input to
+  // output.
+  //
+  // number: the UTF-8 text to normalise; it is left untouched.
+  // Returns the normalised text as a new string.
+  static string NormalizeDecimalDigits(const string& number) {
+    string normalized;
+    UnicodeText number_as_unicode;
+    number_as_unicode.PointToUTF8(number.data(), number.size());
+    for (UnicodeText::const_iterator it = number_as_unicode.begin();
+         it != number_as_unicode.end();
+         ++it) {
+      // u_charDigitValue() reports -1 for anything but a decimal digit.
+      int32_t digitValue = u_charDigitValue(*it);
+      if (digitValue == -1) {
+        // Not a decimal digit: copy its UTF-8 bytes over unchanged.
+        char utf8[4];
+        int len = it.get_utf8(utf8);
+        normalized.append(utf8, len);
+      } else {
+        normalized.push_back('0' + digitValue);
+      }
+    }
+    return normalized;
+  }
+};
+
+}  // namespace phonenumbers
+}  // namespace i18n
+
+#endif  // I18N_PHONENUMBERS_NORMALIZE_UTF8_H_
diff --git a/cpp/src/phonenumberutil.cc b/cpp/src/phonenumberutil.cc
index 5e9fc380c..7625d912d 100644
--- a/cpp/src/phonenumberutil.cc
+++ b/cpp/src/phonenumberutil.cc
@@ -34,6 +34,7 @@
 #include "encoding_utils.h"
 #include "logger_adapter.h"
 #include "metadata.h"
+#include "normalize_utf8.h"
 #include "phonemetadata.pb.h"
 #include "phonenumber.h"
 #include "phonenumber.pb.h"
@@ -1708,23 +1709,7 @@ void PhoneNumberUtil::NormalizeDigitsOnly(string* number) {
   // Delete everything that isn't valid digits.
   non_digits_pattern->GlobalReplace(number, "");
   // Normalize all decimal digits to ASCII digits.
-  string normalized;
-  UnicodeText number_as_unicode;
-  number_as_unicode.PointToUTF8(number->data(), number->size());
-  for (UnicodeText::const_iterator it = number_as_unicode.begin();
-       it != number_as_unicode.end();
-       ++it) {
-    int32_t digitValue = u_charDigitValue(*it);
-    if (digitValue == -1) {
-      // Not a decimal digit.
-      char utf8[4];
-      int len = it.get_utf8(utf8);
-      normalized.append(utf8, len);
-    } else {
-      normalized.push_back('0' + digitValue);
-    }
-  }
-  *number = normalized;
+  number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
 }
 
 bool PhoneNumberUtil::IsAlphaNumber(const string& number) const {