Browse Source

CPP: Moving a utf8 normalisation function out of the main code. Contributed by philip.liard.

pull/567/head
Lara Scheidegger 15 years ago
committed by Mihaela Rosca
parent
commit
11db4344d1
2 changed files with 52 additions and 17 deletions
  1. +50
    -0
      cpp/src/normalize_utf8.h
  2. +2
    -17
      cpp/src/phonenumberutil.cc

+ 50
- 0
cpp/src/normalize_utf8.h View File

@ -0,0 +1,50 @@
// Copyright (C) 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "utf/unicodetext.h"
namespace i18n {
namespace phonenumbers {
using std::string;
struct NormalizeUTF8 {
  // Returns a copy of |number| in which every character that
  // u_charDigitValue() reports as having a decimal digit value is written as
  // the corresponding ASCII digit ('0' + value). Any character for which
  // u_charDigitValue() returns -1 is re-emitted as its UTF-8 bytes, so
  // non-digit content is carried over from input to output.
  //
  // |number| is interpreted as UTF-8 via UnicodeText::PointToUTF8().
  static string NormalizeDecimalDigits(const string& number) {
    UnicodeText text;
    text.PointToUTF8(number.data(), number.size());

    string result;
    for (UnicodeText::const_iterator cursor = text.begin();
         cursor != text.end();
         ++cursor) {
      const int32_t digit = u_charDigitValue(*cursor);
      if (digit != -1) {
        // Decimal digit in some script: emit its ASCII equivalent.
        result.push_back('0' + digit);
        continue;
      }
      // Not a decimal digit: append the character's UTF-8 bytes as produced
      // by the iterator (get_utf8 returns how many bytes it wrote).
      char encoded[4];
      const int encoded_len = cursor.get_utf8(encoded);
      result.append(encoded, encoded_len);
    }
    return result;
  }
};
} // namespace phonenumbers
} // namespace i18n

+ 2
- 17
cpp/src/phonenumberutil.cc View File

@ -34,6 +34,7 @@
#include "encoding_utils.h" #include "encoding_utils.h"
#include "logger_adapter.h" #include "logger_adapter.h"
#include "metadata.h" #include "metadata.h"
#include "normalize_utf8.h"
#include "phonemetadata.pb.h" #include "phonemetadata.pb.h"
#include "phonenumber.h" #include "phonenumber.h"
#include "phonenumber.pb.h" #include "phonenumber.pb.h"
@ -1708,23 +1709,7 @@ void PhoneNumberUtil::NormalizeDigitsOnly(string* number) {
// Delete everything that isn't valid digits. // Delete everything that isn't valid digits.
non_digits_pattern->GlobalReplace(number, ""); non_digits_pattern->GlobalReplace(number, "");
// Normalize all decimal digits to ASCII digits. // Normalize all decimal digits to ASCII digits.
string normalized;
UnicodeText number_as_unicode;
number_as_unicode.PointToUTF8(number->data(), number->size());
for (UnicodeText::const_iterator it = number_as_unicode.begin();
it != number_as_unicode.end();
++it) {
int32_t digitValue = u_charDigitValue(*it);
if (digitValue == -1) {
// Not a decimal digit.
char utf8[4];
int len = it.get_utf8(utf8);
normalized.append(utf8, len);
} else {
normalized.push_back('0' + digitValue);
}
}
*number = normalized;
number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
} }
bool PhoneNumberUtil::IsAlphaNumber(const string& number) const { bool PhoneNumberUtil::IsAlphaNumber(const string& number) const {


Loading…
Cancel
Save