From 5b4c8d8774c18c3bd0f99059b6c0fa1c62d4295f Mon Sep 17 00:00:00 2001 From: bgertzfield Date: Thu, 28 Apr 2011 13:26:24 +0000 Subject: [PATCH] Fix UnicodeText const_iterator to make it work for MacOS. --- cpp/CMakeLists.txt | 1 + cpp/src/utf/unicodetext.cc | 8 +++--- cpp/src/utf/unicodetext_test.cc | 45 +++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 cpp/src/utf/unicodetext_test.cc diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 64390595f..95110bbe8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -234,6 +234,7 @@ set (TEST_SOURCES "src/run_tests.cc" "src/stringutil_test.cc" "src/test_metadata.cc" # Generated by build tools. + "src/utf/unicodetext_test.cc" ) add_executable (libphonenumber_test ${TEST_SOURCES}) diff --git a/cpp/src/utf/unicodetext.cc b/cpp/src/utf/unicodetext.cc index 82c1b42fa..1039f1f60 100644 --- a/cpp/src/utf/unicodetext.cc +++ b/cpp/src/utf/unicodetext.cc @@ -442,22 +442,22 @@ char32 UnicodeText::const_iterator::operator*() const { // for speed, we do the calculation ourselves.) // Convert from UTF-8 - int byte1 = it_[0]; + uint8 byte1 = static_cast<uint8>(it_[0]); if (byte1 < 0x80) return byte1; - int byte2 = it_[1]; + uint8 byte2 = static_cast<uint8>(it_[1]); if (byte1 < 0xE0) return ((byte1 & 0x1F) << 6) | (byte2 & 0x3F); - int byte3 = it_[2]; + uint8 byte3 = static_cast<uint8>(it_[2]); if (byte1 < 0xF0) return ((byte1 & 0x0F) << 12) | ((byte2 & 0x3F) << 6) | (byte3 & 0x3F); - int byte4 = it_[3]; + uint8 byte4 = static_cast<uint8>(it_[3]); return ((byte1 & 0x07) << 18) | ((byte2 & 0x3F) << 12) | ((byte3 & 0x3F) << 6) diff --git a/cpp/src/utf/unicodetext_test.cc b/cpp/src/utf/unicodetext_test.cc new file mode 100644 index 000000000..51ba094a3 --- /dev/null +++ b/cpp/src/utf/unicodetext_test.cc @@ -0,0 +1,45 @@ +// Copyright 2011 Facebook, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License.
You may obtain
+// a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+//
+// Author: Ben Gertzfield
+
+#include <gtest/gtest.h>
+
+#include "unicodetext.h"
+
+namespace i18n {
+namespace unicodetext {
+
+TEST(UnicodeTextTest, Iterator) {  // operator* decodes 1- to 4-byte UTF-8.
+  struct value {
+    const char* utf8;         // UTF-8 bytes of a single code point.
+    unsigned int code_point;  // Value expected from dereferencing begin().
+  } values[] = {
+    { "\x31", 0x31 }, // U+0031 DIGIT ONE
+    { "\xC2\xBD", 0x00BD }, // U+00BD VULGAR FRACTION ONE HALF
+    { "\xEF\xBC\x91", 0xFF11 }, // U+FF11 FULLWIDTH DIGIT ONE
+    { "\xF0\x9F\x80\x80", 0x1F000 }, // U+1F000 MAHJONG TILE EAST WIND
+  };
+
+  for (size_t i = 0; i < sizeof values / sizeof values[0]; i++) {
+    string number(values[i].utf8);
+    UnicodeText number_as_unicode;
+    number_as_unicode.PointToUTF8(number.data(), number.size());
+    UnicodeText::const_iterator it = number_as_unicode.begin();
+    EXPECT_EQ(values[i].code_point, *it);  // Only the first code point is read.
+  }
+}
+
+} // namespace unicodetext
+} // namespace i18n