Browse Source

Fix UnicodeText const_iterator to make it work for MacOS.

pull/567/head
bgertzfield 15 years ago
committed by Mihaela Rosca
parent
commit
5b4c8d8774
3 changed files with 50 additions and 4 deletions
  1. +1
    -0
      cpp/CMakeLists.txt
  2. +4
    -4
      cpp/src/utf/unicodetext.cc
  3. +45
    -0
      cpp/src/utf/unicodetext_test.cc

+ 1
- 0
cpp/CMakeLists.txt View File

@ -234,6 +234,7 @@ set (TEST_SOURCES
"src/run_tests.cc"
"src/stringutil_test.cc"
"src/test_metadata.cc" # Generated by build tools.
"src/utf/unicodetext_test.cc"
)
# Build the libphonenumber_test executable from every file listed in
# TEST_SOURCES.
add_executable (libphonenumber_test ${TEST_SOURCES})


+ 4
- 4
cpp/src/utf/unicodetext.cc View File

@ -442,22 +442,22 @@ char32 UnicodeText::const_iterator::operator*() const {
// for speed, we do the calculation ourselves.)
// Convert from UTF-8
int byte1 = it_[0];
uint8 byte1 = static_cast<uint8>(it_[0]);
if (byte1 < 0x80)
return byte1;
int byte2 = it_[1];
uint8 byte2 = static_cast<uint8>(it_[1]);
if (byte1 < 0xE0)
return ((byte1 & 0x1F) << 6)
| (byte2 & 0x3F);
int byte3 = it_[2];
uint8 byte3 = static_cast<uint8>(it_[2]);
if (byte1 < 0xF0)
return ((byte1 & 0x0F) << 12)
| ((byte2 & 0x3F) << 6)
| (byte3 & 0x3F);
int byte4 = it_[3];
uint8 byte4 = static_cast<uint8>(it_[3]);
return ((byte1 & 0x07) << 18)
| ((byte2 & 0x3F) << 12)
| ((byte3 & 0x3F) << 6)


+ 45
- 0
cpp/src/utf/unicodetext_test.cc View File

@ -0,0 +1,45 @@
// Copyright 2011 Facebook, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
//
// Author: Ben Gertzfield
#include <gtest/gtest.h>
#include "unicodetext.h"
namespace i18n {
namespace unicodetext {
// Checks that dereferencing the iterator returned by UnicodeText::begin()
// yields the expected code point for UTF-8 sequences that are one, two, three
// and four bytes long. Every multi-byte sequence below contains bytes >= 0x80.
TEST(UnicodeTextTest, Iterator) {
  // Associates a UTF-8 encoded string with the code point it starts with.
  struct DecodingCase {
    const char* utf8;
    unsigned int code_point;
  };
  const DecodingCase kCases[] = {
    { "\x31", 0x31 },  // U+0031 DIGIT ONE
    { "\xC2\xBD", 0x00BD },  // U+00BD VULGAR FRACTION ONE HALF
    { "\xEF\xBC\x91", 0xFF11 },  // U+FF11 FULLWIDTH DIGIT ONE
    { "\xF0\x9F\x80\x80", 0x1F000 },  // U+1F000 MAHJONG TILE EAST WIND
  };
  const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);

  for (size_t index = 0; index != kNumCases; ++index) {
    const DecodingCase& current = kCases[index];

    // The UnicodeText only points at the bytes, so keep the backing string
    // alive for as long as the iterator is in use.
    string encoded(current.utf8);
    UnicodeText text;
    text.PointToUTF8(encoded.data(), encoded.size());

    UnicodeText::const_iterator first = text.begin();
    EXPECT_EQ(current.code_point, *first);
  }
}
} // namespace unicodetext
} // namespace i18n

Loading…
Cancel
Save