Fix UnicodeText const_iterator to make it work for MacOS.

15 years ago · 5b4c8d8774
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@ -234,6 +234,7 @@ set (TEST_SOURCES
  "src/run_tests.cc"
  "src/stringutil_test.cc"
  "src/test_metadata.cc"  # Generated by build tools.
  "src/utf/unicodetext_test.cc"
 )

 add_executable (libphonenumber_test ${TEST_SOURCES})
--- a/cpp/src/utf/unicodetext.cc
+++ b/cpp/src/utf/unicodetext.cc
@ -442,22 +442,22 @@ char32 UnicodeText::const_iterator::operator*() const {
  // for speed, we do the calculation ourselves.)

  // Convert from UTF-8
  int byte1 = it_[0];
  uint8 byte1 = static_cast<uint8>(it_[0]);
  if (byte1 < 0x80)
    return byte1;

  int byte2 = it_[1];
  uint8 byte2 = static_cast<uint8>(it_[1]);
  if (byte1 < 0xE0)
    return ((byte1 & 0x1F) << 6)
          | (byte2 & 0x3F);

  int byte3 = it_[2];
  uint8 byte3 = static_cast<uint8>(it_[2]);
  if (byte1 < 0xF0)
    return ((byte1 & 0x0F) << 12)
         | ((byte2 & 0x3F) << 6)
         |  (byte3 & 0x3F);

  int byte4 = it_[3];
  uint8 byte4 = static_cast<uint8>(it_[3]);
  return ((byte1 & 0x07) << 18)
       | ((byte2 & 0x3F) << 12)
       | ((byte3 & 0x3F) << 6)
--- a/cpp/src/utf/unicodetext_test.cc
+++ b/cpp/src/utf/unicodetext_test.cc
@ -0,0 +1,45 @@
 // Copyright 2011 Facebook, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may
 // not use this file except in compliance with the License. You may obtain
 // a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 // License for the specific language governing permissions and limitations
 // under the License.
 //
 // Author: Ben Gertzfield

 #include <gtest/gtest.h>

 #include "unicodetext.h"

 namespace i18n {
 namespace unicodetext {

 // Checks that dereferencing a UnicodeText iterator positioned at the start of
 // a UTF-8 buffer yields the expected code point, for encodings that are one,
 // two, three and four bytes long.
 TEST(UnicodeTextTest, Iterator) {
  // A UTF-8 encoded character paired with the code point it encodes.
  struct TestCase {
    const char* utf8;
    unsigned int code_point;
  };

  const TestCase kCases[] = {
    { "\x31", 0x31 },  // U+0031 DIGIT ONE (1 byte)
    { "\xC2\xBD", 0x00BD },  // U+00BD VULGAR FRACTION ONE HALF (2 bytes)
    { "\xEF\xBC\x91", 0xFF11 },  // U+FF11 FULLWIDTH DIGIT ONE (3 bytes)
    { "\xF0\x9F\x80\x80", 0x1F000 },  // U+1F000 MAHJONG TILE EAST WIND (4 bytes)
  };
  const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);

  for (size_t idx = 0; idx < kNumCases; idx++) {
    const TestCase& test_case = kCases[idx];

    // The string must stay alive for as long as the UnicodeText points at its
    // bytes, so both live in the same loop-body scope.
    string encoded(test_case.utf8);
    UnicodeText text;
    text.PointToUTF8(encoded.data(), encoded.size());

    // Only the first code point of each buffer is examined.
    UnicodeText::const_iterator first = text.begin();
    EXPECT_EQ(test_case.code_point, *first);
  }
 }

 } // namespace unicodetext
 } // namespace i18n