CPP: Add geocoding data generator.

14 years ago · ac08bfe262
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@ -154,6 +154,26 @@ add_custom_target (
  COMMENT "Generating Protocol Buffers code"
 )

 # Geocoding data cpp file generation.
 #
 # The generate_geocoding_data tool is built from tools/cpp into TOOLS_DIR and
 # is then run over the geocoding *.txt resources to produce geocoding_data.cc.
 #
 # Paths are anchored on CMAKE_CURRENT_SOURCE_DIR so that the OUTPUT below names
 # the same file as the relative "src/phonenumbers/geocoding/geocoding_data.cc"
 # entry of the library sources, even when this directory is not the top-level
 # project.
 set (TOOLS_DIR "${CMAKE_CURRENT_BINARY_DIR}/tools")
 add_subdirectory ("${CMAKE_CURRENT_SOURCE_DIR}/../tools/cpp" "${TOOLS_DIR}")

 set (GEOCODING_DIR "${RESOURCES_DIR}/geocoding")
 # The glob is evaluated at configure time only: re-run CMake after adding or
 # removing geocoding resource files.
 file (GLOB_RECURSE GEOCODING_SOURCES "${GEOCODING_DIR}/*.txt")

 set (GEOCODING_DATA_OUTPUT
  "${CMAKE_CURRENT_SOURCE_DIR}/src/phonenumbers/geocoding/geocoding_data.cc"
 )

 add_custom_command (
  OUTPUT "${GEOCODING_DATA_OUTPUT}"
  COMMAND generate_geocoding_data "${GEOCODING_DIR}" "${GEOCODING_DATA_OUTPUT}"
  # Regenerate when any resource file or the generator itself changes.
  DEPENDS ${GEOCODING_SOURCES}
          generate_geocoding_data
  COMMENT "Generating geocoding data code"
  # Escape the arguments identically on every platform and generator.
  VERBATIM
 )

 set (
  SOURCES
  "src/base/string_piece.cc"
@ -161,6 +181,7 @@ set (
  "src/phonenumbers/default_logger.cc"
  "src/phonenumbers/geocoding/area_code_map.cc"
  "src/phonenumbers/geocoding/default_map_storage.cc"
  "src/phonenumbers/geocoding/geocoding_data.cc"
  "src/phonenumbers/logger.cc"
  "src/phonenumbers/metadata.h"          # Generated by build tools.
  "src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers.
@ -310,9 +331,27 @@ add_library (phonenumber_testing STATIC ${TESTING_LIBRARY_SOURCES})
 target_link_libraries (phonenumber_testing ${LIBRARY_DEPS})
 add_dependencies (phonenumber_testing generate-sources ${TEST_METADATA_TARGET})

 # Test geocoding data cpp files generation.
 #
 # Runs the generate_geocoding_data tool over the test geocoding *.txt
 # resources to produce geocoding_test_data.cc.
 #
 # The output path is anchored on CMAKE_CURRENT_SOURCE_DIR so that it names the
 # same file as the relative
 # "test/phonenumbers/geocoding/geocoding_test_data.cc" entry of the test
 # sources, even when this directory is not the top-level project.
 set (GEOCODING_TEST_DIR "${RESOURCES_DIR}/test/geocoding")
 # The glob is evaluated at configure time only: re-run CMake after adding or
 # removing test resource files.
 file (GLOB_RECURSE GEOCODING_TEST_SOURCES "${GEOCODING_TEST_DIR}/*.txt")

 set (GEOCODING_TEST_DATA_OUTPUT
  "${CMAKE_CURRENT_SOURCE_DIR}/test/phonenumbers/geocoding/geocoding_test_data.cc"
 )

 add_custom_command (
  OUTPUT "${GEOCODING_TEST_DATA_OUTPUT}"
  COMMAND generate_geocoding_data "${GEOCODING_TEST_DIR}"
    "${GEOCODING_TEST_DATA_OUTPUT}"
  # Regenerate when any test resource file or the generator itself changes.
  DEPENDS ${GEOCODING_TEST_SOURCES} generate_geocoding_data
  COMMENT "Generating geocoding test data code"
  # Escape the arguments identically on every platform and generator.
  VERBATIM
 )

 set (TEST_SOURCES
  "test/phonenumbers/asyoutypeformatter_test.cc"
  "test/phonenumbers/geocoding/area_code_map_test.cc"
  "test/phonenumbers/geocoding/geocoding_test_data.cc"
  "test/phonenumbers/logger_test.cc"
  "test/phonenumbers/phonenumberutil_test.cc"
  "test/phonenumbers/regexp_adapter_test.cc"
@ -339,7 +378,12 @@ if (NOT WIN32)
 endif ()

 target_link_libraries (libphonenumber_test ${TEST_LIBS})
 add_custom_target(test COMMAND libphonenumber_test DEPENDS libphonenumber_test)
 # "test" target: runs the geocoding data generator's own test binary first,
 # then the library test binary. DEPENDS makes sure both executables are built
 # before the commands run.
 add_custom_target (test
  COMMAND generate_geocoding_data_test
  COMMAND libphonenumber_test

  DEPENDS generate_geocoding_data_test libphonenumber_test
 )

 # Install rules.
 install (FILES
--- a/cpp/src/base/basictypes.h
+++ b/cpp/src/base/basictypes.h
@ -271,18 +271,6 @@ struct CompileAssert {
 //   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.


 // MetatagId refers to metatag-id that we assign to
 // each metatag <name, value> pair..
 typedef uint32 MetatagId;

 // Argument type used in interfaces that can optionally take ownership
 // of a passed in argument.  If TAKE_OWNERSHIP is passed, the called
 // object takes ownership of the argument.  Otherwise it does not.
 enum Ownership {
  DO_NOT_TAKE_OWNERSHIP,
  TAKE_OWNERSHIP
 };

 // bit_cast<Dest,Source> is a template function that implements the
 // equivalent of "*reinterpret_cast<Dest*>(&source)".  We need this in
 // very low-level functions like the protobuf library and fast math
@ -357,7 +345,7 @@ inline Dest bit_cast(const Source& source) {
 //     ignore_result(my_var.release());
 //
 template<typename T>
 inline void ignore_result(const T& ignored) {
 inline void ignore_result(const T&) {
 }

 // The following enum should be used only as a constructor argument to indicate
--- a/cpp/src/phonenumbers/geocoding/geocoding_data.h
+++ b/cpp/src/phonenumbers/geocoding/geocoding_data.h
@ -0,0 +1,77 @@
 // Copyright (C) 2012 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // This file is generated automatically, do not edit it manually.

 #ifndef I18N_PHONENUMBERS_GEOCODING_DATA
 #define I18N_PHONENUMBERS_GEOCODING_DATA

 #include "base/basictypes.h"

 namespace i18n {
 namespace phonenumbers {

 // Set of languages associated with one country; this is the record type
 // returned by get_country_languages().
 // NOTE(review): presumably these are the languages for which geocoding
 // descriptions exist for that country - confirm against the generator.
 struct CountryLanguages {
  // Sorted array of language codes.
  const char** available_languages;

  // Number of elements in available_languages.
  const int available_languages_size;
 };

 // Phone number prefixes and their descriptions; this is the record type
 // returned by get_prefix_descriptions() for one prefix language code pair.
 // prefixes and descriptions are parallel arrays sharing prefixes_size.
 struct PrefixDescriptions {
  // Sorted array of phone number prefixes.
  const int32* prefixes;

  // Number of elements in prefixes.
  const int prefixes_size;

  // Array of phone number prefix descriptions, mapped one to one
  // to prefixes.
  const char** descriptions;

  // Sorted array of unique prefix lengths in base 10.
  // NOTE(review): presumably the distinct decimal digit counts of the values
  // in prefixes - confirm against the generator.
  const int32* possible_lengths;

  // Number of elements in possible_lengths.
  const int possible_lengths_size;
 };

 // Returns a sorted array of country calling codes.
 const int* get_country_calling_codes();

 // Returns the number of country calling codes in the
 // get_country_calling_codes() array.
 int get_country_calling_codes_size();

 // Returns the CountryLanguages record for the country at index, index
 // being in [0, get_country_calling_codes_size()).
 // NOTE(review): presumably index is the position of the country in the
 // get_country_calling_codes() array - confirm against the generator.
 const CountryLanguages* get_country_languages(int index);

 // Returns a sorted array of prefix language code pairs like
 // "1_de" or "82_ko".
 const char** get_prefix_language_code_pairs();

 // Returns the number of elements in the
 // get_prefix_language_code_pairs() array.
 int get_prefix_language_code_pairs_size();

 // Returns the PrefixDescriptions for the prefix language code pair at
 // index, index being in [0, get_prefix_language_code_pairs_size()).
 const PrefixDescriptions* get_prefix_descriptions(int index);

 }  // namespace phonenumbers
 }  // namespace i18n

 #endif  // I18N_PHONENUMBERS_GEOCODING_DATA
--- a/cpp/test/phonenumbers/geocoding/geocoding_test_data.h
+++ b/cpp/test/phonenumbers/geocoding/geocoding_test_data.h
@ -0,0 +1,50 @@
 // Copyright (C) 2012 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #ifndef I18N_PHONENUMBERS_GEOCODING_TEST_DATA
 #define I18N_PHONENUMBERS_GEOCODING_TEST_DATA

 #include "base/basictypes.h"
 #include "phonenumbers/geocoding/geocoding_data.h"

 namespace i18n {
 namespace phonenumbers {

 // Test-data counterparts of the accessors declared in
 // phonenumbers/geocoding/geocoding_data.h.

 // Returns a sorted array of country calling codes.
 const int* get_test_country_calling_codes();

 // Returns the number of country calling codes in the
 // get_test_country_calling_codes() array.
 int get_test_country_calling_codes_size();

 // Returns the CountryLanguages record for the country at index, index
 // being in [0, get_test_country_calling_codes_size()).
 const CountryLanguages* get_test_country_languages(int index);

 // Returns a sorted array of prefix language code pairs like
 // "1_de" or "82_ko".
 const char** get_test_prefix_language_code_pairs();

 // Returns the number of elements in the
 // get_test_prefix_language_code_pairs() array.
 int get_test_prefix_language_code_pairs_size();

 // Returns the PrefixDescriptions for the prefix language code pair at
 // index, index being in [0, get_test_prefix_language_code_pairs_size()).
 const PrefixDescriptions* get_test_prefix_descriptions(int index);

 }  // namespace phonenumbers
 }  // namespace i18n

 #endif  // I18N_PHONENUMBERS_GEOCODING_TEST_DATA
--- a/tools/cpp/CMakeLists.txt
+++ b/tools/cpp/CMakeLists.txt
@ -0,0 +1,76 @@
 # Copyright (C) 2012 The Libphonenumber Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 # Author: Patrick Mezard

 cmake_minimum_required (VERSION 2.8)

 project (generate_geocoding_data)

 # Helper functions dealing with finding libraries and programs this library
 # depends on.

 # Aborts configuration with a fatal error explaining that a dependency is
 # missing.
 #   DESCRIPTION - human readable name of the dependency.
 #   FILE        - the header or library name that could not be located.
 function (print_error DESCRIPTION FILE)
  # Assemble the diagnostic first, then hand it to message() in one piece.
  set (ERROR_TEXT
    "Can't find ${DESCRIPTION}: can't locate ${FILE}. Please read the README.")
  message (FATAL_ERROR "${ERROR_TEXT}")
 endfunction ()

 # Find a library. If it has not been found, stop CMake with a fatal error
 # message.
 #
 #   NAME        - prefix of the cache variables filled in by the lookups:
 #                 ${NAME}_INCLUDE_DIR (header directory) and ${NAME}_LIB
 #                 (library path).
 #   HEADER      - header file searched for with find_path().
 #   LIBRARY     - library name searched for with find_library().
 #   DESCRIPTION - human readable name used in the error message.
 #
 # On success the header directory is added with include_directories().
 function (find_required_library NAME HEADER LIBRARY DESCRIPTION)
  # Check the header.
  find_path (${NAME}_INCLUDE_DIR ${HEADER})

  # A failed lookup stores "<VAR>-NOTFOUND" in the variable, which if() treats
  # as false. Testing the variable itself is what detects the failure;
  # comparing its expanded value against that same value with "-NOTFOUND"
  # appended can never match.
  if (NOT ${NAME}_INCLUDE_DIR)
    print_error ("${DESCRIPTION}" "${HEADER}")
  endif ()
  include_directories ("${${NAME}_INCLUDE_DIR}")

  # Check the binary.
  find_library (${NAME}_LIB ${LIBRARY})

  if (NOT ${NAME}_LIB)
    print_error ("${DESCRIPTION}" "${LIBRARY}")
  endif ()
 endfunction ()

 # Google Test is needed by the generator's test binary below; this fills in
 # GTEST_INCLUDE_DIR and GTEST_LIB.
 find_required_library (GTEST gtest/gtest.h gtest "Google Test framework")

 # Sources of the command line generator tool.
 set (
  SOURCES
  "src/cpp-build/generate_geocoding_data.cc"
  "src/cpp-build/generate_geocoding_data_main.cc"
 )

 # Treat all warnings as errors, except on Windows. This applies to every
 # target declared in this directory.
 if (NOT WIN32)
  add_definitions ("-Wall -Werror")
 endif ()

 include_directories ("src")

 add_executable (generate_geocoding_data ${SOURCES})

 # The test binary compiles generate_geocoding_data.cc again instead of
 # linking against the tool, and uses run_tests.cc rather than the tool's
 # generate_geocoding_data_main.cc.
 set (TEST_SOURCES
  "src/cpp-build/generate_geocoding_data.cc"
  "test/cpp-build/generate_geocoding_data_test.cc"
  "test/cpp-build/run_tests.cc"
 )

 set (TEST_LIBS ${GTEST_LIB})

 # Build the testing binary.
 include_directories ("test")
 add_executable (generate_geocoding_data_test ${TEST_SOURCES})
 target_link_libraries (generate_geocoding_data_test ${TEST_LIBS})
--- a/tools/cpp/src/base/basictypes.h
+++ b/tools/cpp/src/base/basictypes.h
@ -0,0 +1,368 @@
 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef BASE_BASICTYPES_H_
 #define BASE_BASICTYPES_H_
 #pragma once

 #include <limits.h>         // So we can set the bounds of our types
 #include <stddef.h>         // For size_t
 #include <string.h>         // for memcpy

 #ifndef COMPILER_MSVC
 // stdint.h is part of C99 but MSVC doesn't have it.
 #include <stdint.h>         // For intptr_t.
 #endif

 #ifdef INT64_MAX

 // INT64_MAX is defined if C99 stdint.h is included; use the
 // native types if available.
 typedef int8_t int8;
 typedef int16_t int16;
 typedef int32_t int32;
 typedef int64_t int64;
 typedef uint8_t uint8;
 typedef uint16_t uint16;
 typedef uint32_t uint32;
 typedef uint64_t uint64;

 const uint8  kuint8max  = UINT8_MAX;
 const uint16 kuint16max = UINT16_MAX;
 const uint32 kuint32max = UINT32_MAX;
 const uint64 kuint64max = UINT64_MAX;
 const  int8  kint8min   = INT8_MIN;
 const  int8  kint8max   = INT8_MAX;
 const  int16 kint16min  = INT16_MIN;
 const  int16 kint16max  = INT16_MAX;
 const  int32 kint32min  = INT32_MIN;
 const  int32 kint32max  = INT32_MAX;
 const  int64 kint64min  = INT64_MIN;
 const  int64 kint64max  = INT64_MAX;

 #else // !INT64_MAX

 typedef signed char         int8;
 typedef short               int16;
 // TODO: Remove these type guards.  These are to avoid conflicts with
 // obsolete/protypes.h in the Gecko SDK.
 #ifndef _INT32
 #define _INT32
 typedef int                 int32;
 #endif

 // The NSPR system headers define 64-bit as |long| when possible.  In order to
 // not have typedef mismatches, we do the same on LP64.
 #if __LP64__
 typedef long                int64;
 #else
 typedef long long           int64;
 #endif

 // NOTE: unsigned types are DANGEROUS in loops and other arithmetical
 // places.  Use the signed types unless your variable represents a bit
 // pattern (eg a hash value) or you really need the extra bit.  Do NOT
 // use 'unsigned' to express "this value should always be positive";
 // use assertions for this.

 typedef unsigned char      uint8;
 typedef unsigned short     uint16;
 // TODO: Remove these type guards.  These are to avoid conflicts with
 // obsolete/protypes.h in the Gecko SDK.
 #ifndef _UINT32
 #define _UINT32
 typedef unsigned int       uint32;
 #endif

 // See the comment above about NSPR and 64-bit.
 #if __LP64__
 typedef unsigned long uint64;
 #else
 typedef unsigned long long uint64;
 #endif

 #endif // !INT64_MAX

 typedef signed char         schar;

 // A type to represent a Unicode code-point value. As of Unicode 4.0,
 // such values require up to 21 bits.
 // (For type-checking on pointers, make this explicitly signed,
 // and it should always be the signed version of whatever int32 is.)
 typedef signed int         char32;

 // A macro to disallow the copy constructor and operator= functions
 // This should be used in the private: declarations for a class
 #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);               \
  void operator=(const TypeName&)

 // An older, deprecated, politically incorrect name for the above.
 // NOTE: The usage of this macro was banned from our code base, but some
 // third_party libraries are still using it.
 // TODO(tfarina): Figure out how to fix the usage of this macro in the
 // third_party libraries and get rid of it.
 #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)

 // A macro to disallow all the implicit constructors, namely the
 // default constructor, copy constructor and operator= functions.
 //
 // This should be used in the private: declarations for a class
 // that wants to prevent anyone from instantiating it. This is
 // especially useful for classes containing only static methods.
 #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName();                                    \
  DISALLOW_COPY_AND_ASSIGN(TypeName)

 // The arraysize(arr) macro returns the # of elements in an array arr.
 // The expression is a compile-time constant, and therefore can be
 // used in defining new arrays, for example.  If you use arraysize on
 // a pointer by mistake, you will get a compile-time error.
 //
 // One caveat is that arraysize() doesn't accept any array of an
 // anonymous type or a type defined inside a function.  In these rare
 // cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below.  This is
 // due to a limitation in C++'s template system.  The limitation might
 // eventually be removed, but it hasn't happened yet.

 // This template function declaration is used in defining arraysize.
 // Note that the function doesn't need an implementation, as we only
 // use its type.
 template <typename T, size_t N>
 char (&ArraySizeHelper(T (&array)[N]))[N];

 // That gcc wants both of these prototypes seems mysterious. VC, for
 // its part, can't decide which to use (another mystery). Matching of
 // template overloads: the final frontier.
 #ifndef _MSC_VER
 template <typename T, size_t N>
 char (&ArraySizeHelper(const T (&array)[N]))[N];
 #endif

 #define arraysize(array) (sizeof(ArraySizeHelper(array)))

 // ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
 // but can be used on anonymous types or types defined inside
 // functions.  It's less safe than arraysize as it accepts some
 // (although not all) pointers.  Therefore, you should use arraysize
 // whenever possible.
 //
 // The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
 // size_t.
 //
 // ARRAYSIZE_UNSAFE catches a few type errors.  If you see a compiler error
 //
 //   "warning: division by zero in ..."
 //
 // when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
 // You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
 //
 // The following comments are on the implementation details, and can
 // be ignored by the users.
 //
 // ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
 // the array) and sizeof(*(arr)) (the # of bytes in one array
 // element).  If the former is divisible by the latter, perhaps arr is
 // indeed an array, in which case the division result is the # of
 // elements in the array.  Otherwise, arr cannot possibly be an array,
 // and we generate a compiler error to prevent the code from
 // compiling.
 //
 // Since the size of bool is implementation-defined, we need to cast
 // !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
 // result has type size_t.
 //
 // This macro is not perfect as it wrongfully accepts certain
 // pointers, namely where the pointer size is divisible by the pointee
 // size.  Since all our code has to go through a 32-bit compiler,
 // where a pointer is 4 bytes, this means all pointers to a type whose
 // size is 3 or greater than 4 will be (righteously) rejected.

 #define ARRAYSIZE_UNSAFE(a) \
  ((sizeof(a) / sizeof(*(a))) / \
   static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))


 // Use implicit_cast as a safe version of static_cast or const_cast
 // for upcasting in the type hierarchy (i.e. casting a pointer to Foo
 // to a pointer to SuperclassOfFoo or casting a pointer to Foo to
 // a const pointer to Foo).
 // When you use implicit_cast, the compiler checks that the cast is safe.
 // Such explicit implicit_casts are necessary in surprisingly many
 // situations where C++ demands an exact type match instead of an
 // argument type convertible to a target type.
 //
 // The From type can be inferred, so the preferred syntax for using
 // implicit_cast is the same as for static_cast etc.:
 //
 //   implicit_cast<ToType>(expr)
 //
 // implicit_cast would have been part of the C++ standard library,
 // but the proposal was submitted too late.  It will probably make
 // its way into the language in the future.
 template <typename To, typename From>
 inline To implicit_cast(const From& value) {
  // Returning the argument makes the compiler perform the From -> To
  // conversion implicitly, so only conversions that are allowed without a
  // cast will compile.
  return value;
 }

 // The COMPILE_ASSERT macro can be used to verify that a compile time
 // expression is true. For example, you could use it to verify the
 // size of a static array:
 //
 //   COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
 //                  content_type_names_incorrect_size);
 //
 // or to make sure a struct is smaller than a certain size:
 //
 //   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
 //
 // The second argument to the macro is the name of the variable. If
 // the expression is false, most compilers will issue a warning/error
 // containing the name of the variable.

 template <bool>
 struct CompileAssert {
 };

 #undef COMPILE_ASSERT
 #define COMPILE_ASSERT(expr, msg) \
  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]

 // Implementation details of COMPILE_ASSERT:
 //
 // - COMPILE_ASSERT works by defining an array type that has -1
 //   elements (and thus is invalid) when the expression is false.
 //
 // - The simpler definition
 //
 //     #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
 //
 //   does not work, as gcc supports variable-length arrays whose sizes
 //   are determined at run-time (this is gcc's extension and not part
 //   of the C++ standard).  As a result, gcc fails to reject the
 //   following code with the simple definition:
 //
 //     int foo;
 //     COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
 //                               // not a compile-time constant.
 //
 // - By using the type CompileAssert<(bool(expr))>, we ensure that
 //   expr is a compile-time constant.  (Template arguments must be
 //   determined at compile-time.)
 //
 // - The outer parentheses in CompileAssert<(bool(expr))> are necessary
 //   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
 //
 //     CompileAssert<bool(expr)>
 //
 //   instead, these compilers will refuse to compile
 //
 //     COMPILE_ASSERT(5 > 0, some_message);
 //
 //   (They seem to think the ">" in "5 > 0" marks the end of the
 //   template argument list.)
 //
 // - The array size is (bool(expr) ? 1 : -1), instead of simply
 //
 //     ((expr) ? 1 : -1).
 //
 //   This is to avoid running into a bug in MS VC 7.1, which
 //   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.


 // bit_cast<Dest,Source> is a template function that implements the
 // equivalent of "*reinterpret_cast<Dest*>(&source)".  We need this in
 // very low-level functions like the protobuf library and fast math
 // support.
 //
 //   float f = 3.14159265358979;
 //   int i = bit_cast<int32>(f);
 //   // i = 0x40490fdb
 //
 // The classical address-casting method is:
 //
 //   // WRONG
 //   float f = 3.14159265358979;            // WRONG
 //   int i = * reinterpret_cast<int*>(&f);  // WRONG
 //
 // The address-casting method actually produces undefined behavior
 // according to ISO C++ specification section 3.10 -15 -.  Roughly, this
 // section says: if an object in memory has one type, and a program
 // accesses it with a different type, then the result is undefined
 // behavior for most values of "different type".
 //
 // This is true for any cast syntax, either *(int*)&f or
 // *reinterpret_cast<int*>(&f).  And it is particularly true for
 // conversions between integral lvalues and floating-point lvalues.
 //
 // The purpose of 3.10 -15- is to allow optimizing compilers to assume
 // that expressions with different types refer to different memory.  gcc
 // 4.0.1 has an optimizer that takes advantage of this.  So a
 // non-conforming program quietly produces wildly incorrect output.
 //
 // The problem is not the use of reinterpret_cast.  The problem is type
 // punning: holding an object in memory of one type and reading its bits
 // back using a different type.
 //
 // The C++ standard is more subtle and complex than this, but that
 // is the basic idea.
 //
 // Anyways ...
 //
 // bit_cast<> calls memcpy() which is blessed by the standard,
 // especially by the example in section 3.9 .  Also, of course,
 // bit_cast<> wraps up the nasty logic in one place.
 //
 // Fortunately memcpy() is very fast.  In optimized mode, with a
 // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
 // code with the minimal amount of data movement.  On a 32-bit system,
 // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
 // compiles to two loads and two stores.
 //
 // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
 //
 // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
 // is likely to surprise you.

 template <class Dest, class Source>
 inline Dest bit_cast(const Source& source) {
  // Compile time assertion: sizeof(Dest) == sizeof(Source)
  // A compile error here means your Dest and Source have different sizes.
  typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1];
  // Reference the typedef so that compilers which warn about unused local
  // typedefs stay quiet; such a warning would be fatal when warnings are
  // treated as errors. sizeof() is unevaluated, so no code is generated.
  (void) sizeof(VerifySizesAreEqual);

  // Copy the object representation byte for byte instead of type punning
  // through a pointer cast, which would be undefined behavior.
  Dest dest;
  memcpy(&dest, &source, sizeof(dest));
  return dest;
 }

 // Used to explicitly mark the return value of a function as unused. If you are
 // really sure you don't want to do anything with the return value of a function
 // that has been marked WARN_UNUSED_RESULT, wrap it with this. Example:
 //
 //   scoped_ptr<MyType> my_var = ...;
 //   if (TakeOwnership(my_var.get()) == SUCCESS)
 //     ignore_result(my_var.release());
 //
 template<typename T>
 inline void ignore_result(const T&) {
  // Intentionally empty: binding the value to the const reference parameter
  // is the whole point. The parameter is left unnamed because it is never
  // read.
 }

 // The following enum should be used only as a constructor argument to indicate
 // that the variable has static storage class, and that the constructor should
 // do nothing to its state.  It indicates to the reader that it is legal to
 // declare a static instance of the class, provided the constructor is given
 // the base::LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
 // static variable that has a constructor or a destructor because invocation
 // order is undefined.  However, IF the type can be initialized by filling with
 // zeroes (which the loader does for static variables), AND the destructor also
 // does nothing to the storage, AND there are no virtual methods, then a
 // constructor declared as
 //       explicit MyClass(base::LinkerInitialized x) {}
 // and invoked as
 //       static MyClass my_variable_name(base::LINKER_INITIALIZED);
 namespace base {
 enum LinkerInitialized { LINKER_INITIALIZED };
 }  // base

 #endif  // BASE_BASICTYPES_H_
--- a/tools/cpp/src/base/basictypes.h.orig
+++ b/tools/cpp/src/base/basictypes.h.orig
@ -0,0 +1,368 @@
 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef BASE_BASICTYPES_H_
 #define BASE_BASICTYPES_H_
 #pragma once

 #include <limits.h>         // So we can set the bounds of our types
 #include <stddef.h>         // For size_t
 #include <string.h>         // for memcpy

 #ifndef COMPILER_MSVC
 // stdint.h is part of C99 but MSVC doesn't have it.
 #include <stdint.h>         // For intptr_t.
 #endif

 // Fixed-width integer aliases (int8 ... uint64). Two alternative sets of
 // definitions follow, selected by whether the INT64_MAX macro is defined.
 #ifdef INT64_MAX

 // INT64_MAX is defined if C99 stdint.h is included; use the
 // native types if available.
 typedef int8_t int8;
 typedef int16_t int16;
 typedef int32_t int32;
 typedef int64_t int64;
 typedef uint8_t uint8;
 typedef uint16_t uint16;
 typedef uint32_t uint32;
 typedef uint64_t uint64;

 // Range limits of the aliases above, taken from the stdint.h limit macros.
 // These constants are only defined in this branch of the conditional.
 const uint8  kuint8max  = UINT8_MAX;
 const uint16 kuint16max = UINT16_MAX;
 const uint32 kuint32max = UINT32_MAX;
 const uint64 kuint64max = UINT64_MAX;
 const  int8  kint8min   = INT8_MIN;
 const  int8  kint8max   = INT8_MAX;
 const  int16 kint16min  = INT16_MIN;
 const  int16 kint16max  = INT16_MAX;
 const  int32 kint32min  = INT32_MIN;
 const  int32 kint32max  = INT32_MAX;
 const  int64 kint64min  = INT64_MIN;
 const  int64 kint64max  = INT64_MAX;

 #else // !INT64_MAX

 // Fallback: spell the aliases with built-in integer types.
 typedef signed char         int8;
 typedef short               int16;
 // TODO: Remove these type guards.  These are to avoid conflicts with
 // obsolete/protypes.h in the Gecko SDK.
 #ifndef _INT32
 #define _INT32
 typedef int                 int32;
 #endif

 // The NSPR system headers define 64-bit as |long| when possible.  In order to
 // not have typedef mismatches, we do the same on LP64.
 #if __LP64__
 typedef long                int64;
 #else
 typedef long long           int64;
 #endif

 // NOTE: unsigned types are DANGEROUS in loops and other arithmetical
 // places.  Use the signed types unless your variable represents a bit
 // pattern (eg a hash value) or you really need the extra bit.  Do NOT
 // use 'unsigned' to express "this value should always be positive";
 // use assertions for this.

 typedef unsigned char      uint8;
 typedef unsigned short     uint16;
 // TODO: Remove these type guards.  These are to avoid conflicts with
 // obsolete/protypes.h in the Gecko SDK.
 #ifndef _UINT32
 #define _UINT32
 typedef unsigned int       uint32;
 #endif

 // See the comment above about NSPR and 64-bit.
 #if __LP64__
 typedef unsigned long uint64;
 #else
 typedef unsigned long long uint64;
 #endif

 #endif // !INT64_MAX

 typedef signed char         schar;

 // A type to represent a Unicode code-point value. As of Unicode 4.0,
 // such values require up to 21 bits.
 // (For type-checking on pointers, make this explicitly signed,
 // and it should always be the signed version of whatever int32 is.)
 typedef signed int         char32;

 // A macro to disallow the copy constructor and operator= functions.
 // This should be used in the private: declarations for a class.
 // The macro expands to declarations only (a copy constructor and a
 // void-returning operator=); it provides no definitions. The trailing
 // semicolon is supplied by the user of the macro.
 #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);               \
  void operator=(const TypeName&)

 // An older, deprecated, politically incorrect name for the above.
 // NOTE: The usage of this macro was banned from our code base, but some
 // third_party libraries are still using it.
 // TODO(tfarina): Figure out how to fix the usage of this macro in the
 // third_party libraries and get rid of it.
 #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)

 // A macro to disallow all the implicit constructors, namely the
 // default constructor, copy constructor and operator= functions.
 //
 // This should be used in the private: declarations for a class
 // that wants to prevent anyone from instantiating it. This is
 // especially useful for classes containing only static methods.
 //
 // Expands to a declaration of the default constructor followed by
 // DISALLOW_COPY_AND_ASSIGN(TypeName); no definitions are provided.
 #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName();                                    \
  DISALLOW_COPY_AND_ASSIGN(TypeName)

 // The arraysize(arr) macro returns the # of elements in an array arr.
 // The expression is a compile-time constant, and therefore can be
 // used in defining new arrays, for example.  If you use arraysize on
 // a pointer by mistake, you will get a compile-time error.
 //
 // One caveat is that arraysize() doesn't accept any array of an
 // anonymous type or a type defined inside a function.  In these rare
 // cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below.  This is
 // due to a limitation in C++'s template system.  The limitation might
 // eventually be removed, but it hasn't happened yet.

 // This template function declaration is used in defining arraysize.
 // Note that the function doesn't need an implementation, as we only
 // use its type.
 // Declared but never defined: only the return type, a reference to an array
 // of N chars, is ever used (inside sizeof).
 template <typename T, size_t N>
 char (&ArraySizeHelper(T (&)[N]))[N];

 // gcc needs the const overload as well, for reasons that remain unclear.
 // VC, on the other hand, cannot choose between the two overloads, so the
 // const one is hidden from it.
 #if !defined(_MSC_VER)
 template <typename T, size_t N>
 char (&ArraySizeHelper(const T (&)[N]))[N];
 #endif

 // Number of elements of a real array, as a compile-time constant.
 #define arraysize(array) (sizeof(ArraySizeHelper(array)))

 // ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
 // but can be used on anonymous types or types defined inside
 // functions.  It's less safe than arraysize as it accepts some
 // (although not all) pointers.  Therefore, you should use arraysize
 // whenever possible.
 //
 // The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
 // size_t.
 //
 // ARRAYSIZE_UNSAFE catches a few type errors.  If you see a compiler error
 //
 //   "warning: division by zero in ..."
 //
 // when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
 // You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
 //
 // The following comments are on the implementation details, and can
 // be ignored by the users.
 //
 // ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
 // the array) and sizeof(*(arr)) (the # of bytes in one array
 // element).  If the former is divisible by the latter, perhaps arr is
 // indeed an array, in which case the division result is the # of
 // elements in the array.  Otherwise, arr cannot possibly be an array,
 // and we generate a compiler error to prevent the code from
 // compiling.
 //
 // Since the size of bool is implementation-defined, we need to cast
 // !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
 // result has type size_t.
 //
 // This macro is not perfect as it wrongfully accepts certain
 // pointers, namely where the pointer size is divisible by the pointee
 // size.  Since all our code has to go through a 32-bit compiler,
 // where a pointer is 4 bytes, this means all pointers to a type whose
 // size is 3 or greater than 4 will be (righteously) rejected.

 #define ARRAYSIZE_UNSAFE(a)            \
  ((sizeof(a) / sizeof(*(a))) /        \
   static_cast<size_t>((sizeof(a) % sizeof(*(a))) == 0))


 // Use implicit_cast as a safe version of static_cast or const_cast
 // for upcasting in the type hierarchy (i.e. casting a pointer to Foo
 // to a pointer to SuperclassOfFoo or casting a pointer to Foo to
 // a const pointer to Foo).
 // When you use implicit_cast, the compiler checks that the cast is safe.
 // Such explicit implicit_casts are necessary in surprisingly many
 // situations where C++ demands an exact type match instead of an
 // argument type convertible to a target type.
 //
 // The From type can be inferred, so the preferred syntax for using
 // implicit_cast is the same as for static_cast etc.:
 //
 //   implicit_cast<ToType>(expr)
 //
 // implicit_cast would have been part of the C++ standard library,
 // but the proposal was submitted too late.  It will probably make
 // its way into the language in the future.
 template <typename To, typename From>
 inline To implicit_cast(const From& f) {
  // Only an implicit conversion is applied here; when none exists from From
  // to To, the return statement fails to compile.
  return f;
 }

 // The COMPILE_ASSERT macro can be used to verify that a compile time
 // expression is true. For example, you could use it to verify the
 // size of a static array:
 //
 //   COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
 //                  content_type_names_incorrect_size);
 //
 // or to make sure a struct is smaller than a certain size:
 //
 //   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
 //
 // The second argument to the macro is the name of the variable. If
 // the expression is false, most compilers will issue a warning/error
 // containing the name of the variable.

 // Empty helper template; it exists only so that COMPILE_ASSERT can use its
 // bool template argument to force expr to be a compile-time constant.
 template <bool>
 struct CompileAssert {
 };

 // Any previous definition of COMPILE_ASSERT is dropped before redefining it.
 // The macro declares an array typedef named msg whose length is 1 when expr
 // is true and -1 (invalid) when it is false. The exact spelling matters; see
 // the implementation notes below.
 #undef COMPILE_ASSERT
 #define COMPILE_ASSERT(expr, msg) \
  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]

 // Implementation details of COMPILE_ASSERT:
 //
 // - COMPILE_ASSERT works by defining an array type that has -1
 //   elements (and thus is invalid) when the expression is false.
 //
 // - The simpler definition
 //
 //     #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
 //
 //   does not work, as gcc supports variable-length arrays whose sizes
 //   are determined at run-time (this is gcc's extension and not part
 //   of the C++ standard).  As a result, gcc fails to reject the
 //   following code with the simple definition:
 //
 //     int foo;
 //     COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
 //                               // not a compile-time constant.
 //
 // - By using the type CompileAssert<(bool(expr))>, we ensure that
 //   expr is a compile-time constant.  (Template arguments must be
 //   determined at compile-time.)
 //
 // - The outer parentheses in CompileAssert<(bool(expr))> are necessary
 //   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
 //
 //     CompileAssert<bool(expr)>
 //
 //   instead, these compilers will refuse to compile
 //
 //     COMPILE_ASSERT(5 > 0, some_message);
 //
 //   (They seem to think the ">" in "5 > 0" marks the end of the
 //   template argument list.)
 //
 // - The array size is (bool(expr) ? 1 : -1), instead of simply
 //
 //     ((expr) ? 1 : -1).
 //
 //   This is to avoid running into a bug in MS VC 7.1, which
 //   causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.


 // bit_cast<Dest,Source> is a template function that implements the
 // equivalent of "*reinterpret_cast<Dest*>(&source)".  We need this in
 // very low-level functions like the protobuf library and fast math
 // support.
 //
 //   float f = 3.14159265358979;
 //   int i = bit_cast<int32>(f);
 //   // i = 0x40490fdb
 //
 // The classical address-casting method is:
 //
 //   // WRONG
 //   float f = 3.14159265358979;            // WRONG
 //   int i = * reinterpret_cast<int*>(&f);  // WRONG
 //
 // The address-casting method actually produces undefined behavior
 // according to ISO C++ specification section 3.10 -15 -.  Roughly, this
 // section says: if an object in memory has one type, and a program
 // accesses it with a different type, then the result is undefined
 // behavior for most values of "different type".
 //
 // This is true for any cast syntax, either *(int*)&f or
 // *reinterpret_cast<int*>(&f).  And it is particularly true for
 // conversions between integral lvalues and floating-point lvalues.
 //
 // The purpose of 3.10 -15- is to allow optimizing compilers to assume
 // that expressions with different types refer to different memory.  gcc
 // 4.0.1 has an optimizer that takes advantage of this.  So a
 // non-conforming program quietly produces wildly incorrect output.
 //
 // The problem is not the use of reinterpret_cast.  The problem is type
 // punning: holding an object in memory of one type and reading its bits
 // back using a different type.
 //
 // The C++ standard is more subtle and complex than this, but that
 // is the basic idea.
 //
 // Anyways ...
 //
 // bit_cast<> calls memcpy() which is blessed by the standard,
 // especially by the example in section 3.9 .  Also, of course,
 // bit_cast<> wraps up the nasty logic in one place.
 //
 // Fortunately memcpy() is very fast.  In optimized mode, with a
 // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
 // code with the minimal amount of data movement.  On a 32-bit system,
 // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
 // compiles to two loads and two stores.
 //
 // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
 //
 // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
 // is likely to surprise you.

 template <class Dest, class Source>
 inline Dest bit_cast(const Source& source) {
  // Size check performed at compile time: the array type below has a
  // negative length, and is therefore ill-formed, whenever Dest and Source
  // differ in size.
  typedef char VerifySizesAreEqual [sizeof(Source) == sizeof(Dest) ? 1 : -1];

  // Copy the object representation of source byte for byte into a Dest.
  Dest result;
  memcpy(&result, &source, sizeof(Dest));
  return result;
 }

 // Used to explicitly mark the return value of a function as unused. If you are
 // really sure you don't want to do anything with the return value of a function
 // that has been marked WARN_UNUSED_RESULT, wrap it with this. Example:
 //
 //   scoped_ptr<MyType> my_var = ...;
 //   if (TakeOwnership(my_var.get()) == SUCCESS)
 //     ignore_result(my_var.release());
 //
 // The parameter is intentionally left unnamed: naming it would make every
 // instantiation trigger an unused-parameter warning.
 template<typename T>
 inline void ignore_result(const T&) {
 }

 // The following enum should be used only as a constructor argument to indicate
 // that the variable has static storage class, and that the constructor should
 // do nothing to its state.  It indicates to the reader that it is legal to
 // declare a static instance of the class, provided the constructor is given
 // the base::LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
 // static variable that has a constructor or a destructor because invocation
 // order is undefined.  However, IF the type can be initialized by filling with
 // zeroes (which the loader does for static variables), AND the destructor also
 // does nothing to the storage, AND there are no virtual methods, then a
 // constructor declared as
 //       explicit MyClass(base::LinkerInitialized x) {}
 // and invoked as
 //       static MyClass my_variable_name(base::LINKER_INITIALIZED);
 namespace base {
 enum LinkerInitialized {
  LINKER_INITIALIZED
 };
 }  // namespace base

 #endif  // BASE_BASICTYPES_H_
--- a/tools/cpp/src/cpp-build/generate_geocoding_data.cc
+++ b/tools/cpp/src/cpp-build/generate_geocoding_data.cc
@ -0,0 +1,656 @@
 // Copyright (C) 2012 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Patrick Mezard

 #include "cpp-build/generate_geocoding_data.h"

 #include <dirent.h>
 #include <sys/stat.h>

 #include <algorithm>
 #include <cctype>
 #include <cerrno>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <iomanip>
 #include <iterator>
 #include <locale>
 #include <map>
 #include <set>
 #include <sstream>
 #include <string>
 #include <utility>
 #include <vector>

 #include "base/basictypes.h"

 namespace i18n {
 namespace phonenumbers {

 using std::map;
 using std::string;
 using std::vector;
 using std::set;
 using std::pair;

 // Scope guard releasing a resource through a C-style release function such
 // as fclose or closedir. The guard stores the address of the caller's handle
 // variable and resets that variable to NULL once the resource has been
 // released, which makes a second release a no-op.
 template <typename ResourceType> class AutoCloser {
 public:
  typedef int (*ReleaseFunction) (ResourceType* resource);

  AutoCloser(ResourceType** resource, ReleaseFunction release_function)
      : handle_(resource), release_(release_function) {}

  ~AutoCloser() { Close(); }

  // Returns the handle currently stored in the guarded variable.
  ResourceType* get_resource() const { return *handle_; }

  // Releases the resource immediately instead of at scope exit. Does nothing
  // when the guarded variable is already NULL. The value returned by the
  // release function is ignored.
  void Close() {
    if (*handle_ == NULL) {
      return;
    }
    release_(*handle_);
    *handle_ = NULL;
  }

 private:
  ResourceType** handle_;
  ReleaseFunction release_;
 };

 // Kinds of directory entries distinguished by this tool.
 enum DirEntryKinds {
  kFile,       // == 0
  kDirectory   // == 1
 };

 // Pairs the name of a directory entry with its kind.
 class DirEntry {
 public:
  DirEntry(const char* n, DirEntryKinds k) : name_(n), kind_(k) {}

  // Entry name, as given to the constructor.
  const std::string& name() const {
    return name_;
  }

  // Entry kind, as given to the constructor.
  DirEntryKinds kind() const {
    return kind_;
  }

 private:
  std::string name_;
  DirEntryKinds kind_;
 };

 // Lists directory entries in path. "." and ".." are excluded, and so are
 // entries that stat() reports as neither a regular file nor a directory.
 // "entries" is cleared first. Returns true on success; when false is returned
 // "entries" may contain a partial listing.
 bool ListDirectory(const string& path, vector<DirEntry>* entries) {
  entries->clear();
  DIR* dir = opendir(path.c_str());
  if (!dir) {
    return false;
  }
  AutoCloser<DIR> dir_closer(&dir, closedir);
  struct stat entry_stat;
  while (true) {
    // readdir() returns NULL both at the end of the stream and on error, and
    // errno is the only way to tell the two cases apart, so it is reset
    // before each call. readdir_r() is deliberately not used: it is
    // deprecated, and its caller-supplied struct dirent can be too small for
    // long file names.
    errno = 0;
    const struct dirent* entry = readdir(dir);
    if (entry == NULL) {
      return errno == 0;
    }
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
      continue;
    }
    const string entry_path = path + "/" + entry->d_name;
    if (stat(entry_path.c_str(), &entry_stat)) {
      return false;
    }
    DirEntryKinds kind = kFile;
    if (S_ISDIR(entry_stat.st_mode)) {
      kind = kDirectory;
    } else if (!S_ISREG(entry_stat.st_mode)) {
      // Sockets, devices, FIFOs and the like are silently skipped.
      continue;
    }
    entries->push_back(DirEntry(entry->d_name, kind));
  }
 }

 // Tells whether the trailing characters of s are exactly suffix. An empty
 // suffix matches any string.
 bool EndsWith(const string& s, const string& suffix) {
  const string::size_type suffix_length = suffix.length();
  if (s.length() < suffix_length) {
    return false;
  }
  return s.compare(s.length() - suffix_length, suffix_length, suffix) == 0;
 }

 // Converts string to integer, returns true on success. Parsing follows the
 // rules of formatted stream extraction: leading whitespace is skipped and
 // any characters following the number are ignored.
 bool StrToInt(const string& s, int32* n) {
  std::stringstream stream;
  stream << s;
  stream >> *n;
  // The stream state is queried explicitly. Returning the stream itself
  // relies on an implicit stream-to-bool conversion, which is not available
  // in a return statement from C++11 onwards (the conversion is explicit).
  return !stream.fail();
 }

 // Converts integer to string, returns true on success. The number is written
 // in decimal with the stream's default formatting.
 bool IntToStr(int32 n, string* s) {
  std::stringstream stream;
  stream << n;
  stream >> *s;
  // The stream state is queried explicitly. Returning the stream itself
  // relies on an implicit stream-to-bool conversion, which is not available
  // in a return statement from C++11 onwards (the conversion is explicit).
  return !stream.fail();
 }

 // Parses the prefix descriptions file at path, clears and fills the output
 // prefixes phone number prefix to description mapping.
 //
 // Each meaningful line has the form "<prefix>|<description>". Surrounding
 // whitespace is trimmed, blank lines and lines starting with '#' are skipped,
 // and so are lines without a '|' separator. When a prefix appears more than
 // once the last description wins.
 //
 // Returns true on success. Returns false when the file cannot be opened or
 // read, when a line does not fit in the internal line buffer, or when a
 // prefix is not a number.
 bool ParsePrefixes(const string& path, map<int32, string>* prefixes) {
  prefixes->clear();
  FILE* input = fopen(path.c_str(), "r");
  if (!input) {
    return false;
  }
  AutoCloser<FILE> input_closer(&input, fclose);
  const int kMaxLineLength = 2*1024;
  vector<char> buffer(kMaxLineLength);
  vector<char>::iterator begin, end, sep;
  string prefix;
  int32 prefix_code;
  while (fgets(&buffer[0], buffer.size(), input)) {
    begin = buffer.begin();
    // From here on "end" is always a past-the-end iterator of the content.
    end = std::find(begin, buffer.end(), '\0');
    if (end == begin) {
      continue;
    }
    if (*(end - 1) == '\n') {
      // Exclude the LF from the content.
      --end;
    } else if (!feof(input)) {
      // A line without LF can only happen at the end of file; anywhere else
      // it means the line was too long for the buffer.
      return false;
    }

    // Trim and check for comments. The casts keep std::isspace() away from
    // negative values, which non-ASCII bytes have when char is signed.
    for (; begin != end &&
           std::isspace(static_cast<unsigned char>(*begin)); ++begin) {}
    for (; end != begin &&
           std::isspace(static_cast<unsigned char>(*(end - 1))); --end) {}
    if (begin == end || *begin == '#') {
      continue;
    }

    sep = std::find(begin, end, '|');
    if (sep == end) {
      continue;
    }
    prefix = string(begin, sep);
    if (!StrToInt(prefix, &prefix_code)) {
      return false;
    }
    (*prefixes)[prefix_code] = string(sep + 1, end);
  }
  return ferror(input) == 0;
 }

 // Builds a C string literal from s. The output is enclosed in double-quotes.
 // Quotes (single and double) and backslashes are escaped with a backslash;
 // bytes outside the printable ASCII range [32, 127) are written as two-digit
 // \x escapes.
 //
 // Because a \x escape absorbs every hexadecimal digit that follows it, the
 // literal is split (closing and reopening the quotes) around each escape.
 //
 // An input string:
 //   Op\xc3\xa9ra
 // becomes:
 //   "Op""\xc3""\xa9""ra"
 string MakeStringLiteral(const string& s) {
  std::stringstream buffer;
  // 0: nothing written yet, 1: last output was a plain character,
  // 2: last output was a \x escape.
  int prev_is_hex = 0;
  buffer << std::hex << std::setfill('0');
  buffer << "\"";
  for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
    const char c = *it;
    if (c >= 32 && c < 127) {
      if (prev_is_hex == 2) {
        buffer << "\"\"";
      }
      // An unescaped double quote would terminate the literal early and an
      // unescaped backslash would start an escape sequence.
      if (c == '\'' || c == '"' || c == '\\') {
        buffer << "\\";
      }
      buffer << c;
      prev_is_hex = 1;
    } else {
      if (prev_is_hex != 0) {
        buffer << "\"\"";
      }
      // Go through unsigned char so that bytes >= 0x80 print as 80..ff
      // whether or not plain char is signed.
      buffer << "\\x" << std::setw(2)
             << static_cast<int>(static_cast<unsigned char>(c));
      prev_is_hex = 2;
    }
  }
  buffer << "\"";
  return buffer.str();
 }

 // Writes s to output in the form produced by MakeStringLiteral().
 void WriteStringLiteral(const string& s, FILE* output) {
  fputs(MakeStringLiteral(s).c_str(), output);
 }

 // License banner written by WriteLicense(). It ends with a notice that the
 // file is generated, followed by an empty line.
 const char kLicense[] =
  "// Copyright (C) 2012 The Libphonenumber Authors\n"
  "//\n"
  "// Licensed under the Apache License, Version 2.0 (the \"License\");\n"
  "// you may not use this file except in compliance with the License.\n"
  "// You may obtain a copy of the License at\n"
  "//\n"
  "// http://www.apache.org/licenses/LICENSE-2.0\n"
  "//\n"
  "// Unless required by applicable law or agreed to in writing, software\n"
  "// distributed under the License is distributed on an \"AS IS\" BASIS,\n"
  "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or "
  "implied.\n"
  "// See the License for the specific language governing permissions and\n"
  "// limitations under the License.\n"
  "//\n"
  "// This file is generated automatically, do not edit it manually.\n"
  "\n";

 // Writes the kLicense banner to output.
 void WriteLicense(FILE* output) {
  fputs(kLicense, output);
 }

 // Names of the outer and inner namespaces written by WriteNSHeader() and
 // WriteNSFooter().
 const char kI18NNS[] = "i18n";
 const char kPhoneNumbersNS[] = "phonenumbers";

 // Writes the two nested namespace openings, outer namespace first.
 void WriteNSHeader(FILE* output) {
  fprintf(output,
          "namespace %s {\n"
          "namespace %s {\n",
          kI18NNS, kPhoneNumbersNS);
 }

 // Writes the two namespace closings, inner namespace first, each followed by
 // a comment naming the namespace it closes.
 void WriteNSFooter(FILE* output) {
  fprintf(output,
          "}  // namespace %s\n"
          "}  // namespace %s\n",
          kPhoneNumbersNS, kI18NNS);
 }

 // Writes the #include section of the generated file: the header matching
 // base_name under phonenumbers/geocoding/, then base/basictypes.h, each
 // followed by an empty line.
 void WriteCppHeader(const string& base_name, FILE* output) {
  fprintf(output,
          "#include \"phonenumbers/geocoding/%s.h\"\n"
          "\n"
          "#include \"base/basictypes.h\"\n"
          "\n",
          base_name.c_str());
 }

 // Writes two initializer lines: the array called name, then an expression
 // computing its element count.
 void WriteArrayAndSize(const string& name, FILE* output) {
  const char* const array_name = name.c_str();
  fprintf(output,
          "  %s,\n"
          "  sizeof(%s)/sizeof(*%s),\n",
          array_name, array_name, array_name);
 }

 // Writes the definition of a PrefixDescriptions constant called "name". Its
 // initializer refers to three arrays: "prefixes_name" and
 // "possible_lengths_name" each come with their element count, "desc_name"
 // does not.
 //
 // const PrefixDescriptions ${name} = {
 //   ${prefix_name},
 //   sizeof(${prefix_name})/sizeof(*${prefix_name}),
 //   ${desc_name},
 //   ${possible_lengths_name},
 //   sizeof(${possible_lengths_name})/sizeof(*${possible_lengths_name}),
 // };
 //
 void WritePrefixDescriptionsDefinition(
    const string& name, const string& prefixes_name, const string& desc_name,
    const string& possible_lengths_name, FILE* output) {
  // Opening line.
  fprintf(output, "const PrefixDescriptions %s = {\n", name.c_str());

  // Fields, in declaration order.
  WriteArrayAndSize(prefixes_name, output);
  fprintf(output, "  %s,\n", desc_name.c_str());
  WriteArrayAndSize(possible_lengths_name, output);

  // Closing line.
  fputs("};\n", output);
 }

 // Writes prefixes, descriptions and possible_lengths arrays built from the
 // phone number prefix to description mapping "prefixes". Binds these arrays
 // in a single PrefixDescriptions variable named "var_name".
 //
 // Prefixes and descriptions are written in the iteration order of the map
 // (ascending prefix). possible_lengths holds the distinct numbers of decimal
 // digits found among the prefixes, in ascending order.
 //
 // const int32 ${var_name}_prefixes[] = {
 //   1201,
 //   1650,
 // };
 //
 // const char* ${var_name}_descriptions[] = {
 //   "New Jersey",
 //   "Kalifornien",
 // };
 //
 // const int32 ${var_name}_possible_lengths[] = {
 //   4,
 // };
 //
 // const PrefixDescriptions ${var_name} = {
 //   ...
 // };
 //
 void WritePrefixDescriptions(const string& var_name, const map<int, string>&
                              prefixes, FILE* output) {
  set<int> possible_lengths;
  const string prefixes_name = var_name + "_prefixes";
  fprintf(output, "const int32 %s[] = {\n", prefixes_name.c_str());
  for (map<int, string>::const_iterator it = prefixes.begin();
       it != prefixes.end(); ++it) {
    fprintf(output, "  %d,\n", it->first);
    // Count decimal digits with integer arithmetic. A log10-based count is
    // undefined for a prefix of zero or below and depends on floating-point
    // rounding.
    int digits = 1;
    for (int rest = it->first / 10; rest != 0; rest /= 10) {
      ++digits;
    }
    possible_lengths.insert(digits);
  }
  fprintf(output,
          "};\n"
          "\n");

  const string desc_name = var_name + "_descriptions";
  fprintf(output, "const char* %s[] = {\n", desc_name.c_str());
  for (map<int, string>::const_iterator it = prefixes.begin();
       it != prefixes.end(); ++it) {
    fprintf(output, "  ");
    WriteStringLiteral(it->second, output);
    fprintf(output, ",\n");
  }
  fprintf(output,
          "};\n"
          "\n");

  // All lengths go on one line.
  const string possible_lengths_name = var_name + "_possible_lengths";
  fprintf(output, "const int32 %s[] = {\n ", possible_lengths_name.c_str());
  for (set<int>::const_iterator it = possible_lengths.begin();
       it != possible_lengths.end(); ++it) {
    fprintf(output, " %d,", *it);
  }
  fprintf(output,
          "\n"
          "};\n"
          "\n");

  WritePrefixDescriptionsDefinition(var_name, prefixes_name, desc_name,
                                    possible_lengths_name, output);
  fprintf(output, "\n");
 }

 // Writes two parallel arrays built from "prefix_var_names", which maps
 // "<country calling code>_<language>" keys to the names of PrefixDescriptions
 // variables. The first array lists the keys, the second the addresses of the
 // matching variables; both follow the iteration order of the map.
 //
 // const char* prefix_language_code_pairs[] = {
 //   "1_de",
 //   "1_en",
 // };
 //
 // const PrefixDescriptions* prefixes_descriptions[] = {
 //   &prefix_1_de,
 //   &prefix_1_en,
 // };
 //
 void WritePrefixesDescriptions(const map<string, string>& prefix_var_names,
                                FILE* output) {
  typedef map<string, string>::const_iterator VarIterator;
  const VarIterator first = prefix_var_names.begin();
  const VarIterator last = prefix_var_names.end();

  // Keys.
  fprintf(output, "const char* prefix_language_code_pairs[] = {\n");
  for (VarIterator var = first; var != last; ++var) {
    fprintf(output, "  \"%s\",\n", var->first.c_str());
  }
  fprintf(output,
          "};\n"
          "\n");

  // Variable addresses.
  fprintf(output, "const PrefixDescriptions* prefixes_descriptions[] = {\n");
  for (VarIterator var = first; var != last; ++var) {
    fprintf(output, "  &%s,\n", var->second.c_str());
  }
  fprintf(output,
          "};\n"
          "\n");
 }

 // For each entry in "languages" mapping a country calling code to a set
 // of available languages, writes a sorted array of languages, then wraps it
 // into a CountryLanguages instance. Finally, writes a pair of arrays mapping
 // country calling codes to CountryLanguages instances.
 //
 // const char* country_1[] = {
 //   "de",
 //   "en",
 // };
 //
 // const CountryLanguages country_1_languages = {
 //   country_1,
 //   sizeof(country_1)/sizeof(*country_1),
 // };
 //
 // [...]
 //
 // const CountryLanguages* country_languages[] = {
 //   &country_1_languages,
 //   [...]
 // }
 //
 // const int country_calling_codes[] = {
 //   1,
 //   [...]
 // };
 //
 bool WriteCountryLanguages(const map<int32, set<string> >& languages,
                           FILE* output) {
  vector<string> country_languages_vars;
  vector<string> countries;
  for (map<int32, set<string> >::const_iterator it = languages.begin();
       it != languages.end(); ++it) {
    string country_code;
    if (!IntToStr(it->first, &country_code)) {
      return false;
    }
    const string country_var = "country_" + country_code;
    fprintf(output, "const char* %s[] = {\n", country_var.c_str());
    for (set<string>::const_iterator it_lang = it->second.begin();
         it_lang != it->second.end(); ++it_lang) {
      fprintf(output, "  \"%s\",\n", it_lang->c_str());
    }
    fprintf(output,
            "};\n"
            "\n");

    const string country_languages_var = country_var + "_languages";
    fprintf(output, "const CountryLanguages %s = {\n",
            country_languages_var.c_str());
    WriteArrayAndSize(country_var, output);
    fprintf(output,
            "};\n"
            "\n");
    country_languages_vars.push_back(country_languages_var);
    countries.push_back(country_code);
  }

  fprintf(output,
          "\n"
          "const CountryLanguages* countries_languages[] = {\n");
  for (vector<string>::const_iterator
       it_languages_var = country_languages_vars.begin();
       it_languages_var != country_languages_vars.end(); ++it_languages_var) {
    fprintf(output, "  &%s,\n", it_languages_var->c_str());
  }
  fprintf(output,
          "};\n"
          "\n"
          "const int country_calling_codes[] = {\n");
  for (vector<string>::const_iterator it_country = countries.begin();
       it_country != countries.end(); ++it_country) {
    fprintf(output, "  %s,\n", it_country->c_str());
  }
  fprintf(output,
          "};\n"
          "\n");
  return true;
 }

 // Returns a copy of input where all occurrences of pattern are replaced with
 // value. Occurrences are searched from left to right and do not overlap; the
 // inserted value is never searched again. An empty pattern matches nothing,
 // so input is returned unchanged.
 string ReplaceAll(const string& input, const string& pattern,
                   const string& value) {
  if (pattern.empty()) {
    // find() would keep matching the empty pattern at the same position and
    // the loop below would never terminate.
    return input;
  }
  string replaced;
  string::size_type begin = 0;
  while (true) {
    const string::size_type pos = input.find(pattern, begin);
    if (pos == string::npos) {
      // No more occurrence: copy the remaining tail.
      replaced.append(input, begin, string::npos);
      break;
    }
    replaced.append(input, begin, pos - begin);
    replaced.append(value);
    begin = pos + pattern.length();
  }
  return replaced;
 }

 // Writes the definitions of the functions giving access to the generated
 // arrays. "accessor_prefix" is inserted right after "get" in every function
 // name.
 void WriteAccessorsDefinitions(const string& accessor_prefix, FILE* output) {
  // Every "$prefix$" marker below is replaced with accessor_prefix.
  static const char kTemplate[] =
      "const int* get$prefix$_country_calling_codes() {\n"
      "  return country_calling_codes;\n"
      "}\n"
      "\n"
      "int get$prefix$_country_calling_codes_size() {\n"
      "  return sizeof(country_calling_codes)\n"
      "      /sizeof(*country_calling_codes);\n"
      "}\n"
      "\n"
      "const CountryLanguages* get$prefix$_country_languages(int index) {\n"
      "  return countries_languages[index];\n"
      "}\n"
      "\n"
      "const char** get$prefix$_prefix_language_code_pairs() {\n"
      "  return prefix_language_code_pairs;\n"
      "}\n"
      "\n"
      "int get$prefix$_prefix_language_code_pairs_size() {\n"
      "  return sizeof(prefix_language_code_pairs)\n"
      "      /sizeof(*prefix_language_code_pairs);\n"
      "}\n"
      "\n"
      "const PrefixDescriptions* get$prefix$_prefix_descriptions(int index) {\n"
      "  return prefixes_descriptions[index];\n"
      "}\n";
  const string definitions = ReplaceAll(kTemplate, "$prefix$", accessor_prefix);
  fputs(definitions.c_str(), output);
 }

 // Writes geocoding data .cc file. "data_path" is the path of geocoding textual
 // data directory. "base_name" is the base name of the .h/.cc pair, like
 // "geocoding_data". "accessor_prefix" is inserted in the names of the
 // generated accessor functions.
 //
 // Every sub-directory of "data_path" is taken as a language, and every
 // "<country calling code>.txt" file in it as the prefix descriptions for that
 // country and language. Other entries are ignored.
 //
 // Returns true on success. On failure a diagnostic is printed on stderr and
 // "output" may have been partially written.
 bool WriteSource(const string& data_path, const string& base_name,
                  const string& accessor_prefix, FILE* output) {
  WriteLicense(output);
  WriteCppHeader(base_name, output);
  WriteNSHeader(output);
  fprintf(output,
          "namespace {\n"
          "\n");

  // Enumerate language/script directories.
  map<string, string> prefix_vars;
  map<int32, set<string> > country_languages;
  vector<DirEntry> entries;
  if (!ListDirectory(data_path, &entries)) {
    fprintf(stderr, "failed to read directory entries\n");
    return false;
  }
  for (vector<DirEntry>::const_iterator it = entries.begin();
       it != entries.end(); ++it) {
    if (it->kind() != kDirectory) {
      continue;
    }
    // Enumerate country calling code files.
    const string dir_path = data_path + "/" + it->name();
    vector<DirEntry> files;
    if (!ListDirectory(dir_path, &files)) {
      fprintf(stderr, "failed to read file entries\n");
      return false;
    }
    for (vector<DirEntry>::const_iterator it_files = files.begin();
         it_files != files.end(); ++it_files) {
      const string fname = it_files->name();
      if (!EndsWith(fname, ".txt")) {
        continue;
      }
      const string path = dir_path + "/" + fname;

      // The file name without its ".txt" extension is the calling code.
      int32 country_code;
      const string country_code_str = fname.substr(0, fname.length() - 4);
      if (!StrToInt(country_code_str, &country_code)) {
        fprintf(stderr, "invalid country calling code in file name %s\n",
                path.c_str());
        return false;
      }

      map<int32, string> prefixes;
      if (!ParsePrefixes(path, &prefixes)) {
        fprintf(stderr, "failed to parse prefixes file %s\n", path.c_str());
        return false;
      }

      const string prefix_var = "prefix_" + country_code_str + "_" + it->name();
      WritePrefixDescriptions(prefix_var, prefixes, output);
      prefix_vars[country_code_str + "_" + it->name()] = prefix_var;
      country_languages[country_code].insert(it->name());
    }
  }
  WritePrefixesDescriptions(prefix_vars, output);
  if (!WriteCountryLanguages(country_languages, output)) {
    fprintf(stderr, "failed to write country languages\n");
    return false;
  }
  fprintf(output, "}  // namespace\n");
  fprintf(output, "\n");
  WriteAccessorsDefinitions(accessor_prefix, output);
  WriteNSFooter(output);
  if (ferror(output) != 0) {
    fprintf(stderr, "failed to write generated source\n");
    return false;
  }
  return true;
 }

 // Prints "message" as an error, followed by the command line usage, on
 // stderr. Always returns 1 so that callers can return the result directly as
 // the process exit code.
 int PrintHelp(const string& message) {
  fprintf(stderr, "error: %s\n", message.c_str());
  fprintf(stderr, "generate_geocoding_data DATADIR CCPATH [ACCESSOR_PREFIX]\n");
  return 1;
 }

 // Entry point of the generator.
 //
 //   argv[1]: geocoding data root directory (required).
 //   argv[2]: path of the .cc file to generate (required). Its file name,
 //            without directory and extension, is used as the base name of
 //            the header included by the generated file.
 //   argv[3]: prefix inserted in the generated accessor names (optional,
 //            empty by default).
 //
 // Returns 0 on success and 1 on failure. On failure no output file is left
 // behind, so that a build system cannot mistake a truncated file for an
 // up-to-date one.
 int Main(int argc, const char* argv[]) {
  if (argc < 2) {
    return PrintHelp("geocoding data root directory expected");
  }
  if (argc < 3) {
    return PrintHelp("output source path expected");
  }
  string accessor_prefix = "";
  if (argc > 3) {
    accessor_prefix = argv[3];
  }
  const string root_path(argv[1]);
  string source_path(argv[2]);
  // Normalize Windows separators so that the base name extraction below only
  // has to deal with '/'.
  std::replace(source_path.begin(), source_path.end(), '\\', '/');
  string base_name = source_path;
  if (base_name.rfind('/') != string::npos) {
    base_name = base_name.substr(base_name.rfind('/') + 1);
  }
  base_name = base_name.substr(0, base_name.rfind('.'));

  FILE* source_fp = fopen(source_path.c_str(), "w");
  if (!source_fp) {
    fprintf(stderr, "failed to open %s\n", source_path.c_str());
    return 1;
  }
  AutoCloser<FILE> source_closer(&source_fp, fclose);
  bool success = WriteSource(root_path, base_name, accessor_prefix, source_fp);
  // Buffered data may only reach the file, and fail to do so, when flushed.
  if (fflush(source_fp) != 0) {
    fprintf(stderr, "failed to write %s\n", source_path.c_str());
    success = false;
  }
  source_closer.Close();
  if (!success) {
    std::remove(source_path.c_str());
    return 1;
  }
  return 0;
 }

 }  // namespace phonenumbers
 }  // namespace i18n
--- a/tools/cpp/src/cpp-build/generate_geocoding_data.h
+++ b/tools/cpp/src/cpp-build/generate_geocoding_data.h
@ -0,0 +1,34 @@
 // Copyright (C) 2012 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Patrick Mezard

 #ifndef I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H
 #define I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H

 #include <string>

 namespace i18n {
 namespace phonenumbers {

 using std::string;

 // Returns the text of a C++ string literal representing `s`, including the
 // surrounding double quotes. As exercised by the unit test, an empty input
 // yields `""`, and "Op\xc3\xa9ra" yields `"Op""\xc3""\xa9""ra"`: each
 // non-ASCII byte is written as a hex escape inside its own adjacent literal.
 // NOTE(review): presumably the literal is split so that a following hex digit
 // cannot be absorbed into the escape sequence - confirm in the implementation.
 string MakeStringLiteral(const string& s);

 // Entry point of the generate_geocoding_data tool. argv[1] is the geocoding
 // data root directory, argv[2] the path of the source file to generate, and
 // an optional argv[3] is an accessor prefix. Returns 0 on success and 1 on
 // failure (missing argument, unwritable output file, or generation error).
 int Main(int argc, const char* argv[]);

 }  // namespace phonenumbers
 }  // namespace i18n

 #endif  // I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H
--- a/tools/cpp/src/cpp-build/generate_geocoding_data_main.cc
+++ b/tools/cpp/src/cpp-build/generate_geocoding_data_main.cc
@ -0,0 +1,21 @@
 // Copyright (C) 2012 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Patrick Mezard

 #include "cpp-build/generate_geocoding_data.h"

 int main(int argc, const char* argv[]) {
  return i18n::phonenumbers::Main(argc, argv);
 }
--- a/tools/cpp/test/cpp-build/generate_geocoding_data_test.cc
+++ b/tools/cpp/test/cpp-build/generate_geocoding_data_test.cc
@ -0,0 +1,31 @@
 // Copyright (C) 2012 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Author: Patrick Mezard

 #include "cpp-build/generate_geocoding_data.h"

 #include <gtest/gtest.h>

 namespace i18n {
 namespace phonenumbers {

 // Checks the literal text produced by MakeStringLiteral() for an empty input
 // and for an input containing non-ASCII (UTF-8 encoded) bytes.
 TEST(GenerateGeocodingDataTest, TestMakeStringLiteral) {
  // An empty input is expected to produce an empty quoted literal.
  const string empty_literal = MakeStringLiteral("");
  EXPECT_EQ("\"\"", empty_literal);

  // Each non-ASCII byte is expected as a hex escape in its own literal piece.
  const string opera_literal = MakeStringLiteral("Op\xc3\xa9ra");
  EXPECT_EQ("\"Op\"\"\\xc3\"\"\\xa9\"\"ra\"", opera_literal);
 }

 }  // namespace phonenumbers
 }  // namespace i18n
--- a/tools/cpp/test/cpp-build/run_tests.cc
+++ b/tools/cpp/test/cpp-build/run_tests.cc
@ -0,0 +1,21 @@
 // Copyright (C) 2011 The Libphonenumber Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include <gtest/gtest.h>

 // Test runner entry point: lets Google Test consume its own command-line
 // flags, then runs every registered test and returns the overall status.
 int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int status = RUN_ALL_TESTS();
  return status;
 }