diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2557fb62f..6f4a50c75 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -154,6 +154,26 @@ add_custom_target ( COMMENT "Generating Protocol Buffers code" ) +# Geocoding data cpp file generation +set (TOOLS_DIR "${CMAKE_CURRENT_BINARY_DIR}/tools") +add_subdirectory("${CMAKE_SOURCE_DIR}/../tools/cpp" "${TOOLS_DIR}") + +set (GEOCODING_DIR "${RESOURCES_DIR}/geocoding") +file (GLOB_RECURSE GEOCODING_SOURCES "${GEOCODING_DIR}/*.txt") + +set (GEOCODING_DATA_OUTPUT + "${CMAKE_SOURCE_DIR}/src/phonenumbers/geocoding/geocoding_data.cc" +) + +add_custom_command ( + COMMAND generate_geocoding_data "${GEOCODING_DIR}" "${GEOCODING_DATA_OUTPUT}" + + OUTPUT ${GEOCODING_DATA_OUTPUT} + DEPENDS ${GEOCODING_SOURCES} + generate_geocoding_data + COMMENT "Generating geocoding data code" +) + set ( SOURCES "src/base/string_piece.cc" @@ -161,6 +181,7 @@ set ( "src/phonenumbers/default_logger.cc" "src/phonenumbers/geocoding/area_code_map.cc" "src/phonenumbers/geocoding/default_map_storage.cc" + "src/phonenumbers/geocoding/geocoding_data.cc" "src/phonenumbers/logger.cc" "src/phonenumbers/metadata.h" # Generated by build tools. "src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers. 
@@ -310,9 +331,27 @@ add_library (phonenumber_testing STATIC ${TESTING_LIBRARY_SOURCES}) target_link_libraries (phonenumber_testing ${LIBRARY_DEPS}) add_dependencies (phonenumber_testing generate-sources ${TEST_METADATA_TARGET}) +# Test geocoding data cpp files generation +set (GEOCODING_TEST_DIR "${RESOURCES_DIR}/test/geocoding") +file (GLOB_RECURSE GEOCODING_TEST_SOURCES "${GEOCODING_TEST_DIR}/*.txt") + +set (GEOCODING_TEST_DATA_OUTPUT + "${CMAKE_SOURCE_DIR}/test/phonenumbers/geocoding/geocoding_test_data.cc" +) + +add_custom_command ( + COMMAND generate_geocoding_data "${GEOCODING_TEST_DIR}" + "${GEOCODING_TEST_DATA_OUTPUT}" + + OUTPUT ${GEOCODING_TEST_DATA_OUTPUT} + DEPENDS ${GEOCODING_TEST_SOURCES} generate_geocoding_data + COMMENT "Generating geocoding test data code" +) + set (TEST_SOURCES "test/phonenumbers/asyoutypeformatter_test.cc" "test/phonenumbers/geocoding/area_code_map_test.cc" + "test/phonenumbers/geocoding/geocoding_test_data.cc" "test/phonenumbers/logger_test.cc" "test/phonenumbers/phonenumberutil_test.cc" "test/phonenumbers/regexp_adapter_test.cc" @@ -339,7 +378,12 @@ if (NOT WIN32) endif () target_link_libraries (libphonenumber_test ${TEST_LIBS}) -add_custom_target(test COMMAND libphonenumber_test DEPENDS libphonenumber_test) +add_custom_target (test + COMMAND generate_geocoding_data_test + COMMAND libphonenumber_test + + DEPENDS generate_geocoding_data_test libphonenumber_test +) # Install rules. install (FILES diff --git a/cpp/src/base/basictypes.h b/cpp/src/base/basictypes.h index 1e102826d..264fd62d2 100644 --- a/cpp/src/base/basictypes.h +++ b/cpp/src/base/basictypes.h @@ -271,18 +271,6 @@ struct CompileAssert { // causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. -// MetatagId refers to metatag-id that we assign to -// each metatag pair.. -typedef uint32 MetatagId; - -// Argument type used in interfaces that can optionally take ownership -// of a passed in argument. 
If TAKE_OWNERSHIP is passed, the called -// object takes ownership of the argument. Otherwise it does not. -enum Ownership { - DO_NOT_TAKE_OWNERSHIP, - TAKE_OWNERSHIP -}; - // bit_cast is a template function that implements the // equivalent of "*reinterpret_cast(&source)". We need this in // very low-level functions like the protobuf library and fast math @@ -357,7 +345,7 @@ inline Dest bit_cast(const Source& source) { // ignore_result(my_var.release()); // template -inline void ignore_result(const T& ignored) { +inline void ignore_result(const T&) { } // The following enum should be used only as a constructor argument to indicate diff --git a/cpp/src/phonenumbers/geocoding/geocoding_data.h b/cpp/src/phonenumbers/geocoding/geocoding_data.h new file mode 100644 index 000000000..90f7956a2 --- /dev/null +++ b/cpp/src/phonenumbers/geocoding/geocoding_data.h @@ -0,0 +1,77 @@ +// Copyright (C) 2012 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is generated automatically, do not edit it manually. + +#ifndef I18N_PHONENUMBERS_GEOCODING_DATA +#define I18N_PHONENUMBERS_GEOCODING_DATA + +#include "base/basictypes.h" + +namespace i18n { +namespace phonenumbers { + +struct CountryLanguages { + // Sorted array of language codes. + const char** available_languages; + + // Number of elements in available_languages. 
+ const int available_languages_size; +}; + +struct PrefixDescriptions { + // Sorted array of phone number prefixes. + const int32* prefixes; + + // Number of elements in prefixes. + const int prefixes_size; + + // Array of phone number prefix descriptions, mapped one to one + // to prefixes. + const char** descriptions; + + // Sorted array of unique prefix lengths in base 10. + const int32* possible_lengths; + + // Number of elements in possible_lengths. + const int possible_lengths_size; +}; + +// Returns a sorted array of country calling codes. +const int* get_country_calling_codes(); + +// Returns the number of country calling codes in +// get_country_calling_codes() array. +int get_country_calling_codes_size(); + +// Returns the CountryLanguages record for country at index, index +// being in [0, get_country_calling_codes_size()). +const CountryLanguages* get_country_languages(int index); + +// Returns a sorted array of prefix language code pairs like +// "1_de" or "82_ko". +const char** get_prefix_language_code_pairs(); + +// Returns the number of elements in +// get_prefix_language_code_pairs() +int get_prefix_language_code_pairs_size(); + +// Returns the PrefixDescriptions for language/code pair at index, +// index being in [0, get_prefix_language_code_pairs_size()). +const PrefixDescriptions* get_prefix_descriptions(int index); + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_GEOCODING_DATA diff --git a/cpp/test/phonenumbers/geocoding/geocoding_test_data.h b/cpp/test/phonenumbers/geocoding/geocoding_test_data.h new file mode 100644 index 000000000..14760a70f --- /dev/null +++ b/cpp/test/phonenumbers/geocoding/geocoding_test_data.h @@ -0,0 +1,50 @@ +// Copyright (C) 2012 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef I18N_PHONENUMBERS_GEOCODING_TEST_DATA +#define I18N_PHONENUMBERS_GEOCODING_TEST_DATA + +#include "base/basictypes.h" +#include "phonenumbers/geocoding/geocoding_data.h" + +namespace i18n { +namespace phonenumbers { + +// Returns a sorted array of country calling codes. +const int* get_test_country_calling_codes(); + +// Returns the number of country calling codes in +// get_test_country_calling_codes() array. +int get_test_country_calling_codes_size(); + +// Returns the CountryLanguages record for country at index, index +// being in [0, get_test_country_calling_codes_size()). +const CountryLanguages* get_test_country_languages(int index); + +// Returns a sorted array of prefix language code pairs like +// "1_de" or "82_ko". +const char** get_test_prefix_language_code_pairs(); + +// Returns the number of elements in the +// get_test_prefix_language_code_pairs() array. +int get_test_prefix_language_code_pairs_size(); + +// Returns the PrefixDescriptions for language/code pair at index, +// index being in [0, get_test_prefix_language_code_pairs_size()).
+const PrefixDescriptions* get_test_prefix_descriptions(int index); + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_GEOCODING_TEST_DATA diff --git a/tools/cpp/CMakeLists.txt b/tools/cpp/CMakeLists.txt new file mode 100644 index 000000000..85614b470 --- /dev/null +++ b/tools/cpp/CMakeLists.txt @@ -0,0 +1,76 @@ +# Copyright (C) 2012 The Libphonenumber Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Author: Patrick Mezard + +cmake_minimum_required (VERSION 2.8) + +project (generate_geocoding_data) + +# Helper functions dealing with finding libraries and programs this library +# depends on. + +function (print_error DESCRIPTION FILE) + message (FATAL_ERROR + "Can't find ${DESCRIPTION}: can't locate ${FILE}. Please read the README.") +endfunction () + +# Find a library (header + binary). If either has not been found, stop CMake +# with a fatal error message. Sets ${NAME}_INCLUDE_DIR and ${NAME}_LIB. +function (find_required_library NAME HEADER LIBRARY DESCRIPTION) + # Check the header; find_path stores a *-NOTFOUND value on failure. + find_path (${NAME}_INCLUDE_DIR ${HEADER}) + set (INCLUDE_DIR ${${NAME}_INCLUDE_DIR}) + + if (NOT INCLUDE_DIR) + print_error (${DESCRIPTION} ${HEADER}) + endif () + include_directories (${INCLUDE_DIR}) + # Check the binary; find_library stores a *-NOTFOUND value on failure.
+ find_library (${NAME}_LIB ${LIBRARY}) + set (LIB ${${NAME}_LIB}) + + if (NOT LIB) + print_error (${DESCRIPTION} ${LIBRARY}) + endif () +endfunction (find_required_library) + +find_required_library (GTEST gtest/gtest.h gtest "Google Test framework") + +set ( + SOURCES + "src/cpp-build/generate_geocoding_data.cc" + "src/cpp-build/generate_geocoding_data_main.cc" +) + +if (NOT WIN32) + add_definitions ("-Wall -Werror") +endif () + +include_directories ("src") + +add_executable (generate_geocoding_data ${SOURCES}) + +set (TEST_SOURCES + "src/cpp-build/generate_geocoding_data.cc" + "test/cpp-build/generate_geocoding_data_test.cc" + "test/cpp-build/run_tests.cc" +) + +set (TEST_LIBS ${GTEST_LIB}) + +# Build the testing binary. +include_directories ("test") +add_executable (generate_geocoding_data_test ${TEST_SOURCES}) +target_link_libraries (generate_geocoding_data_test ${TEST_LIBS}) diff --git a/tools/cpp/src/base/basictypes.h b/tools/cpp/src/base/basictypes.h new file mode 100644 index 000000000..264fd62d2 --- /dev/null +++ b/tools/cpp/src/base/basictypes.h @@ -0,0 +1,368 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_BASICTYPES_H_ +#define BASE_BASICTYPES_H_ +#pragma once + +#include <limits.h> // So we can set the bounds of our types +#include <stddef.h> // For size_t +#include <string.h> // for memcpy + +#ifndef COMPILER_MSVC +// stdint.h is part of C99 but MSVC doesn't have it. +#include <stdint.h> // For intptr_t. +#endif + +#ifdef INT64_MAX + +// INT64_MAX is defined if C99 stdint.h is included; use the +// native types if available.
+typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; + +const uint8 kuint8max = UINT8_MAX; +const uint16 kuint16max = UINT16_MAX; +const uint32 kuint32max = UINT32_MAX; +const uint64 kuint64max = UINT64_MAX; +const int8 kint8min = INT8_MIN; +const int8 kint8max = INT8_MAX; +const int16 kint16min = INT16_MIN; +const int16 kint16max = INT16_MAX; +const int32 kint32min = INT32_MIN; +const int32 kint32max = INT32_MAX; +const int64 kint64min = INT64_MIN; +const int64 kint64max = INT64_MAX; + +#else // !INT64_MAX + +typedef signed char int8; +typedef short int16; +// TODO: Remove these type guards. These are to avoid conflicts with +// obsolete/protypes.h in the Gecko SDK. +#ifndef _INT32 +#define _INT32 +typedef int int32; +#endif + +// The NSPR system headers define 64-bit as |long| when possible. In order to +// not have typedef mismatches, we do the same on LP64. +#if __LP64__ +typedef long int64; +#else +typedef long long int64; +#endif + +// NOTE: unsigned types are DANGEROUS in loops and other arithmetical +// places. Use the signed types unless your variable represents a bit +// pattern (eg a hash value) or you really need the extra bit. Do NOT +// use 'unsigned' to express "this value should always be positive"; +// use assertions for this. + +typedef unsigned char uint8; +typedef unsigned short uint16; +// TODO: Remove these type guards. These are to avoid conflicts with +// obsolete/protypes.h in the Gecko SDK. +#ifndef _UINT32 +#define _UINT32 +typedef unsigned int uint32; +#endif + +// See the comment above about NSPR and 64-bit. +#if __LP64__ +typedef unsigned long uint64; +#else +typedef unsigned long long uint64; +#endif + +#endif // !INT64_MAX + +typedef signed char schar; + +// A type to represent a Unicode code-point value. As of Unicode 4.0, +// such values require up to 21 bits. 
+// (For type-checking on pointers, make this explicitly signed, +// and it should always be the signed version of whatever int32 is.) +typedef signed int char32; + +// A macro to disallow the copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +// An older, deprecated, politically incorrect name for the above. +// NOTE: The usage of this macro was banned from our code base, but some +// third_party libraries are still using it. +// TODO(tfarina): Figure out how to fix the usage of this macro in the +// third_party libraries and get rid of it. +#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName) + +// A macro to disallow all the implicit constructors, namely the +// default constructor, copy constructor and operator= functions. +// +// This should be used in the private: declarations for a class +// that wants to prevent anyone from instantiating it. This is +// especially useful for classes containing only static methods. +#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ + TypeName(); \ + DISALLOW_COPY_AND_ASSIGN(TypeName) + +// The arraysize(arr) macro returns the # of elements in an array arr. +// The expression is a compile-time constant, and therefore can be +// used in defining new arrays, for example. If you use arraysize on +// a pointer by mistake, you will get a compile-time error. +// +// One caveat is that arraysize() doesn't accept any array of an +// anonymous type or a type defined inside a function. In these rare +// cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is +// due to a limitation in C++'s template system. The limitation might +// eventually be removed, but it hasn't happened yet. + +// This template function declaration is used in defining arraysize.
+// Note that the function doesn't need an implementation, as we only +// use its type. +template +char (&ArraySizeHelper(T (&array)[N]))[N]; + +// That gcc wants both of these prototypes seems mysterious. VC, for +// its part, can't decide which to use (another mystery). Matching of +// template overloads: the final frontier. +#ifndef _MSC_VER +template +char (&ArraySizeHelper(const T (&array)[N]))[N]; +#endif + +#define arraysize(array) (sizeof(ArraySizeHelper(array))) + +// ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize, +// but can be used on anonymous types or types defined inside +// functions. It's less safe than arraysize as it accepts some +// (although not all) pointers. Therefore, you should use arraysize +// whenever possible. +// +// The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type +// size_t. +// +// ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error +// +// "warning: division by zero in ..." +// +// when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer. +// You should only use ARRAYSIZE_UNSAFE on statically allocated arrays. +// +// The following comments are on the implementation details, and can +// be ignored by the users. +// +// ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in +// the array) and sizeof(*(arr)) (the # of bytes in one array +// element). If the former is divisible by the latter, perhaps arr is +// indeed an array, in which case the division result is the # of +// elements in the array. Otherwise, arr cannot possibly be an array, +// and we generate a compiler error to prevent the code from +// compiling. +// +// Since the size of bool is implementation-defined, we need to cast +// !(sizeof(a) & sizeof(*(a))) to size_t in order to ensure the final +// result has type size_t. +// +// This macro is not perfect as it wrongfully accepts certain +// pointers, namely where the pointer size is divisible by the pointee +// size. 
Since all our code has to go through a 32-bit compiler, +// where a pointer is 4 bytes, this means all pointers to a type whose +// size is 3 or greater than 4 will be (righteously) rejected. + +#define ARRAYSIZE_UNSAFE(a) \ + ((sizeof(a) / sizeof(*(a))) / \ + static_cast(!(sizeof(a) % sizeof(*(a))))) + + +// Use implicit_cast as a safe version of static_cast or const_cast +// for upcasting in the type hierarchy (i.e. casting a pointer to Foo +// to a pointer to SuperclassOfFoo or casting a pointer to Foo to +// a const pointer to Foo). +// When you use implicit_cast, the compiler checks that the cast is safe. +// Such explicit implicit_casts are necessary in surprisingly many +// situations where C++ demands an exact type match instead of an +// argument type convertable to a target type. +// +// The From type can be inferred, so the preferred syntax for using +// implicit_cast is the same as for static_cast etc.: +// +// implicit_cast(expr) +// +// implicit_cast would have been part of the C++ standard library, +// but the proposal was submitted too late. It will probably make +// its way into the language in the future. +template +inline To implicit_cast(From const &f) { + return f; +} + +// The COMPILE_ASSERT macro can be used to verify that a compile time +// expression is true. For example, you could use it to verify the +// size of a static array: +// +// COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES, +// content_type_names_incorrect_size); +// +// or to make sure a struct is smaller than a certain size: +// +// COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large); +// +// The second argument to the macro is the name of the variable. If +// the expression is false, most compilers will issue a warning/error +// containing the name of the variable. + +template +struct CompileAssert { +}; + +#undef COMPILE_ASSERT +#define COMPILE_ASSERT(expr, msg) \ + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 
1 : -1] + +// Implementation details of COMPILE_ASSERT: +// +// - COMPILE_ASSERT works by defining an array type that has -1 +// elements (and thus is invalid) when the expression is false. +// +// - The simpler definition +// +// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1] +// +// does not work, as gcc supports variable-length arrays whose sizes +// are determined at run-time (this is gcc's extension and not part +// of the C++ standard). As a result, gcc fails to reject the +// following code with the simple definition: +// +// int foo; +// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is +// // not a compile-time constant. +// +// - By using the type CompileAssert<(bool(expr))>, we ensures that +// expr is a compile-time constant. (Template arguments must be +// determined at compile-time.) +// +// - The outter parentheses in CompileAssert<(bool(expr))> are necessary +// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written +// +// CompileAssert +// +// instead, these compilers will refuse to compile +// +// COMPILE_ASSERT(5 > 0, some_message); +// +// (They seem to think the ">" in "5 > 0" marks the end of the +// template argument list.) +// +// - The array size is (bool(expr) ? 1 : -1), instead of simply +// +// ((expr) ? 1 : -1). +// +// This is to avoid running into a bug in MS VC 7.1, which +// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. + + +// bit_cast is a template function that implements the +// equivalent of "*reinterpret_cast(&source)". We need this in +// very low-level functions like the protobuf library and fast math +// support. +// +// float f = 3.14159265358979; +// int i = bit_cast(f); +// // i = 0x40490fdb +// +// The classical address-casting method is: +// +// // WRONG +// float f = 3.14159265358979; // WRONG +// int i = * reinterpret_cast(&f); // WRONG +// +// The address-casting method actually produces undefined behavior +// according to ISO C++ specification section 3.10 -15 -. 
Roughly, this +// section says: if an object in memory has one type, and a program +// accesses it with a different type, then the result is undefined +// behavior for most values of "different type". +// +// This is true for any cast syntax, either *(int*)&f or +// *reinterpret_cast(&f). And it is particularly true for +// conversions betweeen integral lvalues and floating-point lvalues. +// +// The purpose of 3.10 -15- is to allow optimizing compilers to assume +// that expressions with different types refer to different memory. gcc +// 4.0.1 has an optimizer that takes advantage of this. So a +// non-conforming program quietly produces wildly incorrect output. +// +// The problem is not the use of reinterpret_cast. The problem is type +// punning: holding an object in memory of one type and reading its bits +// back using a different type. +// +// The C++ standard is more subtle and complex than this, but that +// is the basic idea. +// +// Anyways ... +// +// bit_cast<> calls memcpy() which is blessed by the standard, +// especially by the example in section 3.9 . Also, of course, +// bit_cast<> wraps up the nasty logic in one place. +// +// Fortunately memcpy() is very fast. In optimized mode, with a +// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline +// code with the minimal amount of data movement. On a 32-bit system, +// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8) +// compiles to two loads and two stores. +// +// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1. +// +// WARNING: if Dest or Source is a non-POD type, the result of the memcpy +// is likely to surprise you. + +template +inline Dest bit_cast(const Source& source) { + // Compile time assertion: sizeof(Dest) == sizeof(Source) + // A compile error here means your Dest and Source have different sizes. + typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 
1 : -1]; + + Dest dest; + memcpy(&dest, &source, sizeof(dest)); + return dest; +} + +// Used to explicitly mark the return value of a function as unused. If you are +// really sure you don't want to do anything with the return value of a function +// that has been marked WARN_UNUSED_RESULT, wrap it with this. Example: +// +// scoped_ptr my_var = ...; +// if (TakeOwnership(my_var.get()) == SUCCESS) +// ignore_result(my_var.release()); +// +template +inline void ignore_result(const T&) { +} + +// The following enum should be used only as a constructor argument to indicate +// that the variable has static storage class, and that the constructor should +// do nothing to its state. It indicates to the reader that it is legal to +// declare a static instance of the class, provided the constructor is given +// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a +// static variable that has a constructor or a destructor because invocation +// order is undefined. However, IF the type can be initialized by filling with +// zeroes (which the loader does for static variables), AND the destructor also +// does nothing to the storage, AND there are no virtual methods, then a +// constructor declared as +// explicit MyClass(base::LinkerInitialized x) {} +// and invoked as +// static MyClass my_variable_name(base::LINKER_INITIALIZED); +namespace base { +enum LinkerInitialized { LINKER_INITIALIZED }; +} // base + +#endif // BASE_BASICTYPES_H_ diff --git a/tools/cpp/src/base/basictypes.h.orig b/tools/cpp/src/base/basictypes.h.orig new file mode 100644 index 000000000..e50f3ff2d --- /dev/null +++ b/tools/cpp/src/base/basictypes.h.orig @@ -0,0 +1,368 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef BASE_BASICTYPES_H_ +#define BASE_BASICTYPES_H_ +#pragma once + +#include // So we can set the bounds of our types +#include // For size_t +#include // for memcpy + +#ifndef COMPILER_MSVC +// stdint.h is part of C99 but MSVC doesn't have it. +#include // For intptr_t. +#endif + +#ifdef INT64_MAX + +// INT64_MAX is defined if C99 stdint.h is included; use the +// native types if available. +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; + +const uint8 kuint8max = UINT8_MAX; +const uint16 kuint16max = UINT16_MAX; +const uint32 kuint32max = UINT32_MAX; +const uint64 kuint64max = UINT64_MAX; +const int8 kint8min = INT8_MIN; +const int8 kint8max = INT8_MAX; +const int16 kint16min = INT16_MIN; +const int16 kint16max = INT16_MAX; +const int32 kint32min = INT32_MIN; +const int32 kint32max = INT32_MAX; +const int64 kint64min = INT64_MIN; +const int64 kint64max = INT64_MAX; + +#else // !INT64_MAX + +typedef signed char int8; +typedef short int16; +// TODO: Remove these type guards. These are to avoid conflicts with +// obsolete/protypes.h in the Gecko SDK. +#ifndef _INT32 +#define _INT32 +typedef int int32; +#endif + +// The NSPR system headers define 64-bit as |long| when possible. In order to +// not have typedef mismatches, we do the same on LP64. +#if __LP64__ +typedef long int64; +#else +typedef long long int64; +#endif + +// NOTE: unsigned types are DANGEROUS in loops and other arithmetical +// places. Use the signed types unless your variable represents a bit +// pattern (eg a hash value) or you really need the extra bit. Do NOT +// use 'unsigned' to express "this value should always be positive"; +// use assertions for this. + +typedef unsigned char uint8; +typedef unsigned short uint16; +// TODO: Remove these type guards. These are to avoid conflicts with +// obsolete/protypes.h in the Gecko SDK. 
+#ifndef _UINT32 +#define _UINT32 +typedef unsigned int uint32; +#endif + +// See the comment above about NSPR and 64-bit. +#if __LP64__ +typedef unsigned long uint64; +#else +typedef unsigned long long uint64; +#endif + +#endif // !INT64_MAX + +typedef signed char schar; + +// A type to represent a Unicode code-point value. As of Unicode 4.0, +// such values require up to 21 bits. +// (For type-checking on pointers, make this explicitly signed, +// and it should always be the signed version of whatever int32 is.) +typedef signed int char32; + +// A macro to disallow the copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +// An older, deprecated, politically incorrect name for the above. +// NOTE: The usage of this macro was baned from our code base, but some +// third_party libraries are yet using it. +// TODO(tfarina): Figure out how to fix the usage of this macro in the +// third_party libraries and get rid of it. +#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName) + +// A macro to disallow all the implicit constructors, namely the +// default constructor, copy constructor and operator= functions. +// +// This should be used in the private: declarations for a class +// that wants to prevent anyone from instantiating it. This is +// especially useful for classes containing only static methods. +#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ + TypeName(); \ + DISALLOW_COPY_AND_ASSIGN(TypeName) + +// The arraysize(arr) macro returns the # of elements in an array arr. +// The expression is a compile-time constant, and therefore can be +// used in defining new arrays, for example. If you use arraysize on +// a pointer by mistake, you will get a compile-time error. 
+// +// One caveat is that arraysize() doesn't accept any array of an +// anonymous type or a type defined inside a function. In these rare +// cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is +// due to a limitation in C++'s template system. The limitation might +// eventually be removed, but it hasn't happened yet. + +// This template function declaration is used in defining arraysize. +// Note that the function doesn't need an implementation, as we only +// use its type. +template +char (&ArraySizeHelper(T (&array)[N]))[N]; + +// That gcc wants both of these prototypes seems mysterious. VC, for +// its part, can't decide which to use (another mystery). Matching of +// template overloads: the final frontier. +#ifndef _MSC_VER +template +char (&ArraySizeHelper(const T (&array)[N]))[N]; +#endif + +#define arraysize(array) (sizeof(ArraySizeHelper(array))) + +// ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize, +// but can be used on anonymous types or types defined inside +// functions. It's less safe than arraysize as it accepts some +// (although not all) pointers. Therefore, you should use arraysize +// whenever possible. +// +// The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type +// size_t. +// +// ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error +// +// "warning: division by zero in ..." +// +// when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer. +// You should only use ARRAYSIZE_UNSAFE on statically allocated arrays. +// +// The following comments are on the implementation details, and can +// be ignored by the users. +// +// ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in +// the array) and sizeof(*(arr)) (the # of bytes in one array +// element). If the former is divisible by the latter, perhaps arr is +// indeed an array, in which case the division result is the # of +// elements in the array. 
Otherwise, arr cannot possibly be an array, +// and we generate a compiler error to prevent the code from +// compiling. +// +// Since the size of bool is implementation-defined, we need to cast +// !(sizeof(a) & sizeof(*(a))) to size_t in order to ensure the final +// result has type size_t. +// +// This macro is not perfect as it wrongfully accepts certain +// pointers, namely where the pointer size is divisible by the pointee +// size. Since all our code has to go through a 32-bit compiler, +// where a pointer is 4 bytes, this means all pointers to a type whose +// size is 3 or greater than 4 will be (righteously) rejected. + +#define ARRAYSIZE_UNSAFE(a) \ + ((sizeof(a) / sizeof(*(a))) / \ + static_cast(!(sizeof(a) % sizeof(*(a))))) + + +// Use implicit_cast as a safe version of static_cast or const_cast +// for upcasting in the type hierarchy (i.e. casting a pointer to Foo +// to a pointer to SuperclassOfFoo or casting a pointer to Foo to +// a const pointer to Foo). +// When you use implicit_cast, the compiler checks that the cast is safe. +// Such explicit implicit_casts are necessary in surprisingly many +// situations where C++ demands an exact type match instead of an +// argument type convertable to a target type. +// +// The From type can be inferred, so the preferred syntax for using +// implicit_cast is the same as for static_cast etc.: +// +// implicit_cast(expr) +// +// implicit_cast would have been part of the C++ standard library, +// but the proposal was submitted too late. It will probably make +// its way into the language in the future. +template +inline To implicit_cast(From const &f) { + return f; +} + +// The COMPILE_ASSERT macro can be used to verify that a compile time +// expression is true. 
// For example, you could use it to verify the
// size of a static array:
//
//   COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
//                  content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
//   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is the name of the variable.  If
// the expression is false, most compilers will issue a warning/error
// containing the name of the variable.

// Empty tag type; its only job is to force "expr" to be usable as a
// template argument, i.e. to be a compile-time constant.
template <bool>
struct CompileAssert {
};

#undef COMPILE_ASSERT
#define COMPILE_ASSERT(expr, msg) \
  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]

// Implementation details of COMPILE_ASSERT:
//
// - COMPILE_ASSERT works by defining an array type that has -1
//   elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
//     #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
//   does not work, as gcc supports variable-length arrays whose sizes
//   are determined at run-time (this is gcc's extension and not part
//   of the C++ standard).  As a result, gcc fails to reject the
//   following code with the simple definition:
//
//     int foo;
//     COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
//                               // not a compile-time constant.
//
// - By using the type CompileAssert<(bool(expr))>, we ensure that
//   expr is a compile-time constant.  (Template arguments must be
//   determined at compile-time.)
//
// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
//   to work around a bug in gcc 3.4.4 and 4.0.1.  If we had written
//
//     CompileAssert<bool(expr)>
//
//   instead, these compilers will refuse to compile
//
//     COMPILE_ASSERT(5 > 0, some_message);
//
//   (They seem to think the ">" in "5 > 0" marks the end of the
//   template argument list.)
//
// - The array size is (bool(expr) ? 1 : -1), instead of simply
//
//     ((expr) ? 1 : -1).
+// // This is to avoid running into a bug in MS VC 7.1, which +// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. + + +// bit_cast<Dest, Source> is a template function that implements the +// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in +// very low-level functions like the protobuf library and fast math +// support. +// +// float f = 3.14159265358979; +// int i = bit_cast<int32>(f); +// // i = 0x40490fdb +// +// The classical address-casting method is: +// +// // WRONG +// float f = 3.14159265358979; // WRONG +// int i = * reinterpret_cast<int*>(&f); // WRONG +// +// The address-casting method actually produces undefined behavior +// according to ISO C++ specification section 3.10 -15 -. Roughly, this +// section says: if an object in memory has one type, and a program +// accesses it with a different type, then the result is undefined +// behavior for most values of "different type". +// +// This is true for any cast syntax, either *(int*)&f or +// *reinterpret_cast<int*>(&f). And it is particularly true for +// conversions between integral lvalues and floating-point lvalues. +// +// The purpose of 3.10 -15- is to allow optimizing compilers to assume +// that expressions with different types refer to different memory. gcc +// 4.0.1 has an optimizer that takes advantage of this. So a +// non-conforming program quietly produces wildly incorrect output. +// +// The problem is not the use of reinterpret_cast. The problem is type +// punning: holding an object in memory of one type and reading its bits +// back using a different type. +// +// The C++ standard is more subtle and complex than this, but that +// is the basic idea. +// +// Anyways ... +// +// bit_cast<> calls memcpy() which is blessed by the standard, +// especially by the example in section 3.9 . Also, of course, +// bit_cast<> wraps up the nasty logic in one place. +// +// Fortunately memcpy() is very fast.
// In optimized mode, with a
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
// code with the minimal amount of data movement.  On a 32-bit system,
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
// compiles to two loads and two stores.
//
// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
//
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
// is likely to surprise you.

template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  // Compile time assertion: sizeof(Dest) == sizeof(Source)
  // A compile error here means your Dest and Source have different sizes.
  typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 1 : -1];

  // Copy the object representation byte for byte instead of aliasing it
  // through a pointer of another type.
  Dest dest;
  memcpy(&dest, &source, sizeof(dest));
  return dest;
}

// Used to explicitly mark the return value of a function as unused. If you are
// really sure you don't want to do anything with the return value of a function
// that has been marked WARN_UNUSED_RESULT, wrap it with this. Example:
//
//   scoped_ptr<MyType> my_var = ...;
//   if (TakeOwnership(my_var.get()) == SUCCESS)
//     ignore_result(my_var.release());
//
template <typename T>
inline void ignore_result(const T& ignored) {
}

// The following enum should be used only as a constructor argument to indicate
// that the variable has static storage class, and that the constructor should
// do nothing to its state.  It indicates to the reader that it is legal to
// declare a static instance of the class, provided the constructor is given
// the base::LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
// static variable that has a constructor or a destructor because invocation
// order is undefined.
However, IF the type can be initialized by filling with +// zeroes (which the loader does for static variables), AND the destructor also +// does nothing to the storage, AND there are no virtual methods, then a +// constructor declared as +// explicit MyClass(base::LinkerInitialized x) {} +// and invoked as +// static MyClass my_variable_name(base::LINKER_INITIALIZED); +// is presumably safe to use for a static instance. NOTE(review): the sentence +// was left unfinished; this ending is inferred from the paragraph's opening. +namespace base { +enum LinkerInitialized { LINKER_INITIALIZED }; +} // namespace base + +#endif // BASE_BASICTYPES_H_ diff --git a/tools/cpp/src/cpp-build/generate_geocoding_data.cc b/tools/cpp/src/cpp-build/generate_geocoding_data.cc new file mode 100644 index 000000000..aace898bc --- /dev/null +++ b/tools/cpp/src/cpp-build/generate_geocoding_data.cc @@ -0,0 +1,656 @@ +// Copyright (C) 2012 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +// Author: Patrick Mezard + +#include "cpp-build/generate_geocoding_data.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "base/basictypes.h" + +namespace i18n { +namespace phonenumbers { + +using std::map; +using std::string; +using std::vector; +using std::set; +using std::pair; + +template class AutoCloser { + public: + typedef int (*ReleaseFunction) (ResourceType* resource); + + AutoCloser(ResourceType** resource, ReleaseFunction release_function) + : resource_(resource), + release_function_(release_function) + {} + + ~AutoCloser() { + Close(); + } + + ResourceType* get_resource() const { + return *resource_; + } + + void Close() { + if (*resource_) { + release_function_(*resource_); + *resource_ = NULL; + } + } + + private: + ResourceType** resource_; + ReleaseFunction release_function_; +}; + +enum DirEntryKinds { + kFile = 0, + kDirectory = 1, +}; + +class DirEntry { + public: + DirEntry(const char* n, DirEntryKinds k) + : name_(n), + kind_(k) + {} + + const std::string& name() const { return name_; } + DirEntryKinds kind() const { return kind_; } + + private: + std::string name_; + DirEntryKinds kind_; +}; + +// Lists directory entries in path. "." and ".." are excluded. Returns true on +// success. 
+bool ListDirectory(const string& path, vector* entries) { + entries->clear(); + DIR* dir = opendir(path.c_str()); + if (!dir) { + return false; + } + AutoCloser dir_closer(&dir, closedir); + struct dirent entry, *dir_result; + struct stat entry_stat; + while (true) { + const int res = readdir_r(dir, &entry, &dir_result); + if (res) { + return false; + } + if (dir_result == NULL) { + return true; + } + if (strcmp(entry.d_name, ".") == 0 || strcmp(entry.d_name, "..") == 0) { + continue; + } + const string entry_path = path + "/" + entry.d_name; + if (stat(entry_path.c_str(), &entry_stat)) { + return false; + } + DirEntryKinds kind = kFile; + if (S_ISDIR(entry_stat.st_mode)) { + kind = kDirectory; + } else if (!S_ISREG(entry_stat.st_mode)) { + continue; + } + entries->push_back(DirEntry(entry.d_name, kind)); + } +} + +// Returns true if s ends with suffix. +bool EndsWith(const string& s, const string& suffix) { + if (suffix.length() > s.length()) { + return false; + } + return std::equal(suffix.rbegin(), suffix.rend(), s.rbegin()); +} + +// Converts string to integer, returns true on success. +bool StrToInt(const string& s, int32* n) { + std::stringstream stream; + stream << s; + stream >> *n; + return stream; +} + +// Converts integer to string, returns true on success. +bool IntToStr(int32 n, string* s) { + std::stringstream stream; + stream << n; + stream >> *s; + return stream; +} + +// Parses the prefix descriptions file at path, clears and fills the output +// prefixes phone number prefix to description mapping. +// Returns true on success. 
+bool ParsePrefixes(const string& path, map* prefixes) { + prefixes->clear(); + FILE* input = fopen(path.c_str(), "r"); + if (!input) { + return false; + } + AutoCloser input_closer(&input, fclose); + const int kMaxLineLength = 2*1024; + vector buffer(kMaxLineLength); + vector::iterator begin, end, sep; + string prefix, description; + int32 prefix_code; + while (fgets(&buffer[0], buffer.size(), input)) { + begin = buffer.begin(); + end = std::find(begin, buffer.end(), '\0'); + if (end == begin) { + continue; + } + --end; + if (*end != '\n') { + if (!feof(input)) { + // A line without LF can only happen at the end of file. + return false; + } + } else { + // Consume the LF. + --end; + } + + // Trim and check for comments. + for (; begin != end && std::isspace(*begin); ++begin) {} + for (; end != begin && std::isspace(*(end - 1)); --end) {} + if (begin == end || *begin == '#') { + continue; + } + + sep = std::find(begin, end, '|'); + if (sep == end) { + continue; + } + prefix = string(begin, sep); + if (!StrToInt(prefix, &prefix_code)) { + return false; + } + (*prefixes)[prefix_code] = string(sep + 1, end); + } + return ferror(input) == 0; +} + +// Builds a C string literal from s. The output is enclosed in double-quotes and +// care is taken to escape input quotes and non-ASCII or control characters. +// +// An input string: +// Op\xc3\xa9ra +// becomes: +// "Op""\xc3""\xa9""ra" +string MakeStringLiteral(const string& s) { + std::stringstream buffer; + int prev_is_hex = 0; + buffer << std::hex << std::setfill('0'); + buffer << "\""; + for (string::const_iterator it = s.begin(); it != s.end(); ++it) { + const char c = *it; + if (c >= 32 && c < 127) { + if (prev_is_hex == 2) { + buffer << "\"\""; + } + if (c == '\'') { + buffer << "\\"; + } + buffer << c; + prev_is_hex = 1; + } else { + if (prev_is_hex != 0) { + buffer << "\"\""; + } + buffer << "\\x" << std::setw(2) << (c < 0 ? 
c + 256 : c); + prev_is_hex = 2; + } + } + buffer << "\""; + return buffer.str(); +} + +void WriteStringLiteral(const string& s, FILE* output) { + string literal = MakeStringLiteral(s); + fprintf(output, "%s", literal.c_str()); +} + +const char kLicense[] = + "// Copyright (C) 2012 The Libphonenumber Authors\n" + "//\n" + "// Licensed under the Apache License, Version 2.0 (the \"License\");\n" + "// you may not use this file except in compliance with the License.\n" + "// You may obtain a copy of the License at\n" + "//\n" + "// http://www.apache.org/licenses/LICENSE-2.0\n" + "//\n" + "// Unless required by applicable law or agreed to in writing, software\n" + "// distributed under the License is distributed on an \"AS IS\" BASIS,\n" + "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or " + "implied.\n" + "// See the License for the specific language governing permissions and\n" + "// limitations under the License.\n" + "//\n" + "// This file is generated automatically, do not edit it manually.\n" + "\n"; + +void WriteLicense(FILE* output) { + fprintf(output, "%s", kLicense); +} + +const char kI18NNS[] = "i18n"; +const char kPhoneNumbersNS[] = "phonenumbers"; + +void WriteNSHeader(FILE* output) { + fprintf(output, "namespace %s {\n", kI18NNS); + fprintf(output, "namespace %s {\n", kPhoneNumbersNS); +} + +void WriteNSFooter(FILE* output) { + fprintf(output, "} // namespace %s\n", kPhoneNumbersNS); + fprintf(output, "} // namespace %s\n", kI18NNS); +} + +void WriteCppHeader(const string& base_name, FILE* output) { + fprintf(output, "#include \"phonenumbers/geocoding/%s.h\"\n", + base_name.c_str()); + fprintf(output, "\n"); + fprintf(output, "#include \"base/basictypes.h\"\n"); + fprintf(output, "\n"); +} + +void WriteArrayAndSize(const string& name, FILE* output) { + fprintf(output, " %s,\n", name.c_str()); + fprintf(output, " sizeof(%s)/sizeof(*%s),\n", name.c_str(), name.c_str()); +} + +// Writes a PrefixDescriptions variable named "name", with its 
prefixes field +// set to "prefixes_name" variable, its descriptions to "desc_name" and its +// possible_lengths to "possible_lengths_name": +// +// const PrefixDescriptions ${name} = { +// ${prefix_name}, +// sizeof(${prefix_name})/sizeof(*${prefix_name}), +// ${desc_name}, +// ${possible_lengths_name}, +// sizeof(${possible_lengths_name})/sizeof(*${possible_lengths_name}), +// }; +// +void WritePrefixDescriptionsDefinition( + const string& name, const string& prefixes_name, const string& desc_name, + const string& possible_lengths_name, FILE* output) { + fprintf(output, "const PrefixDescriptions %s = {\n", name.c_str()); + WriteArrayAndSize(prefixes_name, output); + fprintf(output, " %s,\n", desc_name.c_str()); + WriteArrayAndSize(possible_lengths_name, output); + fprintf(output, "};\n"); +} + +// Writes prefixes, descriptions and possible_lengths arrays built from the +// phone number prefix to description mapping "prefixes". Binds these arrays +// in a single PrefixDescriptions variable named "var_name". +// +// const int32 ${var_name}_prefixes[] = { +// 1201, +// 1650, +// }; +// +// const char* ${var_name}_descriptions[] = { +// "New Jerse", +// "Kalifornie", +// }; +// +// const int32 ${var_name}_possible_lengths[] = { +// 4, +// }; +// +// const PrefixDescriptions ${var_name} = { +// ... 
+// }; +// +void WritePrefixDescriptions(const string& var_name, const map& + prefixes, FILE* output) { + set possible_lengths; + const string prefixes_name = var_name + "_prefixes"; + fprintf(output, "const int32 %s[] = {\n", prefixes_name.c_str()); + for (map::const_iterator it = prefixes.begin(); + it != prefixes.end(); ++it) { + fprintf(output, " %d,\n", it->first); + possible_lengths.insert(static_cast(log10(it->first) + 1)); + } + fprintf(output, + "};\n" + "\n"); + + const string desc_name = var_name + "_descriptions"; + fprintf(output, "const char* %s[] = {\n", desc_name.c_str()); + for (map::const_iterator it = prefixes.begin(); + it != prefixes.end(); ++it) { + fprintf(output, " "); + WriteStringLiteral(it->second, output); + fprintf(output, ",\n"); + } + fprintf(output, + "};\n" + "\n"); + + const string possible_lengths_name = var_name + "_possible_lengths"; + fprintf(output, "const int32 %s[] = {\n ", possible_lengths_name.c_str()); + for (set::const_iterator it = possible_lengths.begin(); + it != possible_lengths.end(); ++it) { + fprintf(output, " %d,", *it); + } + fprintf(output, + "\n" + "};\n" + "\n"); + + WritePrefixDescriptionsDefinition(var_name, prefixes_name, desc_name, + possible_lengths_name, output); + fprintf(output, "\n"); +} + +// Writes a pair of arrays mapping prefix language code pairs to +// PrefixDescriptions instances. "prefix_var_names" maps language code pairs +// to prefix variable names. 
//
// const char* prefix_language_code_pairs[] = {
//   "1_de",
//   "1_en",
// };
//
// const PrefixDescriptions* prefixes_descriptions[] = {
//   &prefix_1_de,
//   &prefix_1_en,
// };
//
// Both arrays are written in the iteration order of "prefix_var_names", so
// entry i of the first array matches entry i of the second.
void WritePrefixesDescriptions(
    const std::map<std::string, std::string>& prefix_var_names,
    FILE* output) {
  typedef std::map<std::string, std::string>::const_iterator NameIterator;

  fprintf(output, "const char* prefix_language_code_pairs[] = {\n");
  for (NameIterator it = prefix_var_names.begin();
       it != prefix_var_names.end(); ++it) {
    fprintf(output, " \"%s\",\n", it->first.c_str());
  }
  fprintf(output,
          "};\n"
          "\n"
          "const PrefixDescriptions* prefixes_descriptions[] = {\n");
  for (NameIterator it = prefix_var_names.begin();
       it != prefix_var_names.end(); ++it) {
    fprintf(output, " &%s,\n", it->second.c_str());
  }
  fprintf(output,
          "};\n"
          "\n");
}

// For each entry in "languages" mapping a country calling code to a set
// of available languages, writes a sorted array of languages, then wraps it
// into a CountryLanguages instance. Finally, writes a pair of arrays mapping
// country calling codes to CountryLanguages instances.
//
// const char* country_1[] = {
//   "de",
//   "en",
// };
//
// const CountryLanguages country_1_languages = {
//   country_1,
//   sizeof(country_1)/sizeof(*country_1),
// };
//
// [...]
//
// const CountryLanguages* countries_languages[] = {
//   &country_1_languages,
//   [...]
// }
//
// const int country_calling_codes[] = {
//   1,
//   [...]
+// }; +// +bool WriteCountryLanguages(const map >& languages, + FILE* output) { + vector country_languages_vars; + vector countries; + for (map >::const_iterator it = languages.begin(); + it != languages.end(); ++it) { + string country_code; + if (!IntToStr(it->first, &country_code)) { + return false; + } + const string country_var = "country_" + country_code; + fprintf(output, "const char* %s[] = {\n", country_var.c_str()); + for (set::const_iterator it_lang = it->second.begin(); + it_lang != it->second.end(); ++it_lang) { + fprintf(output, " \"%s\",\n", it_lang->c_str()); + } + fprintf(output, + "};\n" + "\n"); + + const string country_languages_var = country_var + "_languages"; + fprintf(output, "const CountryLanguages %s = {\n", + country_languages_var.c_str()); + WriteArrayAndSize(country_var, output); + fprintf(output, + "};\n" + "\n"); + country_languages_vars.push_back(country_languages_var); + countries.push_back(country_code); + } + + fprintf(output, + "\n" + "const CountryLanguages* countries_languages[] = {\n"); + for (vector::const_iterator + it_languages_var = country_languages_vars.begin(); + it_languages_var != country_languages_vars.end(); ++it_languages_var) { + fprintf(output, " &%s,\n", it_languages_var->c_str()); + } + fprintf(output, + "};\n" + "\n" + "const int country_calling_codes[] = {\n"); + for (vector::const_iterator it_country = countries.begin(); + it_country != countries.end(); ++it_country) { + fprintf(output, " %s,\n", it_country->c_str()); + } + fprintf(output, + "};\n" + "\n"); + return true; +} + +// Returns a copy of input where all occurences of pattern are replaced with +// value. 
std::string ReplaceAll(const std::string& input, const std::string& pattern,
                       const std::string& value) {
  // An empty pattern matches at every position without consuming anything,
  // so the scan below would never advance; return the input unchanged.
  if (pattern.empty()) {
    return input;
  }
  std::string replaced;
  std::string::size_type cursor = 0;
  while (true) {
    const std::string::size_type match = input.find(pattern, cursor);
    if (match == std::string::npos) {
      // No more matches: copy the remaining tail.
      replaced.append(input, cursor, std::string::npos);
      break;
    }
    replaced.append(input, cursor, match - cursor);
    replaced.append(value);
    // Resume after the match so text inserted by "value" is never rescanned.
    cursor = match + pattern.length();
  }
  return replaced;
}

// Writes data accessor definitions, prefixed with "accessor_prefix".
// Every "$prefix$" marker in the template below is replaced with
// "accessor_prefix" before the text is written to output.
void WriteAccessorsDefinitions(const std::string& accessor_prefix,
                               FILE* output) {
  std::string templ =
      "const int* get$prefix$_country_calling_codes() {\n"
      " return country_calling_codes;\n"
      "}\n"
      "\n"
      "int get$prefix$_country_calling_codes_size() {\n"
      " return sizeof(country_calling_codes)\n"
      " /sizeof(*country_calling_codes);\n"
      "}\n"
      "\n"
      "const CountryLanguages* get$prefix$_country_languages(int index) {\n"
      " return countries_languages[index];\n"
      "}\n"
      "\n"
      "const char** get$prefix$_prefix_language_code_pairs() {\n"
      " return prefix_language_code_pairs;\n"
      "}\n"
      "\n"
      "int get$prefix$_prefix_language_code_pairs_size() {\n"
      " return sizeof(prefix_language_code_pairs)\n"
      " /sizeof(*prefix_language_code_pairs);\n"
      "}\n"
      "\n"
      "const PrefixDescriptions* get$prefix$_prefix_descriptions(int index) {\n"
      " return prefixes_descriptions[index];\n"
      "}\n";
  std::string defs = ReplaceAll(templ, "$prefix$", accessor_prefix);
  fprintf(output, "%s", defs.c_str());
}

// Writes geocoding data .cc file. "data_path" is the path of geocoding textual
// data directory. "base_name" is the base name of the .h/.cc pair, like
// "geocoding_data".
+bool WriteSource(const string& data_path, const string& base_name, + const string& accessor_prefix, FILE* output) { + WriteLicense(output); + WriteCppHeader(base_name, output); + WriteNSHeader(output); + fprintf(output, + "namespace {\n" + "\n"); + + // Enumerate language/script directories. + map prefix_vars; + map > country_languages; + vector entries; + if (!ListDirectory(data_path, &entries)) { + fprintf(stderr, "failed to read directory entries"); + return false; + } + for (vector::const_iterator it = entries.begin(); + it != entries.end(); ++it) { + if (it->kind() != kDirectory) { + continue; + } + // Enumerate country calling code files. + const string dir_path = data_path + "/" + it->name(); + vector files; + if (!ListDirectory(dir_path, &files)) { + fprintf(stderr, "failed to read file entries\n"); + return false; + } + for (vector::const_iterator it_files = files.begin(); + it_files != files.end(); ++it_files) { + const string fname = it_files->name(); + if (!EndsWith(fname, ".txt")) { + continue; + } + int32 country_code; + const string country_code_str = fname.substr(0, fname.length() - 4); + if (!StrToInt(country_code_str, &country_code)) { + return false; + } + const string path = dir_path + "/" + fname; + + map prefixes; + if (!ParsePrefixes(path, &prefixes)) { + return false; + } + + const string prefix_var = "prefix_" + country_code_str + "_" + it->name(); + WritePrefixDescriptions(prefix_var, prefixes, output); + prefix_vars[country_code_str + "_" + it->name()] = prefix_var; + country_languages[country_code].insert(it->name()); + } + } + WritePrefixesDescriptions(prefix_vars, output); + if (!WriteCountryLanguages(country_languages, output)) { + return false; + } + fprintf(output, "} // namespace\n"); + fprintf(output, "\n"); + WriteAccessorsDefinitions(accessor_prefix, output); + WriteNSFooter(output); + return ferror(output) == 0; +} + +int PrintHelp(const string& message) { + fprintf(stderr, "error: %s\n", message.c_str()); + fprintf(stderr, 
"generate_geocoding_data DATADIR CCPATH"); + return 1; +} + +int Main(int argc, const char* argv[]) { + if (argc < 2) { + return PrintHelp("geocoding data root directory expected"); + } + if (argc < 3) { + return PrintHelp("output source path expected"); + } + string accessor_prefix = ""; + if (argc > 3) { + accessor_prefix = argv[3]; + } + const string root_path(argv[1]); + string source_path(argv[2]); + std::replace(source_path.begin(), source_path.end(), '\\', '/'); + string base_name = source_path; + if (base_name.rfind('/') != string::npos) { + base_name = base_name.substr(base_name.rfind('/') + 1); + } + base_name = base_name.substr(0, base_name.rfind('.')); + + FILE* source_fp = fopen(source_path.c_str(), "w"); + if (!source_fp) { + fprintf(stderr, "failed to open %s\n", source_path.c_str()); + return 1; + } + AutoCloser source_closer(&source_fp, fclose); + if (!WriteSource(root_path, base_name, accessor_prefix, + source_fp)) { + return 1; + } + return 0; +} + +} // namespace phonenumbers +} // namespace i18n diff --git a/tools/cpp/src/cpp-build/generate_geocoding_data.h b/tools/cpp/src/cpp-build/generate_geocoding_data.h new file mode 100644 index 000000000..f08780076 --- /dev/null +++ b/tools/cpp/src/cpp-build/generate_geocoding_data.h @@ -0,0 +1,34 @@ +// Copyright (C) 2012 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Author: Patrick Mezard + +#ifndef I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H +#define I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H + +#include + +namespace i18n { +namespace phonenumbers { + +using std::string; + +string MakeStringLiteral(const string& s); + +int Main(int argc, const char* argv[]); + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H diff --git a/tools/cpp/src/cpp-build/generate_geocoding_data_main.cc b/tools/cpp/src/cpp-build/generate_geocoding_data_main.cc new file mode 100644 index 000000000..f31c94db1 --- /dev/null +++ b/tools/cpp/src/cpp-build/generate_geocoding_data_main.cc @@ -0,0 +1,21 @@ +// Copyright (C) 2012 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: Patrick Mezard + +#include "cpp-build/generate_geocoding_data.h" + +int main(int argc, const char* argv[]) { + return i18n::phonenumbers::Main(argc, argv); +} diff --git a/tools/cpp/test/cpp-build/generate_geocoding_data_test.cc b/tools/cpp/test/cpp-build/generate_geocoding_data_test.cc new file mode 100644 index 000000000..5e86b637e --- /dev/null +++ b/tools/cpp/test/cpp-build/generate_geocoding_data_test.cc @@ -0,0 +1,31 @@ +// Copyright (C) 2012 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: Patrick Mezard + +#include "cpp-build/generate_geocoding_data.h" + +#include + +namespace i18n { +namespace phonenumbers { + +TEST(GenerateGeocodingDataTest, TestMakeStringLiteral) { + EXPECT_EQ("\"\"", MakeStringLiteral("")); + EXPECT_EQ("\"Op\"\"\\xc3\"\"\\xa9\"\"ra\"", + MakeStringLiteral("Op\xc3\xa9ra")); +} + +} // namespace phonenumbers +} // namespace i18n diff --git a/tools/cpp/test/cpp-build/run_tests.cc b/tools/cpp/test/cpp-build/run_tests.cc new file mode 100644 index 000000000..e6d837472 --- /dev/null +++ b/tools/cpp/test/cpp-build/run_tests.cc @@ -0,0 +1,21 @@ +// Copyright (C) 2011 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +}