Browse Source

CPP: Add geocoding data generator.

pull/567/head
Philippe Liard 14 years ago
committed by Mihaela Rosca
parent
commit
ac08bfe262
12 changed files with 1748 additions and 14 deletions
  1. +45
    -1
      cpp/CMakeLists.txt
  2. +1
    -13
      cpp/src/base/basictypes.h
  3. +77
    -0
      cpp/src/phonenumbers/geocoding/geocoding_data.h
  4. +50
    -0
      cpp/test/phonenumbers/geocoding/geocoding_test_data.h
  5. +76
    -0
      tools/cpp/CMakeLists.txt
  6. +368
    -0
      tools/cpp/src/base/basictypes.h
  7. +368
    -0
      tools/cpp/src/base/basictypes.h.orig
  8. +656
    -0
      tools/cpp/src/cpp-build/generate_geocoding_data.cc
  9. +34
    -0
      tools/cpp/src/cpp-build/generate_geocoding_data.h
  10. +21
    -0
      tools/cpp/src/cpp-build/generate_geocoding_data_main.cc
  11. +31
    -0
      tools/cpp/test/cpp-build/generate_geocoding_data_test.cc
  12. +21
    -0
      tools/cpp/test/cpp-build/run_tests.cc

+ 45
- 1
cpp/CMakeLists.txt View File

@ -154,6 +154,26 @@ add_custom_target (
COMMENT "Generating Protocol Buffers code"
)
# Geocoding data cpp file generation.
# The generator tool is built from ../tools/cpp into a "tools" subdirectory of
# the current binary directory, then run over the geocoding text resources to
# produce a single C++ source file.
set (TOOLS_DIR "${CMAKE_CURRENT_BINARY_DIR}/tools")
add_subdirectory ("${CMAKE_CURRENT_SOURCE_DIR}/../tools/cpp" "${TOOLS_DIR}")

set (GEOCODING_DIR "${RESOURCES_DIR}/geocoding")
file (GLOB_RECURSE GEOCODING_SOURCES "${GEOCODING_DIR}/*.txt")

# Anchored on CMAKE_CURRENT_SOURCE_DIR (not CMAKE_SOURCE_DIR) so that this path
# always names the same file as the relative
# "src/phonenumbers/geocoding/geocoding_data.cc" entry in SOURCES, even when
# this directory is included from a parent project.
set (GEOCODING_DATA_OUTPUT
  "${CMAKE_CURRENT_SOURCE_DIR}/src/phonenumbers/geocoding/geocoding_data.cc"
)

add_custom_command (
  OUTPUT "${GEOCODING_DATA_OUTPUT}"
  COMMAND generate_geocoding_data "${GEOCODING_DIR}" "${GEOCODING_DATA_OUTPUT}"
  # Regenerate when any input text file or the generator itself changes.
  DEPENDS ${GEOCODING_SOURCES} generate_geocoding_data
  COMMENT "Generating geocoding data code"
  # VERBATIM makes argument escaping identical on every platform/generator.
  VERBATIM
)
set (
SOURCES
"src/base/string_piece.cc"
@ -161,6 +181,7 @@ set (
"src/phonenumbers/default_logger.cc"
"src/phonenumbers/geocoding/area_code_map.cc"
"src/phonenumbers/geocoding/default_map_storage.cc"
"src/phonenumbers/geocoding/geocoding_data.cc"
"src/phonenumbers/logger.cc"
"src/phonenumbers/metadata.h" # Generated by build tools.
"src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers.
@ -310,9 +331,27 @@ add_library (phonenumber_testing STATIC ${TESTING_LIBRARY_SOURCES})
target_link_libraries (phonenumber_testing ${LIBRARY_DEPS})
add_dependencies (phonenumber_testing generate-sources ${TEST_METADATA_TARGET})
# Test geocoding data cpp files generation.
# Runs the generate_geocoding_data tool over the test geocoding text resources
# to produce the C++ source file listed in TEST_SOURCES.
set (GEOCODING_TEST_DIR "${RESOURCES_DIR}/test/geocoding")
file (GLOB_RECURSE GEOCODING_TEST_SOURCES "${GEOCODING_TEST_DIR}/*.txt")

# Anchored on CMAKE_CURRENT_SOURCE_DIR (not CMAKE_SOURCE_DIR) so that this path
# always names the same file as the relative
# "test/phonenumbers/geocoding/geocoding_test_data.cc" entry in TEST_SOURCES,
# even when this directory is included from a parent project.
set (GEOCODING_TEST_DATA_OUTPUT
  "${CMAKE_CURRENT_SOURCE_DIR}/test/phonenumbers/geocoding/geocoding_test_data.cc"
)

add_custom_command (
  OUTPUT "${GEOCODING_TEST_DATA_OUTPUT}"
  COMMAND generate_geocoding_data "${GEOCODING_TEST_DIR}"
          "${GEOCODING_TEST_DATA_OUTPUT}"
  # Regenerate when any input text file or the generator itself changes.
  DEPENDS ${GEOCODING_TEST_SOURCES} generate_geocoding_data
  COMMENT "Generating geocoding test data code"
  # VERBATIM makes argument escaping identical on every platform/generator.
  VERBATIM
)
set (TEST_SOURCES
"test/phonenumbers/asyoutypeformatter_test.cc"
"test/phonenumbers/geocoding/area_code_map_test.cc"
"test/phonenumbers/geocoding/geocoding_test_data.cc"
"test/phonenumbers/logger_test.cc"
"test/phonenumbers/phonenumberutil_test.cc"
"test/phonenumbers/regexp_adapter_test.cc"
@ -339,7 +378,12 @@ if (NOT WIN32)
endif ()
target_link_libraries (libphonenumber_test ${TEST_LIBS})
add_custom_target(test COMMAND libphonenumber_test DEPENDS libphonenumber_test)
# "test" target: runs the geocoding data generator's test binary and then the
# library's test binary. DEPENDS makes sure both executables are built before
# the commands run.
add_custom_target (test
COMMAND generate_geocoding_data_test
COMMAND libphonenumber_test
DEPENDS generate_geocoding_data_test libphonenumber_test
)
# Install rules.
install (FILES


+ 1
- 13
cpp/src/base/basictypes.h View File

@ -271,18 +271,6 @@ struct CompileAssert {
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
// MetatagId refers to metatag-id that we assign to
// each metatag <name, value> pair..
typedef uint32 MetatagId;
// Argument type used in interfaces that can optionally take ownership
// of a passed in argument. If TAKE_OWNERSHIP is passed, the called
// object takes ownership of the argument. Otherwise it does not.
enum Ownership {
DO_NOT_TAKE_OWNERSHIP,
TAKE_OWNERSHIP
};
// bit_cast<Dest,Source> is a template function that implements the
// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
// very low-level functions like the protobuf library and fast math
@ -357,7 +345,7 @@ inline Dest bit_cast(const Source& source) {
// ignore_result(my_var.release());
//
template<typename T>
inline void ignore_result(const T& ignored) {
inline void ignore_result(const T&) {
}
// The following enum should be used only as a constructor argument to indicate


+ 77
- 0
cpp/src/phonenumbers/geocoding/geocoding_data.h View File

@ -0,0 +1,77 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file is generated automatically, do not edit it manually.
#ifndef I18N_PHONENUMBERS_GEOCODING_DATA
#define I18N_PHONENUMBERS_GEOCODING_DATA
#include "base/basictypes.h"
namespace i18n {
namespace phonenumbers {
// Plain-data record listing the language codes available for one country
// entry (see get_country_languages()). Holds a pointer and a count only.
struct CountryLanguages {
// Sorted array of language codes.
const char** available_languages;
// Number of elements in available_languages.
const int available_languages_size;
};
// Plain-data record holding the phone number prefixes and their descriptions
// for one prefix/language code pair (see get_prefix_descriptions()).
struct PrefixDescriptions {
// Sorted array of phone number prefixes.
const int32* prefixes;
// Number of elements in prefixes.
const int prefixes_size;
// Array of phone number prefix descriptions, mapped one to one
// to prefixes (so it also has prefixes_size elements).
const char** descriptions;
// Sorted array of unique prefix lengths in base 10.
const int32* possible_lengths;
// Number of elements in possible_lengths.
const int possible_lengths_size;
};
// Returns a sorted array of country calling codes.
const int* get_country_calling_codes();
// Returns the number of country calling codes in
// get_country_calling_codes() array.
int get_country_calling_codes_size();
// Returns the CountryLanguages record for country at index, index
// being in [0, get_country_calling_codes_size()).
// NOTE(review): presumably index i here corresponds to entry i of
// get_country_calling_codes() -- confirm against the generated .cc file.
const CountryLanguages* get_country_languages(int index);
// Returns a sorted array of prefix language code pairs like
// "1_de" or "82_ko".
const char** get_prefix_language_code_pairs();
// Returns the number of elements in the
// get_prefix_language_code_pairs() array.
int get_prefix_language_code_pairs_size();
// Returns the PrefixDescriptions for language/code pair at index,
// index being in [0, get_prefix_language_code_pairs_size()).
const PrefixDescriptions* get_prefix_descriptions(int index);
} // namespace phonenumbers
} // namespace i18n
#endif // I18N_PHONENUMBERS_GEOCODING_DATA

+ 50
- 0
cpp/test/phonenumbers/geocoding/geocoding_test_data.h View File

@ -0,0 +1,50 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef I18N_PHONENUMBERS_GEOCODING_TEST_DATA
#define I18N_PHONENUMBERS_GEOCODING_TEST_DATA
#include "base/basictypes.h"
#include "phonenumbers/geocoding/geocoding_data.h"
namespace i18n {
namespace phonenumbers {
// Test-data counterparts of the accessors declared in
// phonenumbers/geocoding/geocoding_data.h. They reuse the CountryLanguages and
// PrefixDescriptions record types from that header.
// NOTE(review): presumably implemented by the generated
// geocoding_test_data.cc -- confirm.

// Returns a sorted array of country calling codes.
const int* get_test_country_calling_codes();
// Returns the number of country calling codes in
// get_test_country_calling_codes() array.
int get_test_country_calling_codes_size();
// Returns the CountryLanguages record for country at index, index
// being in [0, get_test_country_calling_codes_size()).
const CountryLanguages* get_test_country_languages(int index);
// Returns a sorted array of prefix language code pairs like
// "1_de" or "82_ko".
const char** get_test_prefix_language_code_pairs();
// Returns the number of elements in the
// get_test_prefix_language_code_pairs() array.
int get_test_prefix_language_code_pairs_size();
// Returns the PrefixDescriptions for language/code pair at index,
// index being in [0, get_test_prefix_language_code_pairs_size()).
const PrefixDescriptions* get_test_prefix_descriptions(int index);
} // namespace phonenumbers
} // namespace i18n
#endif // I18N_PHONENUMBERS_GEOCODING_TEST_DATA

+ 76
- 0
tools/cpp/CMakeLists.txt View File

@ -0,0 +1,76 @@
# Copyright (C) 2012 The Libphonenumber Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Author: Patrick Mezard
cmake_minimum_required (VERSION 2.8)
project (generate_geocoding_data)
# Helper functions dealing with finding libraries and programs this library
# depends on.
# Stops the configure step with a fatal error about a missing dependency.
#   DESCRIPTION - human-readable name of the dependency.
#   FILE        - the header or library file that could not be located.
function (print_error DESCRIPTION FILE)
  message (
    FATAL_ERROR
    "Can't find ${DESCRIPTION}: can't locate ${FILE}. Please read the README."
  )
endfunction ()
# Find a library. If either its header or its binary cannot be found, stop
# CMake with a fatal error message.
#   NAME        - prefix for the result variables: the header directory is
#                 stored in ${NAME}_INCLUDE_DIR and the library in ${NAME}_LIB
#                 (both are cache entries created by find_path/find_library).
#   HEADER      - header file to look for, e.g. gtest/gtest.h.
#   LIBRARY     - library name to look for, e.g. gtest.
#   DESCRIPTION - human-readable name used in the error message.
# On success the header directory is added to the directory's include path.
function (find_required_library NAME HEADER LIBRARY DESCRIPTION)
  # Check the header. if(NOT <variable>) is true both for an empty value and
  # for the "<VAR>-NOTFOUND" value find_path() stores on failure; the previous
  # string comparison against "<found-path>-NOTFOUND" could never match.
  find_path (${NAME}_INCLUDE_DIR ${HEADER})
  if (NOT ${NAME}_INCLUDE_DIR)
    print_error ("${DESCRIPTION}" "${HEADER}")
  endif ()
  include_directories ("${${NAME}_INCLUDE_DIR}")

  # Check the binary.
  find_library (${NAME}_LIB ${LIBRARY})
  if (NOT ${NAME}_LIB)
    print_error ("${DESCRIPTION}" "${LIBRARY}")
  endif ()
endfunction ()
# Google Test is needed by the generator's test binary below.
find_required_library (GTEST gtest/gtest.h gtest "Google Test framework")

# Sources of the command-line generator tool.
set (
  SOURCES
  "src/cpp-build/generate_geocoding_data.cc"
  "src/cpp-build/generate_geocoding_data_main.cc"
)

# Treat all warnings as errors everywhere except on Windows.
if (NOT WIN32)
  add_definitions ("-Wall -Werror")
endif ()

# Build the generator binary.
include_directories ("src")
add_executable (generate_geocoding_data ${SOURCES})

# Sources of the generator's test binary: the generator implementation (without
# its main) plus the test files.
set (
  TEST_SOURCES
  "src/cpp-build/generate_geocoding_data.cc"
  "test/cpp-build/generate_geocoding_data_test.cc"
  "test/cpp-build/run_tests.cc"
)
set (TEST_LIBS ${GTEST_LIB})

# Build the testing binary.
include_directories ("test")
add_executable (generate_geocoding_data_test ${TEST_SOURCES})
target_link_libraries (generate_geocoding_data_test ${TEST_LIBS})

+ 368
- 0
tools/cpp/src/base/basictypes.h View File

@ -0,0 +1,368 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_BASICTYPES_H_
#define BASE_BASICTYPES_H_
#pragma once
#include <limits.h> // So we can set the bounds of our types
#include <stddef.h> // For size_t
#include <string.h> // for memcpy
#ifndef COMPILER_MSVC
// stdint.h is part of C99 but MSVC doesn't have it.
#include <stdint.h> // For intptr_t.
#endif
#ifdef INT64_MAX
// INT64_MAX is defined if C99 stdint.h is included; use the
// native types if available.
typedef int8_t int8;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
const uint8 kuint8max = UINT8_MAX;
const uint16 kuint16max = UINT16_MAX;
const uint32 kuint32max = UINT32_MAX;
const uint64 kuint64max = UINT64_MAX;
const int8 kint8min = INT8_MIN;
const int8 kint8max = INT8_MAX;
const int16 kint16min = INT16_MIN;
const int16 kint16max = INT16_MAX;
const int32 kint32min = INT32_MIN;
const int32 kint32max = INT32_MAX;
const int64 kint64min = INT64_MIN;
const int64 kint64max = INT64_MAX;
#else // !INT64_MAX
typedef signed char int8;
typedef short int16;
// TODO: Remove these type guards. These are to avoid conflicts with
// obsolete/protypes.h in the Gecko SDK.
#ifndef _INT32
#define _INT32
typedef int int32;
#endif
// The NSPR system headers define 64-bit as |long| when possible. In order to
// not have typedef mismatches, we do the same on LP64.
#if __LP64__
typedef long int64;
#else
typedef long long int64;
#endif
// NOTE: unsigned types are DANGEROUS in loops and other arithmetical
// places. Use the signed types unless your variable represents a bit
// pattern (eg a hash value) or you really need the extra bit. Do NOT
// use 'unsigned' to express "this value should always be positive";
// use assertions for this.
typedef unsigned char uint8;
typedef unsigned short uint16;
// TODO: Remove these type guards. These are to avoid conflicts with
// obsolete/protypes.h in the Gecko SDK.
#ifndef _UINT32
#define _UINT32
typedef unsigned int uint32;
#endif
// See the comment above about NSPR and 64-bit.
#if __LP64__
typedef unsigned long uint64;
#else
typedef unsigned long long uint64;
#endif
#endif // !INT64_MAX
typedef signed char schar;
// A type to represent a Unicode code-point value. As of Unicode 4.0,
// such values require up to 21 bits.
// (For type-checking on pointers, make this explicitly signed,
// and it should always be the signed version of whatever int32 is.)
typedef signed int char32;
// A macro to disallow the copy constructor and operator= functions
// This should be used in the private: declarations for a class
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
void operator=(const TypeName&)
// An older, deprecated, politically incorrect name for the above.
// NOTE: The usage of this macro was banned from our code base, but some
// third_party libraries are still using it.
// TODO(tfarina): Figure out how to fix the usage of this macro in the
// third_party libraries and get rid of it.
#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
// A macro to disallow all the implicit constructors, namely the
// default constructor, copy constructor and operator= functions.
//
// This should be used in the private: declarations for a class
// that wants to prevent anyone from instantiating it. This is
// especially useful for classes containing only static methods.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
TypeName(); \
DISALLOW_COPY_AND_ASSIGN(TypeName)
// The arraysize(arr) macro returns the # of elements in an array arr.
// The expression is a compile-time constant, and therefore can be
// used in defining new arrays, for example. If you use arraysize on
// a pointer by mistake, you will get a compile-time error.
//
// One caveat is that arraysize() doesn't accept any array of an
// anonymous type or a type defined inside a function. In these rare
// cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is
// due to a limitation in C++'s template system. The limitation might
// eventually be removed, but it hasn't happened yet.
// This template function declaration is used in defining arraysize.
// Note that the function doesn't need an implementation, as we only
// use its type.
template <typename T, size_t N>
char (&ArraySizeHelper(T (&array)[N]))[N];
// That gcc wants both of these prototypes seems mysterious. VC, for
// its part, can't decide which to use (another mystery). Matching of
// template overloads: the final frontier.
#ifndef _MSC_VER
template <typename T, size_t N>
char (&ArraySizeHelper(const T (&array)[N]))[N];
#endif
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
// ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
// but can be used on anonymous types or types defined inside
// functions. It's less safe than arraysize as it accepts some
// (although not all) pointers. Therefore, you should use arraysize
// whenever possible.
//
// The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
// size_t.
//
// ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error
//
// "warning: division by zero in ..."
//
// when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
// You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
//
// The following comments are on the implementation details, and can
// be ignored by the users.
//
// ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
// the array) and sizeof(*(arr)) (the # of bytes in one array
// element). If the former is divisible by the latter, perhaps arr is
// indeed an array, in which case the division result is the # of
// elements in the array. Otherwise, arr cannot possibly be an array,
// and we generate a compiler error to prevent the code from
// compiling.
//
// Since the size of bool is implementation-defined, we need to cast
// !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
// result has type size_t.
//
// This macro is not perfect as it wrongfully accepts certain
// pointers, namely where the pointer size is divisible by the pointee
// size. Since all our code has to go through a 32-bit compiler,
// where a pointer is 4 bytes, this means all pointers to a type whose
// size is 3 or greater than 4 will be (righteously) rejected.
#define ARRAYSIZE_UNSAFE(a) \
((sizeof(a) / sizeof(*(a))) / \
static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
// Use implicit_cast as a safe version of static_cast or const_cast
// for upcasting in the type hierarchy (i.e. casting a pointer to Foo
// to a pointer to SuperclassOfFoo or casting a pointer to Foo to
// a const pointer to Foo).
// When you use implicit_cast, the compiler checks that the cast is safe.
// Such explicit implicit_casts are necessary in surprisingly many
// situations where C++ demands an exact type match instead of an
// argument type convertible to a target type.
//
// The From type can be inferred, so the preferred syntax for using
// implicit_cast is the same as for static_cast etc.:
//
// implicit_cast<ToType>(expr)
//
// implicit_cast would have been part of the C++ standard library,
// but the proposal was submitted too late. It will probably make
// its way into the language in the future.
template<typename To, typename From>
inline To implicit_cast(From const &f) {
return f;
}
// The COMPILE_ASSERT macro can be used to verify that a compile time
// expression is true. For example, you could use it to verify the
// size of a static array:
//
// COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
// content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
// COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is the name of the variable. If
// the expression is false, most compilers will issue a warning/error
// containing the name of the variable.
template <bool>
struct CompileAssert {
};
#undef COMPILE_ASSERT
#define COMPILE_ASSERT(expr, msg) \
typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
// Implementation details of COMPILE_ASSERT:
//
// - COMPILE_ASSERT works by defining an array type that has -1
// elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
// does not work, as gcc supports variable-length arrays whose sizes
// are determined at run-time (this is gcc's extension and not part
// of the C++ standard). As a result, gcc fails to reject the
// following code with the simple definition:
//
// int foo;
// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
// // not a compile-time constant.
//
// - By using the type CompileAssert<(bool(expr))>, we ensure that
// expr is a compile-time constant. (Template arguments must be
// determined at compile-time.)
//
// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
//
// CompileAssert<bool(expr)>
//
// instead, these compilers will refuse to compile
//
// COMPILE_ASSERT(5 > 0, some_message);
//
// (They seem to think the ">" in "5 > 0" marks the end of the
// template argument list.)
//
// - The array size is (bool(expr) ? 1 : -1), instead of simply
//
// ((expr) ? 1 : -1).
//
// This is to avoid running into a bug in MS VC 7.1, which
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
// bit_cast<Dest,Source> is a template function that implements the
// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
// very low-level functions like the protobuf library and fast math
// support.
//
// float f = 3.14159265358979;
// int i = bit_cast<int32>(f);
// // i = 0x40490fdb
//
// The classical address-casting method is:
//
// // WRONG
// float f = 3.14159265358979; // WRONG
// int i = * reinterpret_cast<int*>(&f); // WRONG
//
// The address-casting method actually produces undefined behavior
// according to ISO C++ specification section 3.10 -15 -. Roughly, this
// section says: if an object in memory has one type, and a program
// accesses it with a different type, then the result is undefined
// behavior for most values of "different type".
//
// This is true for any cast syntax, either *(int*)&f or
// *reinterpret_cast<int*>(&f). And it is particularly true for
// conversions between integral lvalues and floating-point lvalues.
//
// The purpose of 3.10 -15- is to allow optimizing compilers to assume
// that expressions with different types refer to different memory. gcc
// 4.0.1 has an optimizer that takes advantage of this. So a
// non-conforming program quietly produces wildly incorrect output.
//
// The problem is not the use of reinterpret_cast. The problem is type
// punning: holding an object in memory of one type and reading its bits
// back using a different type.
//
// The C++ standard is more subtle and complex than this, but that
// is the basic idea.
//
// Anyways ...
//
// bit_cast<> calls memcpy() which is blessed by the standard,
// especially by the example in section 3.9 . Also, of course,
// bit_cast<> wraps up the nasty logic in one place.
//
// Fortunately memcpy() is very fast. In optimized mode, with a
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
// code with the minimal amount of data movement. On a 32-bit system,
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
// compiles to two loads and two stores.
//
// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
//
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
// is likely to surprise you.
// Copies the object representation of |source| into a new Dest value and
// returns it. Dest and Source must have the same size; a mismatch is rejected
// at compile time.
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  // Compile time assertion: sizeof(Dest) == sizeof(Source)
  // A compile error here means your Dest and Source have different sizes.
  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
  // Reference the typedef so compilers that warn about unused local typedefs
  // under -Wall do not turn this header into a build failure with -Werror.
  // sizeof is unevaluated, so this generates no code.
  (void)sizeof(VerifySizesAreEqual);

  Dest dest;
  // memcpy is the well-defined way to reinterpret the bytes of |source|.
  memcpy(&dest, &source, sizeof(dest));
  return dest;
}
// Used to explicitly mark the return value of a function as unused. If you are
// really sure you don't want to do anything with the return value of a function
// that has been marked WARN_UNUSED_RESULT, wrap it with this. Example:
//
// scoped_ptr<MyType> my_var = ...;
// if (TakeOwnership(my_var.get()) == SUCCESS)
// ignore_result(my_var.release());
//
template<typename T>
inline void ignore_result(const T&) {
}
// The following enum should be used only as a constructor argument to indicate
// that the variable has static storage class, and that the constructor should
// do nothing to its state. It indicates to the reader that it is legal to
// declare a static instance of the class, provided the constructor is given
// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
// static variable that has a constructor or a destructor because invocation
// order is undefined. However, IF the type can be initialized by filling with
// zeroes (which the loader does for static variables), AND the destructor also
// does nothing to the storage, AND there are no virtual methods, then a
// constructor declared as
// explicit MyClass(base::LinkerInitialized x) {}
// and invoked as
// static MyClass my_variable_name(base::LINKER_INITIALIZED);
namespace base {
// Tag type with a single enumerator, passed as a constructor argument to mark
// a statically allocated, linker-initialized object.
enum LinkerInitialized { LINKER_INITIALIZED };
} // namespace base
#endif // BASE_BASICTYPES_H_

+ 368
- 0
tools/cpp/src/base/basictypes.h.orig View File

@ -0,0 +1,368 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_BASICTYPES_H_
#define BASE_BASICTYPES_H_
#pragma once
#include <limits.h> // So we can set the bounds of our types
#include <stddef.h> // For size_t
#include <string.h> // for memcpy
#ifndef COMPILER_MSVC
// stdint.h is part of C99 but MSVC doesn't have it.
#include <stdint.h> // For intptr_t.
#endif
#ifdef INT64_MAX
// INT64_MAX is defined if C99 stdint.h is included; use the
// native types if available.
typedef int8_t int8;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
const uint8 kuint8max = UINT8_MAX;
const uint16 kuint16max = UINT16_MAX;
const uint32 kuint32max = UINT32_MAX;
const uint64 kuint64max = UINT64_MAX;
const int8 kint8min = INT8_MIN;
const int8 kint8max = INT8_MAX;
const int16 kint16min = INT16_MIN;
const int16 kint16max = INT16_MAX;
const int32 kint32min = INT32_MIN;
const int32 kint32max = INT32_MAX;
const int64 kint64min = INT64_MIN;
const int64 kint64max = INT64_MAX;
#else // !INT64_MAX
typedef signed char int8;
typedef short int16;
// TODO: Remove these type guards. These are to avoid conflicts with
// obsolete/protypes.h in the Gecko SDK.
#ifndef _INT32
#define _INT32
typedef int int32;
#endif
// The NSPR system headers define 64-bit as |long| when possible. In order to
// not have typedef mismatches, we do the same on LP64.
#if __LP64__
typedef long int64;
#else
typedef long long int64;
#endif
// NOTE: unsigned types are DANGEROUS in loops and other arithmetical
// places. Use the signed types unless your variable represents a bit
// pattern (eg a hash value) or you really need the extra bit. Do NOT
// use 'unsigned' to express "this value should always be positive";
// use assertions for this.
typedef unsigned char uint8;
typedef unsigned short uint16;
// TODO: Remove these type guards. These are to avoid conflicts with
// obsolete/protypes.h in the Gecko SDK.
#ifndef _UINT32
#define _UINT32
typedef unsigned int uint32;
#endif
// See the comment above about NSPR and 64-bit.
#if __LP64__
typedef unsigned long uint64;
#else
typedef unsigned long long uint64;
#endif
#endif // !INT64_MAX
typedef signed char schar;
// A type to represent a Unicode code-point value. As of Unicode 4.0,
// such values require up to 21 bits.
// (For type-checking on pointers, make this explicitly signed,
// and it should always be the signed version of whatever int32 is.)
typedef signed int char32;
// A macro to disallow the copy constructor and operator= functions
// This should be used in the private: declarations for a class
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
void operator=(const TypeName&)
// An older, deprecated, politically incorrect name for the above.
// NOTE: The usage of this macro was banned from our code base, but some
// third_party libraries are still using it.
// TODO(tfarina): Figure out how to fix the usage of this macro in the
// third_party libraries and get rid of it.
#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
// A macro to disallow all the implicit constructors, namely the
// default constructor, copy constructor and operator= functions.
//
// This should be used in the private: declarations for a class
// that wants to prevent anyone from instantiating it. This is
// especially useful for classes containing only static methods.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
TypeName(); \
DISALLOW_COPY_AND_ASSIGN(TypeName)
// The arraysize(arr) macro returns the # of elements in an array arr.
// The expression is a compile-time constant, and therefore can be
// used in defining new arrays, for example. If you use arraysize on
// a pointer by mistake, you will get a compile-time error.
//
// One caveat is that arraysize() doesn't accept any array of an
// anonymous type or a type defined inside a function. In these rare
// cases, you have to use the unsafe ARRAYSIZE_UNSAFE() macro below. This is
// due to a limitation in C++'s template system. The limitation might
// eventually be removed, but it hasn't happened yet.
// This template function declaration is used in defining arraysize.
// Note that the function doesn't need an implementation, as we only
// use its type.
template <typename T, size_t N>
char (&ArraySizeHelper(T (&array)[N]))[N];
// That gcc wants both of these prototypes seems mysterious. VC, for
// its part, can't decide which to use (another mystery). Matching of
// template overloads: the final frontier.
#ifndef _MSC_VER
template <typename T, size_t N>
char (&ArraySizeHelper(const T (&array)[N]))[N];
#endif
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
// ARRAYSIZE_UNSAFE performs essentially the same calculation as arraysize,
// but can be used on anonymous types or types defined inside
// functions. It's less safe than arraysize as it accepts some
// (although not all) pointers. Therefore, you should use arraysize
// whenever possible.
//
// The expression ARRAYSIZE_UNSAFE(a) is a compile-time constant of type
// size_t.
//
// ARRAYSIZE_UNSAFE catches a few type errors. If you see a compiler error
//
// "warning: division by zero in ..."
//
// when using ARRAYSIZE_UNSAFE, you are (wrongfully) giving it a pointer.
// You should only use ARRAYSIZE_UNSAFE on statically allocated arrays.
//
// The following comments are on the implementation details, and can
// be ignored by the users.
//
// ARRAYSIZE_UNSAFE(arr) works by inspecting sizeof(arr) (the # of bytes in
// the array) and sizeof(*(arr)) (the # of bytes in one array
// element). If the former is divisible by the latter, perhaps arr is
// indeed an array, in which case the division result is the # of
// elements in the array. Otherwise, arr cannot possibly be an array,
// and we generate a compiler error to prevent the code from
// compiling.
//
// Since the size of bool is implementation-defined, we need to cast
// !(sizeof(a) % sizeof(*(a))) to size_t in order to ensure the final
// result has type size_t.
//
// This macro is not perfect as it wrongfully accepts certain
// pointers, namely where the pointer size is divisible by the pointee
// size. Since all our code has to go through a 32-bit compiler,
// where a pointer is 4 bytes, this means all pointers to a type whose
// size is 3 or greater than 4 will be (righteously) rejected.
#define ARRAYSIZE_UNSAFE(a) \
((sizeof(a) / sizeof(*(a))) / \
static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
// Use implicit_cast as a safe version of static_cast or const_cast
// for upcasting in the type hierarchy (i.e. casting a pointer to Foo
// to a pointer to SuperclassOfFoo or casting a pointer to Foo to
// a const pointer to Foo).
// When you use implicit_cast, the compiler checks that the cast is safe.
// Such explicit implicit_casts are necessary in surprisingly many
// situations where C++ demands an exact type match instead of an
// argument type convertible to a target type.
//
// The From type can be inferred, so the preferred syntax for using
// implicit_cast is the same as for static_cast etc.:
//
// implicit_cast<ToType>(expr)
//
// implicit_cast would have been part of the C++ standard library,
// but the proposal was submitted too late. It will probably make
// its way into the language in the future.
// Returns `value` converted to To, using only the conversions the compiler is
// willing to apply implicitly.
template <typename To, typename From>
inline To implicit_cast(const From& value) {
  return value;
}
// The COMPILE_ASSERT macro can be used to verify that a compile time
// expression is true. For example, you could use it to verify the
// size of a static array:
//
// COMPILE_ASSERT(ARRAYSIZE_UNSAFE(content_type_names) == CONTENT_NUM_TYPES,
// content_type_names_incorrect_size);
//
// or to make sure a struct is smaller than a certain size:
//
// COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
//
// The second argument to the macro is the name of the variable. If
// the expression is false, most compilers will issue a warning/error
// containing the name of the variable.
// Helper type for COMPILE_ASSERT. Its bool template argument forces the
// asserted expression to be a compile-time constant.
template <bool>
struct CompileAssert {
};
// Drop any earlier definition of COMPILE_ASSERT before providing this one.
#undef COMPILE_ASSERT
// Declares an array typedef named `msg` whose size is 1 when `expr` is true
// and -1 (an invalid array type) when it is false.
#define COMPILE_ASSERT(expr, msg) \
typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
// Implementation details of COMPILE_ASSERT:
//
// - COMPILE_ASSERT works by defining an array type that has -1
// elements (and thus is invalid) when the expression is false.
//
// - The simpler definition
//
// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
//
// does not work, as gcc supports variable-length arrays whose sizes
// are determined at run-time (this is gcc's extension and not part
// of the C++ standard). As a result, gcc fails to reject the
// following code with the simple definition:
//
// int foo;
// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is
// // not a compile-time constant.
//
// - By using the type CompileAssert<(bool(expr))>, we ensure that
// expr is a compile-time constant. (Template arguments must be
// determined at compile-time.)
//
// - The outer parentheses in CompileAssert<(bool(expr))> are necessary
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
//
// CompileAssert<bool(expr)>
//
// instead, these compilers will refuse to compile
//
// COMPILE_ASSERT(5 > 0, some_message);
//
// (They seem to think the ">" in "5 > 0" marks the end of the
// template argument list.)
//
// - The array size is (bool(expr) ? 1 : -1), instead of simply
//
// ((expr) ? 1 : -1).
//
// This is to avoid running into a bug in MS VC 7.1, which
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
// bit_cast<Dest,Source> is a template function that implements the
// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
// very low-level functions like the protobuf library and fast math
// support.
//
// float f = 3.14159265358979;
// int i = bit_cast<int32>(f);
// // i = 0x40490fdb
//
// The classical address-casting method is:
//
// // WRONG
// float f = 3.14159265358979; // WRONG
// int i = * reinterpret_cast<int*>(&f); // WRONG
//
// The address-casting method actually produces undefined behavior
// according to ISO C++ specification section 3.10 -15 -. Roughly, this
// section says: if an object in memory has one type, and a program
// accesses it with a different type, then the result is undefined
// behavior for most values of "different type".
//
// This is true for any cast syntax, either *(int*)&f or
// *reinterpret_cast<int*>(&f). And it is particularly true for
// conversions between integral lvalues and floating-point lvalues.
//
// The purpose of 3.10 -15- is to allow optimizing compilers to assume
// that expressions with different types refer to different memory. gcc
// 4.0.1 has an optimizer that takes advantage of this. So a
// non-conforming program quietly produces wildly incorrect output.
//
// The problem is not the use of reinterpret_cast. The problem is type
// punning: holding an object in memory of one type and reading its bits
// back using a different type.
//
// The C++ standard is more subtle and complex than this, but that
// is the basic idea.
//
// Anyways ...
//
// bit_cast<> calls memcpy() which is blessed by the standard,
// especially by the example in section 3.9 . Also, of course,
// bit_cast<> wraps up the nasty logic in one place.
//
// Fortunately memcpy() is very fast. In optimized mode, with a
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
// code with the minimal amount of data movement. On a 32-bit system,
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
// compiles to two loads and two stores.
//
// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
//
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
// is likely to surprise you.
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  // Size check done at compile time: the array type below has -1 elements,
  // and is therefore rejected, whenever Dest and Source differ in size.
  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];

  // Copy the object representation byte for byte into a fresh Dest.
  Dest result;
  memcpy(&result, &source, sizeof(Dest));
  return result;
}
// Used to explicitly mark the return value of a function as unused. If you are
// really sure you don't want to do anything with the return value of a function
// that has been marked WARN_UNUSED_RESULT, wrap it with this. Example:
//
// scoped_ptr<MyType> my_var = ...;
// if (TakeOwnership(my_var.get()) == SUCCESS)
// ignore_result(my_var.release());
//
template <typename T>
inline void ignore_result(const T& /* ignored */) {
  // Intentionally empty: the only purpose is to consume the argument.
}
// The following enum should be used only as a constructor argument to indicate
// that the variable has static storage class, and that the constructor should
// do nothing to its state. It indicates to the reader that it is legal to
// declare a static instance of the class, provided the constructor is given
// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a
// static variable that has a constructor or a destructor because invocation
// order is undefined. However, IF the type can be initialized by filling with
// zeroes (which the loader does for static variables), AND the destructor also
// does nothing to the storage, AND there are no virtual methods, then a
// constructor declared as
// explicit MyClass(base::LinkerInitialized x) {}
// and invoked as
// static MyClass my_variable_name(base::LINKER_INITIALIZED);
namespace base {

// Tag type with a single value, passed to constructors as
// base::LINKER_INITIALIZED.
enum LinkerInitialized {
  LINKER_INITIALIZED
};

}  // namespace base
#endif // BASE_BASICTYPES_H_

+ 656
- 0
tools/cpp/src/cpp-build/generate_geocoding_data.cc View File

@ -0,0 +1,656 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Patrick Mezard
#include "cpp-build/generate_geocoding_data.h"
#include <dirent.h>
#include <sys/stat.h>
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iomanip>
#include <iterator>
#include <locale>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "base/basictypes.h"
namespace i18n {
namespace phonenumbers {
using std::map;
using std::string;
using std::vector;
using std::set;
using std::pair;
// Scope guard for a resource held in a caller-owned pointer variable. On
// destruction (or an explicit Close()) the release function is called on the
// pointer, unless it is already NULL, and the caller's variable is reset to
// NULL.
template <typename ResourceType>
class AutoCloser {
 public:
  typedef int (*ReleaseFunction)(ResourceType* resource);

  // "resource" is the address of the caller's pointer variable; the guard
  // only keeps that address and does not copy the pointer itself.
  AutoCloser(ResourceType** resource, ReleaseFunction release_function)
      : resource_(resource), release_function_(release_function) {}

  ~AutoCloser() { Close(); }

  // Current value of the guarded pointer (NULL once closed).
  ResourceType* get_resource() const { return *resource_; }

  // Releases the resource if there is one. Calling it again is a no-op.
  void Close() {
    ResourceType* const held = *resource_;
    if (held == NULL) {
      return;
    }
    release_function_(held);
    *resource_ = NULL;
  }

 private:
  ResourceType** resource_;
  ReleaseFunction release_function_;
};
// Kind of a directory entry, as stored in DirEntry.
enum DirEntryKinds {
  kFile,      // Regular file (value 0).
  kDirectory  // Directory (value 1).
};
// Value type pairing the name of a directory entry with its kind.
class DirEntry {
 public:
  // Copies "n" into the entry; "k" tells whether it is a file or a directory.
  DirEntry(const char* n, DirEntryKinds k) : name_(n), kind_(k) {}

  const std::string& name() const {
    return name_;
  }

  DirEntryKinds kind() const {
    return kind_;
  }

 private:
  std::string name_;
  DirEntryKinds kind_;
};
// Lists the entries of the directory "path" into "entries", which is cleared
// first. "." and ".." are excluded, and so is every entry that stat() reports
// as neither a regular file nor a directory. Returns true on success; on
// failure "entries" may hold the entries collected so far.
bool ListDirectory(const string& path, vector<DirEntry>* entries) {
  entries->clear();
  DIR* dir = opendir(path.c_str());
  if (!dir) {
    return false;
  }
  AutoCloser<DIR> dir_closer(&dir, closedir);
  struct stat entry_stat;
  while (true) {
    // readdir() is used instead of the deprecated readdir_r(), whose
    // caller-provided struct dirent may be too small for long file names.
    // readdir() returns NULL both at the end of the stream and on error;
    // errno tells them apart, so it must be reset before every call.
    errno = 0;
    const struct dirent* const entry = readdir(dir);
    if (entry == NULL) {
      return errno == 0;
    }
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
      continue;
    }
    const string entry_path = path + "/" + entry->d_name;
    if (stat(entry_path.c_str(), &entry_stat)) {
      return false;
    }
    DirEntryKinds kind = kFile;
    if (S_ISDIR(entry_stat.st_mode)) {
      kind = kDirectory;
    } else if (!S_ISREG(entry_stat.st_mode)) {
      // Neither a file nor a directory (socket, device, ...): ignore it.
      continue;
    }
    entries->push_back(DirEntry(entry->d_name, kind));
  }
}
// Tells whether the last characters of s are exactly suffix. An empty suffix
// matches every string.
bool EndsWith(const string& s, const string& suffix) {
  const string::size_type suffix_size = suffix.size();
  if (s.size() < suffix_size) {
    return false;
  }
  return s.compare(s.size() - suffix_size, suffix_size, suffix) == 0;
}
// Converts the leading decimal integer of s (after optional whitespace) into
// *n. Returns true when a number could be extracted, false otherwise, for
// instance when s is empty or does not start with a number.
bool StrToInt(const string& s, int32* n) {
  std::istringstream stream(s);
  stream >> *n;
  // Test the stream state explicitly: since C++11 streams only convert to
  // bool explicitly, so "return stream;" no longer compiles.
  return !stream.fail();
}
// Stores the decimal representation of n into *s. Returns true on success.
bool IntToStr(int32 n, string* s) {
  std::ostringstream stream;
  stream << n;
  // Test the stream state explicitly: since C++11 streams only convert to
  // bool explicitly, so "return stream;" no longer compiles.
  if (stream.fail()) {
    return false;
  }
  *s = stream.str();
  return true;
}
// Parses the prefix descriptions file at "path". The output map "prefixes" is
// cleared, then filled with one phone number prefix to description entry per
// "prefix|description" line. Blank lines, lines whose first non-blank
// character is '#' and lines without a '|' are skipped.
// Returns false if the file cannot be opened or read, if a line does not fit
// in the line buffer, or if a prefix is not a number; true otherwise.
bool ParsePrefixes(const string& path, map<int32, string>* prefixes) {
  prefixes->clear();
  FILE* input = fopen(path.c_str(), "r");
  if (!input) {
    return false;
  }
  AutoCloser<FILE> input_closer(&input, fclose);
  const int kMaxLineLength = 2*1024;
  vector<char> buffer(kMaxLineLength);
  vector<char>::iterator begin, end, sep;
  int32 prefix_code;
  while (fgets(&buffer[0], static_cast<int>(buffer.size()), input)) {
    begin = buffer.begin();
    // From here on "end" always designates one past the last character of
    // the line that is still under consideration.
    end = std::find(begin, buffer.end(), '\0');
    if (end == begin) {
      continue;
    }
    if (*(end - 1) == '\n') {
      // Consume the LF, and only the LF: stepping back any further would cut
      // the last character of the description and, on a line made of a
      // single LF, move "end" before "begin".
      --end;
    } else if (!feof(input)) {
      // A line without LF can only happen at the end of file.
      return false;
    }
    // Trim and check for comments. The casts keep bytes >= 0x80 (UTF-8
    // descriptions) from reaching isspace() as negative values.
    while (begin != end &&
           std::isspace(static_cast<unsigned char>(*begin))) {
      ++begin;
    }
    while (end != begin &&
           std::isspace(static_cast<unsigned char>(*(end - 1)))) {
      --end;
    }
    if (begin == end || *begin == '#') {
      continue;
    }
    sep = std::find(begin, end, '|');
    if (sep == end) {
      continue;
    }
    if (!StrToInt(string(begin, sep), &prefix_code)) {
      return false;
    }
    (*prefixes)[prefix_code] = string(sep + 1, end);
  }
  return ferror(input) == 0;
}
// Builds a C string literal from s. The output is enclosed in double-quotes.
// Quotes and backslashes are backslash-escaped, and every byte outside the
// printable ASCII range is written as a two-digit \x escape.
//
// An input string:
// Op\xc3\xa9ra
// becomes:
// "Op""\xc3""\xa9""ra"
//
// The literal is split ("") around every \x escape so that a following
// character can never be read as one more digit of the escape.
string MakeStringLiteral(const string& s) {
  // What was written for the previous input character, if any.
  enum { kNothing, kPlainChar, kHexEscape } previous = kNothing;
  std::stringstream buffer;
  buffer << std::hex << std::setfill('0');
  buffer << "\"";
  for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
    const char c = *it;
    if (c >= 32 && c < 127) {
      if (previous == kHexEscape) {
        buffer << "\"\"";
      }
      // An unescaped double-quote would end the literal early and an
      // unescaped backslash would start an escape sequence.
      if (c == '\'' || c == '"' || c == '\\') {
        buffer << "\\";
      }
      buffer << c;
      previous = kPlainChar;
    } else {
      if (previous != kNothing) {
        buffer << "\"\"";
      }
      // Go through unsigned char so the value is in [0, 255] whether or not
      // plain char is signed.
      buffer << "\\x" << std::setw(2)
             << static_cast<int>(static_cast<unsigned char>(c));
      previous = kHexEscape;
    }
  }
  buffer << "\"";
  return buffer.str();
}
// Writes s to output as the C string literal built by MakeStringLiteral.
void WriteStringLiteral(const string& s, FILE* output) {
  const string quoted = MakeStringLiteral(s);
  fputs(quoted.c_str(), output);
}
// Comment header placed at the top of the generated source file.
const char kLicense[] =
    "// Copyright (C) 2012 The Libphonenumber Authors\n"
    "//\n"
    "// Licensed under the Apache License, Version 2.0 (the \"License\");\n"
    "// you may not use this file except in compliance with the License.\n"
    "// You may obtain a copy of the License at\n"
    "//\n"
    "// http://www.apache.org/licenses/LICENSE-2.0\n"
    "//\n"
    "// Unless required by applicable law or agreed to in writing, software\n"
    "// distributed under the License is distributed on an \"AS IS\" BASIS,\n"
    "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or "
    "implied.\n"
    "// See the License for the specific language governing permissions and\n"
    "// limitations under the License.\n"
    "//\n"
    "// This file is generated automatically, do not edit it manually.\n"
    "\n";

// Writes kLicense to output, unchanged.
void WriteLicense(FILE* output) {
  fputs(kLicense, output);
}
// Names of the two nested namespaces wrapping the generated code.
const char kI18NNS[] = "i18n";
const char kPhoneNumbersNS[] = "phonenumbers";

// Opens the namespaces, outermost first.
void WriteNSHeader(FILE* output) {
  const char* const namespaces[] = { kI18NNS, kPhoneNumbersNS };
  for (int i = 0; i < 2; ++i) {
    fprintf(output, "namespace %s {\n", namespaces[i]);
  }
}

// Closes the namespaces, innermost first.
void WriteNSFooter(FILE* output) {
  const char* const namespaces[] = { kPhoneNumbersNS, kI18NNS };
  for (int i = 0; i < 2; ++i) {
    fprintf(output, "} // namespace %s\n", namespaces[i]);
  }
}
// Writes the #include lines of the generated file: the geocoding header
// named after base_name, then base/basictypes.h, each followed by a blank
// line.
void WriteCppHeader(const string& base_name, FILE* output) {
  const char* const header_name = base_name.c_str();
  fprintf(output, "#include \"phonenumbers/geocoding/%s.h\"\n", header_name);
  fputs("\n", output);
  fputs("#include \"base/basictypes.h\"\n", output);
  fputs("\n", output);
}
// Writes two initializer lines for the array variable "name": the array
// itself, then a sizeof expression giving its element count.
void WriteArrayAndSize(const string& name, FILE* output) {
  const char* const array_name = name.c_str();
  fprintf(output, " %s,\n", array_name);
  fprintf(output, " sizeof(%s)/sizeof(*%s),\n", array_name, array_name);
}
// Writes the definition of a PrefixDescriptions variable called "name". Its
// prefixes come from the "prefixes_name" array, its descriptions from
// "desc_name" and its possible lengths from "possible_lengths_name":
//
// const PrefixDescriptions ${name} = {
// ${prefixes_name},
// sizeof(${prefixes_name})/sizeof(*${prefixes_name}),
// ${desc_name},
// ${possible_lengths_name},
// sizeof(${possible_lengths_name})/sizeof(*${possible_lengths_name}),
// };
//
void WritePrefixDescriptionsDefinition(
    const string& name, const string& prefixes_name, const string& desc_name,
    const string& possible_lengths_name, FILE* output) {
  // Opening line.
  fprintf(output, "const PrefixDescriptions %s = {\n", name.c_str());

  // Fields, in declaration order. The descriptions array is written without
  // a size.
  WriteArrayAndSize(prefixes_name, output);
  fprintf(output, " %s,\n", desc_name.c_str());
  WriteArrayAndSize(possible_lengths_name, output);

  // Closing line.
  fputs("};\n", output);
}
// Writes prefixes, descriptions and possible_lengths arrays built from the
// phone number prefix to description mapping "prefixes". Binds these arrays
// in a single PrefixDescriptions variable named "var_name". A possible length
// is the number of decimal digits of a prefix; each distinct length is
// written once, in increasing order.
//
// const int32 ${var_name}_prefixes[] = {
// 1201,
// 1650,
// };
//
// const char* ${var_name}_descriptions[] = {
// "New Jersey",
// "California",
// };
//
// const int32 ${var_name}_possible_lengths[] = {
// 4,
// };
//
// const PrefixDescriptions ${var_name} = {
// ...
// };
//
void WritePrefixDescriptions(const string& var_name, const map<int, string>&
                             prefixes, FILE* output) {
  set<int> possible_lengths;
  const string prefixes_name = var_name + "_prefixes";
  fprintf(output, "const int32 %s[] = {\n", prefixes_name.c_str());
  for (map<int, string>::const_iterator it = prefixes.begin();
       it != prefixes.end(); ++it) {
    fprintf(output, " %d,\n", it->first);
    // Count the digits with integer arithmetic. log10() is ambiguous for an
    // int argument before C++11, is not defined for prefixes <= 0 and goes
    // through floating point for what is an exact computation.
    int digits = 1;
    for (int rest = it->first / 10; rest != 0; rest /= 10) {
      ++digits;
    }
    possible_lengths.insert(digits);
  }
  fprintf(output,
          "};\n"
          "\n");
  const string desc_name = var_name + "_descriptions";
  fprintf(output, "const char* %s[] = {\n", desc_name.c_str());
  for (map<int, string>::const_iterator it = prefixes.begin();
       it != prefixes.end(); ++it) {
    fprintf(output, " ");
    WriteStringLiteral(it->second, output);
    fprintf(output, ",\n");
  }
  fprintf(output,
          "};\n"
          "\n");
  const string possible_lengths_name = var_name + "_possible_lengths";
  fprintf(output, "const int32 %s[] = {\n ", possible_lengths_name.c_str());
  for (set<int>::const_iterator it = possible_lengths.begin();
       it != possible_lengths.end(); ++it) {
    fprintf(output, " %d,", *it);
  }
  fprintf(output,
          "\n"
          "};\n"
          "\n");
  WritePrefixDescriptionsDefinition(var_name, prefixes_name, desc_name,
                                    possible_lengths_name, output);
  fprintf(output, "\n");
}
// Writes two parallel arrays: the "prefix_language" keys of
// "prefix_var_names", then the addresses of the PrefixDescriptions variables
// they map to. Both follow the iteration order of the map.
//
// const char* prefix_language_code_pairs[] = {
// "1_de",
// "1_en",
// };
//
// const PrefixDescriptions* prefixes_descriptions[] = {
// &prefix_1_de,
// &prefix_1_en,
// };
//
void WritePrefixesDescriptions(const map<string, string>& prefix_var_names,
                               FILE* output) {
  typedef map<string, string>::const_iterator VarNameIterator;
  const VarNameIterator first = prefix_var_names.begin();
  const VarNameIterator last = prefix_var_names.end();

  // Keys.
  fprintf(output, "const char* prefix_language_code_pairs[] = {\n");
  VarNameIterator entry = first;
  while (entry != last) {
    fprintf(output, " \"%s\",\n", entry->first.c_str());
    ++entry;
  }
  fprintf(output,
          "};\n"
          "\n"
          "const PrefixDescriptions* prefixes_descriptions[] = {\n");

  // Values.
  entry = first;
  while (entry != last) {
    fprintf(output, " &%s,\n", entry->second.c_str());
    ++entry;
  }
  fprintf(output,
          "};\n"
          "\n");
}
// "languages" maps a country calling code to the set of languages available
// for it. For each entry, writes the sorted array of languages and a
// CountryLanguages instance wrapping it. Then writes two parallel arrays: the
// addresses of the CountryLanguages instances and the country calling codes.
// Returns false, leaving the output incomplete, if a country calling code
// cannot be converted to text.
//
// const char* country_1[] = {
// "de",
// "en",
// };
//
// const CountryLanguages country_1_languages = {
// country_1,
// sizeof(country_1)/sizeof(*country_1),
// };
//
// [...]
//
// const CountryLanguages* countries_languages[] = {
// &country_1_languages,
// [...]
// };
//
// const int country_calling_codes[] = {
// 1,
// [...]
// };
//
bool WriteCountryLanguages(const map<int32, set<string> >& languages,
                           FILE* output) {
  typedef map<int32, set<string> >::const_iterator CountryIterator;
  typedef set<string>::const_iterator LanguageIterator;

  // Parallel lists filled by the first pass and replayed by the second.
  vector<string> wrapper_names;
  vector<string> calling_codes;

  for (CountryIterator country = languages.begin();
       country != languages.end(); ++country) {
    string code_text;
    if (!IntToStr(country->first, &code_text)) {
      return false;
    }
    const string array_name = "country_" + code_text;
    const string wrapper_name = array_name + "_languages";
    const set<string>& codes = country->second;

    // Array of languages.
    fprintf(output, "const char* %s[] = {\n", array_name.c_str());
    for (LanguageIterator language = codes.begin(); language != codes.end();
         ++language) {
      fprintf(output, " \"%s\",\n", language->c_str());
    }
    fprintf(output,
            "};\n"
            "\n");

    // CountryLanguages instance wrapping the array.
    fprintf(output, "const CountryLanguages %s = {\n", wrapper_name.c_str());
    WriteArrayAndSize(array_name, output);
    fprintf(output,
            "};\n"
            "\n");

    wrapper_names.push_back(wrapper_name);
    calling_codes.push_back(code_text);
  }

  fprintf(output,
          "\n"
          "const CountryLanguages* countries_languages[] = {\n");
  for (vector<string>::size_type i = 0; i < wrapper_names.size(); ++i) {
    fprintf(output, " &%s,\n", wrapper_names[i].c_str());
  }
  fprintf(output,
          "};\n"
          "\n"
          "const int country_calling_codes[] = {\n");
  for (vector<string>::size_type i = 0; i < calling_codes.size(); ++i) {
    fprintf(output, " %s,\n", calling_codes[i].c_str());
  }
  fprintf(output,
          "};\n"
          "\n");
  return true;
}
// Returns a copy of input where all occurrences of pattern are replaced with
// value. Occurrences are searched from left to right and do not overlap; the
// inserted text is not searched again. An empty pattern matches nothing, so
// input is returned unchanged.
string ReplaceAll(const string& input, const string& pattern,
                  const string& value) {
  if (pattern.empty()) {
    // find() reports an empty pattern at every position without ever moving
    // forward, which would loop forever below.
    return input;
  }
  string replaced;
  string::size_type start = 0;
  for (string::size_type pos = input.find(pattern, start);
       pos != string::npos; pos = input.find(pattern, start)) {
    replaced.append(input, start, pos - start);
    replaced.append(value);
    start = pos + pattern.length();
  }
  // Whatever follows the last occurrence.
  replaced.append(input, start, string::npos);
  return replaced;
}
// Writes the definitions of the six accessor functions exposing the generated
// arrays. Every "$prefix$" placeholder of the template is replaced with
// "accessor_prefix" to form the function names.
void WriteAccessorsDefinitions(const string& accessor_prefix, FILE* output) {
  const string accessors_template =
      "const int* get$prefix$_country_calling_codes() {\n"
      " return country_calling_codes;\n"
      "}\n"
      "\n"
      "int get$prefix$_country_calling_codes_size() {\n"
      " return sizeof(country_calling_codes)\n"
      " /sizeof(*country_calling_codes);\n"
      "}\n"
      "\n"
      "const CountryLanguages* get$prefix$_country_languages(int index) {\n"
      " return countries_languages[index];\n"
      "}\n"
      "\n"
      "const char** get$prefix$_prefix_language_code_pairs() {\n"
      " return prefix_language_code_pairs;\n"
      "}\n"
      "\n"
      "int get$prefix$_prefix_language_code_pairs_size() {\n"
      " return sizeof(prefix_language_code_pairs)\n"
      " /sizeof(*prefix_language_code_pairs);\n"
      "}\n"
      "\n"
      "const PrefixDescriptions* get$prefix$_prefix_descriptions(int index) {\n"
      " return prefixes_descriptions[index];\n"
      "}\n";
  const string accessors =
      ReplaceAll(accessors_template, "$prefix$", accessor_prefix);
  fputs(accessors.c_str(), output);
}
// Writes geocoding data .cc file. "data_path" is the path of geocoding textual
// data directory: each of its sub-directories is named after a language and
// holds one "<country calling code>.txt" prefix descriptions file per
// country. "base_name" is the base name of the .h/.cc pair, like
// "geocoding_data". "accessor_prefix" is inserted in the names of the
// generated accessor functions.
// Returns true on success. On failure, an error message is printed on stderr
// and "output" is left incomplete.
bool WriteSource(const string& data_path, const string& base_name,
                 const string& accessor_prefix, FILE* output) {
  WriteLicense(output);
  WriteCppHeader(base_name, output);
  WriteNSHeader(output);
  fprintf(output,
          "namespace {\n"
          "\n");
  // Enumerate language/script directories.
  map<string, string> prefix_vars;
  map<int32, set<string> > country_languages;
  vector<DirEntry> entries;
  if (!ListDirectory(data_path, &entries)) {
    fprintf(stderr, "failed to read directory entries\n");
    return false;
  }
  for (vector<DirEntry>::const_iterator it = entries.begin();
       it != entries.end(); ++it) {
    if (it->kind() != kDirectory) {
      continue;
    }
    // Enumerate country calling code files.
    const string dir_path = data_path + "/" + it->name();
    vector<DirEntry> files;
    if (!ListDirectory(dir_path, &files)) {
      fprintf(stderr, "failed to read file entries\n");
      return false;
    }
    for (vector<DirEntry>::const_iterator it_files = files.begin();
         it_files != files.end(); ++it_files) {
      const string fname = it_files->name();
      if (!EndsWith(fname, ".txt")) {
        continue;
      }
      int32 country_code;
      // Strip the ".txt" extension.
      const string country_code_str = fname.substr(0, fname.length() - 4);
      const string path = dir_path + "/" + fname;
      if (!StrToInt(country_code_str, &country_code)) {
        fprintf(stderr, "file name is not a country calling code: %s\n",
                path.c_str());
        return false;
      }
      map<int32, string> prefixes;
      if (!ParsePrefixes(path, &prefixes)) {
        fprintf(stderr, "failed to parse %s\n", path.c_str());
        return false;
      }
      const string prefix_var = "prefix_" + country_code_str + "_" + it->name();
      WritePrefixDescriptions(prefix_var, prefixes, output);
      prefix_vars[country_code_str + "_" + it->name()] = prefix_var;
      country_languages[country_code].insert(it->name());
    }
  }
  WritePrefixesDescriptions(prefix_vars, output);
  if (!WriteCountryLanguages(country_languages, output)) {
    fprintf(stderr, "failed to write country languages\n");
    return false;
  }
  fprintf(output, "} // namespace\n");
  fprintf(output, "\n");
  WriteAccessorsDefinitions(accessor_prefix, output);
  WriteNSFooter(output);
  if (ferror(output) != 0) {
    fprintf(stderr, "failed to write generated source\n");
    return false;
  }
  return true;
}
// Prints "message" as an error on stderr, followed by the command line
// synopsis. Always returns 1 so that callers can return it as exit status.
int PrintHelp(const string& message) {
  fprintf(stderr, "error: %s\n", message.c_str());
  // The third argument is optional; see Main().
  fprintf(stderr,
          "generate_geocoding_data DATADIR CCPATH [ACCESSOR_PREFIX]\n");
  return 1;
}
// Entry point of the generator. argv[1] is the geocoding data root directory
// and argv[2] the path of the source file to write. The optional argv[3] is
// inserted in the names of the generated accessor functions.
// Returns 0 on success and 1 on failure. A failure after the output file was
// created removes that file, so that an incomplete file is never left behind.
int Main(int argc, const char* argv[]) {
  if (argc < 2) {
    return PrintHelp("geocoding data root directory expected");
  }
  if (argc < 3) {
    return PrintHelp("output source path expected");
  }
  string accessor_prefix = "";
  if (argc > 3) {
    accessor_prefix = argv[3];
  }
  const string root_path(argv[1]);
  string source_path(argv[2]);
  std::replace(source_path.begin(), source_path.end(), '\\', '/');
  // The base name is the file name of the output without directories and
  // without its extension.
  string base_name = source_path;
  if (base_name.rfind('/') != string::npos) {
    base_name = base_name.substr(base_name.rfind('/') + 1);
  }
  base_name = base_name.substr(0, base_name.rfind('.'));
  FILE* source_fp = fopen(source_path.c_str(), "w");
  if (!source_fp) {
    fprintf(stderr, "failed to open %s\n", source_path.c_str());
    return 1;
  }
  AutoCloser<FILE> source_closer(&source_fp, fclose);
  // Flush explicitly: data still sitting in the stdio buffer could otherwise
  // fail to reach the disk without anybody noticing.
  const bool written =
      WriteSource(root_path, base_name, accessor_prefix, source_fp) &&
      fflush(source_fp) == 0;
  if (!written) {
    fprintf(stderr, "failed to generate %s\n", source_path.c_str());
    // Close before removing, then drop the incomplete file.
    source_closer.Close();
    remove(source_path.c_str());
    return 1;
  }
  return 0;
}
} // namespace phonenumbers
} // namespace i18n

+ 34
- 0
tools/cpp/src/cpp-build/generate_geocoding_data.h View File

@ -0,0 +1,34 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Patrick Mezard
#ifndef I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H
#define I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H
#include <string>
namespace i18n {
namespace phonenumbers {
using std::string;
// Returns s as a double-quoted C string literal. Bytes outside the printable
// ASCII range are written as \x escapes.
string MakeStringLiteral(const string& s);
// Runs the geocoding data generator. argv[1] is the geocoding data directory,
// argv[2] the path of the source file to write and the optional argv[3] a
// prefix for the names of the generated accessors. Returns 0 on success and
// 1 on failure.
int Main(int argc, const char* argv[]);
} // namespace phonenumbers
} // namespace i18n
#endif // I18N_PHONENUMBERS_GENERATE_GEOCODING_DATA_H

+ 21
- 0
tools/cpp/src/cpp-build/generate_geocoding_data_main.cc View File

@ -0,0 +1,21 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Patrick Mezard
#include "cpp-build/generate_geocoding_data.h"
// Program entry point: hands the command line over to the generator and
// returns its result as the exit status.
int main(int argc, const char* argv[]) {
  const int exit_status = i18n::phonenumbers::Main(argc, argv);
  return exit_status;
}

+ 31
- 0
tools/cpp/test/cpp-build/generate_geocoding_data_test.cc View File

@ -0,0 +1,31 @@
// Copyright (C) 2012 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Patrick Mezard
#include "cpp-build/generate_geocoding_data.h"
#include <gtest/gtest.h>
namespace i18n {
namespace phonenumbers {
TEST(GenerateGeocodingDataTest, TestMakeStringLiteral) {
  // An empty input gives an empty literal.
  const string empty_literal = MakeStringLiteral("");
  EXPECT_EQ("\"\"", empty_literal);

  // Non-ASCII bytes are written as \x escapes, each in its own literal.
  const string escaped_literal = MakeStringLiteral("Op\xc3\xa9ra");
  EXPECT_EQ("\"Op\"\"\\xc3\"\"\\xa9\"\"ra\"", escaped_literal);
}
} // namespace phonenumbers
} // namespace i18n

+ 21
- 0
tools/cpp/test/cpp-build/run_tests.cc View File

@ -0,0 +1,21 @@
// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
// Test runner entry point: initializes Google Test from the command line,
// runs every registered test and returns the result as the exit status.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int test_status = RUN_ALL_TESTS();
  return test_status;
}

Loading…
Cancel
Save