|
|
/*
 * Copyright (C) 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
package com.google.i18n.phonenumbers.geocoding;
|
|
|
|
|
|
import java.io.BufferedInputStream;
|
|
|
import java.io.BufferedReader;
|
|
|
import java.io.Closeable;
|
|
|
import java.io.File;
|
|
|
import java.io.FileInputStream;
|
|
|
import java.io.FileNotFoundException;
|
|
|
import java.io.FileOutputStream;
|
|
|
import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
import java.io.InputStreamReader;
|
|
|
import java.io.ObjectOutputStream;
|
|
|
import java.io.OutputStream;
|
|
|
import java.nio.charset.Charset;
|
|
|
import java.util.ArrayList;
|
|
|
import java.util.HashSet;
|
|
|
import java.util.List;
|
|
|
import java.util.Set;
|
|
|
import java.util.SortedMap;
|
|
|
import java.util.TreeMap;
|
|
|
import java.util.logging.Level;
|
|
|
import java.util.logging.Logger;
|
|
|
|
|
|
/**
|
|
|
* A utility that generates the binary serialization of the area code/location mappings from
|
|
|
* human-readable text files. It also generates a configuration file which contains information on
|
|
|
* data files available for use.
|
|
|
*
|
|
|
* <p> The text files must be located in sub-directories of the provided input path. For each input
|
|
|
* file inputPath/lang/countryCallingCode.txt the corresponding binary file is generated as
|
|
|
* outputPath/countryCallingCode_lang.
|
|
|
*
|
|
|
* @author Philippe Liard
|
|
|
*/
|
|
|
public class GenerateAreaCodeData {
|
|
|
// The path to the input directory containing the languages directories.
|
|
|
private final File inputPath;
|
|
|
// The path to the output directory.
|
|
|
private final File outputPath;
|
|
|
// Whether the data is generated for testing.
|
|
|
private final boolean forTesting;
|
|
|
|
|
|
private static final Logger LOGGER = Logger.getLogger(GenerateAreaCodeData.class.getName());
|
|
|
|
|
|
public GenerateAreaCodeData(File inputPath, File outputPath, boolean forTesting)
|
|
|
throws IOException {
|
|
|
if (!inputPath.isDirectory()) {
|
|
|
throw new IOException("The provided input path does not exist: " +
|
|
|
inputPath.getAbsolutePath());
|
|
|
}
|
|
|
if (outputPath.exists()) {
|
|
|
if (!outputPath.isDirectory()) {
|
|
|
throw new IOException("Expected directory: " + outputPath.getAbsolutePath());
|
|
|
}
|
|
|
} else {
|
|
|
if (!outputPath.mkdirs()) {
|
|
|
throw new IOException("Could not create directory " + outputPath.getAbsolutePath());
|
|
|
}
|
|
|
}
|
|
|
this.inputPath = inputPath;
|
|
|
this.outputPath = outputPath;
|
|
|
this.forTesting = forTesting;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Closes the provided file and log any potential IOException.
|
|
|
*/
|
|
|
private static void closeFile(Closeable closeable) {
|
|
|
if (closeable == null) {
|
|
|
return;
|
|
|
}
|
|
|
try {
|
|
|
closeable.close();
|
|
|
} catch (IOException e) {
|
|
|
LOGGER.log(Level.WARNING, e.getMessage());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Converts the text data read from the provided input stream to the Java binary serialization
|
|
|
* format. The resulting data is written to the provided output stream.
|
|
|
*
|
|
|
* @VisibleForTesting
|
|
|
*/
|
|
|
static void convertData(InputStream input, OutputStream output) throws IOException {
|
|
|
SortedMap<Integer, String> areaCodeMapTemp = new TreeMap<Integer, String>();
|
|
|
BufferedReader bufferedReader =
|
|
|
new BufferedReader(new InputStreamReader(
|
|
|
new BufferedInputStream(input), Charset.forName("UTF-8")));
|
|
|
for (String line; (line = bufferedReader.readLine()) != null; ) {
|
|
|
line = line.trim();
|
|
|
if (line.length() == 0 || line.startsWith("#")) {
|
|
|
continue;
|
|
|
}
|
|
|
int indexOfPipe = line.indexOf('|');
|
|
|
if (indexOfPipe == -1) {
|
|
|
LOGGER.log(Level.WARNING, "Malformatted data: expected '|'");
|
|
|
continue;
|
|
|
}
|
|
|
String areaCode = line.substring(0, indexOfPipe);
|
|
|
if (indexOfPipe == line.length() - 1) {
|
|
|
LOGGER.log(Level.WARNING, "Missing location for area code " + areaCode);
|
|
|
continue;
|
|
|
}
|
|
|
String location = line.substring(indexOfPipe + 1);
|
|
|
areaCodeMapTemp.put(Integer.parseInt(areaCode), location);
|
|
|
}
|
|
|
// Build the corresponding area code map and serialize it to the binary format.
|
|
|
AreaCodeMap areaCodeMap = new AreaCodeMap();
|
|
|
areaCodeMap.readAreaCodeMap(areaCodeMapTemp);
|
|
|
ObjectOutputStream objectOutputStream = new ObjectOutputStream(output);
|
|
|
areaCodeMap.writeExternal(objectOutputStream);
|
|
|
objectOutputStream.flush();
|
|
|
}
|
|
|
|
|
|
private class Pair<A, B> {
|
|
|
public final A first;
|
|
|
public final B second;
|
|
|
|
|
|
public Pair(A first, B second) {
|
|
|
this.first = first;
|
|
|
this.second = second;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Creates the input country code text file/output binary file (named countryCode_language)
|
|
|
* mappings.
|
|
|
*/
|
|
|
private List<Pair<File, File>> createInputOutputFileMappings() {
|
|
|
List<Pair<File, File>> mappings = new ArrayList<Pair<File, File>>();
|
|
|
File[] languageDirectories = inputPath.listFiles();
|
|
|
|
|
|
for (File languageDirectory : languageDirectories) {
|
|
|
if (!languageDirectory.isDirectory() || languageDirectory.isHidden()) {
|
|
|
continue;
|
|
|
}
|
|
|
File[] countryCodeFiles = languageDirectory.listFiles();
|
|
|
|
|
|
for (File countryCodeFile : countryCodeFiles) {
|
|
|
if (countryCodeFile.isHidden()) {
|
|
|
continue;
|
|
|
}
|
|
|
String countryCodeFileName = countryCodeFile.getName();
|
|
|
int indexOfDot = countryCodeFileName.indexOf('.');
|
|
|
if (indexOfDot == -1) {
|
|
|
LOGGER.log(Level.WARNING,
|
|
|
String.format("unexpected file name %s, expected pattern .*\\.txt",
|
|
|
countryCodeFileName));
|
|
|
continue;
|
|
|
}
|
|
|
String countryCode = countryCodeFileName.substring(0, indexOfDot);
|
|
|
if (!countryCode.matches("\\d+")) {
|
|
|
LOGGER.log(Level.WARNING, "ignoring unexpected file " + countryCodeFileName);
|
|
|
continue;
|
|
|
}
|
|
|
mappings.add(new Pair<File, File>(
|
|
|
countryCodeFile,
|
|
|
new File(outputPath,
|
|
|
String.format("%s_%s", countryCode, languageDirectory.getName()))));
|
|
|
}
|
|
|
}
|
|
|
return mappings;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Adds a country code/language mapping to the provided map. The country code and language are
|
|
|
* generated from the provided file name previously used to output the area code/location mappings
|
|
|
* for the given country.
|
|
|
*
|
|
|
* @VisibleForTesting
|
|
|
*/
|
|
|
static void addConfigurationMapping(SortedMap<Integer, Set<String>> availableDataFiles,
|
|
|
File outputAreaCodeMappingsFile) {
|
|
|
String outputAreaCodeMappingsFileName = outputAreaCodeMappingsFile.getName();
|
|
|
int indexOfUnderscore = outputAreaCodeMappingsFileName.indexOf('_');
|
|
|
int countryCode = Integer.parseInt(
|
|
|
outputAreaCodeMappingsFileName.substring(0, indexOfUnderscore));
|
|
|
String language = outputAreaCodeMappingsFileName.substring(indexOfUnderscore + 1);
|
|
|
|
|
|
Set<String> languageSet = availableDataFiles.get(countryCode);
|
|
|
if (languageSet == null) {
|
|
|
languageSet = new HashSet<String>();
|
|
|
availableDataFiles.put(countryCode, languageSet);
|
|
|
}
|
|
|
languageSet.add(language);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Outputs the binary configuration file mapping country codes to language strings.
|
|
|
*
|
|
|
* @VisibleForTesting
|
|
|
*/
|
|
|
static void outputBinaryConfiguration(SortedMap<Integer, Set<String>> availableDataFiles,
|
|
|
OutputStream outputStream) throws IOException {
|
|
|
MappingFileProvider mappingFileProvider = new MappingFileProvider();
|
|
|
mappingFileProvider.readFileConfigs(availableDataFiles);
|
|
|
ObjectOutputStream objectOutputStream = new ObjectOutputStream(outputStream);
|
|
|
mappingFileProvider.writeExternal(objectOutputStream);
|
|
|
objectOutputStream.flush();
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Runs the area code data generator.
|
|
|
*
|
|
|
* @throws IOException
|
|
|
* @throws FileNotFoundException
|
|
|
*/
|
|
|
public void run() throws FileNotFoundException, IOException {
|
|
|
List<Pair<File, File>> inputOutputMappings = createInputOutputFileMappings();
|
|
|
SortedMap<Integer, Set<String>> availableDataFiles = new TreeMap<Integer, Set<String>>();
|
|
|
|
|
|
for (Pair<File, File> inputOutputMapping : inputOutputMappings) {
|
|
|
FileInputStream fileInputStream = null;
|
|
|
FileOutputStream fileOutputStream = null;
|
|
|
|
|
|
try {
|
|
|
File textFile = inputOutputMapping.first;
|
|
|
File binaryFile = inputOutputMapping.second;
|
|
|
fileInputStream = new FileInputStream(textFile);
|
|
|
fileOutputStream = new FileOutputStream(binaryFile);
|
|
|
convertData(fileInputStream, fileOutputStream);
|
|
|
addConfigurationMapping(availableDataFiles, inputOutputMapping.second);
|
|
|
} catch (IOException e) {
|
|
|
LOGGER.log(Level.SEVERE, e.getMessage());
|
|
|
continue;
|
|
|
} finally {
|
|
|
closeFile(fileInputStream);
|
|
|
closeFile(fileOutputStream);
|
|
|
}
|
|
|
}
|
|
|
// Output the binary configuration file mapping country codes to languages.
|
|
|
FileOutputStream fileOutputStream = null;
|
|
|
|
|
|
try {
|
|
|
File configFile = new File(outputPath, "config");
|
|
|
fileOutputStream = new FileOutputStream(configFile);
|
|
|
outputBinaryConfiguration(availableDataFiles, fileOutputStream);
|
|
|
} finally {
|
|
|
closeFile(fileOutputStream);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
if (args.length != 3) {
|
|
|
LOGGER.log(Level.SEVERE,
|
|
|
"usage: GenerateAreaCodeData /path/to/input/directory /path/to/output/directory" +
|
|
|
" forTesting");
|
|
|
System.exit(1);
|
|
|
}
|
|
|
try {
|
|
|
GenerateAreaCodeData generateAreaCodeData =
|
|
|
new GenerateAreaCodeData(new File(args[0]), new File(args[1]),
|
|
|
Boolean.parseBoolean(args[2]));
|
|
|
generateAreaCodeData.run();
|
|
|
} catch (IOException e) {
|
|
|
LOGGER.log(Level.SEVERE, e.getMessage());
|
|
|
System.exit(1);
|
|
|
}
|
|
|
}
|
|
|
}
|