|
|
/*
|
|
|
* Copyright (C) 2011 The Libphonenumber Authors
|
|
|
*
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
* You may obtain a copy of the License at
|
|
|
*
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
*
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
* See the License for the specific language governing permissions and
|
|
|
* limitations under the License.
|
|
|
*/
|
|
|
|
|
|
package com.google.i18n.phonenumbers;
|
|
|
|
|
|
import java.io.BufferedOutputStream;
|
|
|
import java.io.BufferedReader;
|
|
|
import java.io.FileInputStream;
|
|
|
import java.io.FileOutputStream;
|
|
|
import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
import java.io.InputStreamReader;
|
|
|
import java.io.OutputStream;
|
|
|
import java.io.PrintWriter;
|
|
|
import java.util.ArrayList;
|
|
|
import java.util.HashMap;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
import java.util.SortedMap;
|
|
|
import java.util.TreeMap;
|
|
|
import java.util.logging.Logger;
|
|
|
|
|
|
/**
|
|
|
* Utility class that makes the geocoding data as small as possible. This class assumes the
|
|
|
* geocoding data provided as input doesn't contain any gaps thus should not be used with incomplete
|
|
|
* data (missing prefixes).
|
|
|
* <pre>
|
|
|
* Example: Can be combined as:
|
|
|
* 33131|Paris 331|Paris
|
|
|
* 33132|Paris 334|Marseille
|
|
|
* 3341|Marseille
|
|
|
* </pre>
|
|
|
*
|
|
|
* @author Philippe Liard
|
|
|
*/
|
|
|
public class CombineGeoData {
|
|
|
private final InputStream inputStream;
|
|
|
private final OutputStream outputStream;
|
|
|
private final String outputLineSeparator;
|
|
|
private static final Logger LOGGER = Logger.getLogger(CombineGeoData.class.getName());
|
|
|
|
|
|
public CombineGeoData(InputStream inputStream, OutputStream outputStream, String lineSeparator) {
|
|
|
this.inputStream = inputStream;
|
|
|
this.outputStream = outputStream;
|
|
|
this.outputLineSeparator = lineSeparator;
|
|
|
}
|
|
|
|
|
|
public CombineGeoData(InputStream inputStream, OutputStream outputStream) {
|
|
|
this(inputStream, outputStream, System.getProperty("line.separator"));
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Utility class that contains two indexes (start and end).
|
|
|
*/
|
|
|
static class Range {
|
|
|
public final int start;
|
|
|
public final int end;
|
|
|
|
|
|
public Range(int start, int end) {
|
|
|
this.start = start;
|
|
|
this.end = end;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Parses the input text file expected to contain lines written as 'prefix|description'. Note that
|
|
|
* description can be empty.
|
|
|
*
|
|
|
* @return the map of phone prefix data parsed.
|
|
|
* @throws IOException
|
|
|
*/
|
|
|
private SortedMap<String, String> parseInput() throws IOException {
|
|
|
SortedMap<String, String> outputMap = new TreeMap<String, String>();
|
|
|
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
|
|
|
|
|
|
for (String line; (line = bufferedReader.readLine()) != null; ) {
|
|
|
int indexOfPipe = line.indexOf('|');
|
|
|
if (indexOfPipe == -1) {
|
|
|
continue;
|
|
|
}
|
|
|
outputMap.put(line.substring(0, indexOfPipe), line.substring(indexOfPipe + 1));
|
|
|
}
|
|
|
return outputMap;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Creates a sorted array of phone number prefixes as strings from the provided phone number
|
|
|
* prefix map.
|
|
|
*
|
|
|
* @return the array of phone number prefixes sorted by string.
|
|
|
*/
|
|
|
static String[] createSortedPrefixArray(SortedMap<String, String> phonePrefixMap) {
|
|
|
String[] sortedPrefixes = new String[phonePrefixMap.size()];
|
|
|
phonePrefixMap.keySet().toArray(sortedPrefixes);
|
|
|
return sortedPrefixes;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Finds the end index of the range of phone number prefixes starting at the provided index.
|
|
|
* A range ends when a different description or prefix divided by 10 is encountered.
|
|
|
*
|
|
|
* @param prefixes the array of phone number prefixes sorted by string
|
|
|
* @param phonePrefixMap the map associating phone number prefixes and descriptions
|
|
|
* @param start the start index of the prefixes array
|
|
|
* @return the index of the end of the range starting at the provided index
|
|
|
*/
|
|
|
static int findRangeEnd(String[] prefixes, Map<String, String> phonePrefixMap, int start) {
|
|
|
String previousPrefix = prefixes[start];
|
|
|
int previousPrefixAsInt = Integer.parseInt(previousPrefix);
|
|
|
String previousLocation = phonePrefixMap.get(previousPrefix);
|
|
|
|
|
|
for (int i = start; i < prefixes.length; i++) {
|
|
|
String currentPrefix = prefixes[i];
|
|
|
String currentLocation = phonePrefixMap.get(currentPrefix);
|
|
|
if (!currentLocation.equals(previousLocation) ||
|
|
|
(Integer.parseInt(currentPrefix) / 10 != previousPrefixAsInt / 10)) {
|
|
|
return i - 1;
|
|
|
}
|
|
|
}
|
|
|
return prefixes.length - 1;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Splits the provided array of prefixes into an array of ranges. A range contains the start and
|
|
|
* end indexes of a set of mappings that share the same description and have the same prefix minus
|
|
|
* the last digit.
|
|
|
*
|
|
|
* @param prefixes the array of phone number prefixes sorted by string
|
|
|
* @param phonePrefixMap the map associating phone number prefixes and descriptions
|
|
|
* @return the list of ranges
|
|
|
*/
|
|
|
static List<Range> createRanges(String[] prefixes, Map<String, String> phonePrefixMap) {
|
|
|
List<Range> ranges = new ArrayList<Range>();
|
|
|
int index = 0;
|
|
|
int phonePrefixMapSize = phonePrefixMap.size();
|
|
|
|
|
|
while (index < phonePrefixMapSize) {
|
|
|
int rangeEnd = findRangeEnd(prefixes, phonePrefixMap, index);
|
|
|
ranges.add(new Range(index, rangeEnd));
|
|
|
index = rangeEnd + 1;
|
|
|
}
|
|
|
return ranges;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Checks whether the provided candidate prefix conflicts with the prefixes contained in the
|
|
|
* provided range. A conflict occurs if the provided prefix covers (is a prefix of) one of the
|
|
|
* prefixes contained in the range.
|
|
|
*
|
|
|
* @param prefixes the array of phone number prefixes sorted by string
|
|
|
* @param candidate the candidate phone number prefix
|
|
|
* @param start the start of the range
|
|
|
* @param end the end of the range
|
|
|
* @return whether the candidate prefix conflicts with the prefixes contained in the range
|
|
|
*/
|
|
|
static boolean findConflict(String[] prefixes, int candidate, int start, int end) {
|
|
|
String candidateAsString = String.valueOf(candidate);
|
|
|
for (int i = start; i <= end; i++) {
|
|
|
String prefix = prefixes[i];
|
|
|
if (prefix.startsWith(candidateAsString)) {
|
|
|
return true;
|
|
|
}
|
|
|
}
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Checks whether the provided candidate prefix conflicts with the prefixes contained in the
|
|
|
* ranges adjacent (before and after the provided range) of the provided range.
|
|
|
*
|
|
|
* @param ranges the list of ranges
|
|
|
* @param rangeIndex the index of the range in which the conflict search occurs
|
|
|
* @param prefixes the array of phone number prefixes sorted by string
|
|
|
* @param candidate the candidate phone number prefix
|
|
|
* @return whether a conflict was found in the provided range
|
|
|
*/
|
|
|
static boolean hasConflict(List<Range> ranges, int rangeIndex, String[] prefixes, int candidate) {
|
|
|
if (rangeIndex > 0) {
|
|
|
Range previousRange = ranges.get(rangeIndex - 1);
|
|
|
if (findConflict(prefixes, candidate, previousRange.start, previousRange.end)) {
|
|
|
return true;
|
|
|
}
|
|
|
}
|
|
|
if (rangeIndex < ranges.size() - 1) {
|
|
|
Range nextRange = ranges.get(rangeIndex + 1);
|
|
|
if (findConflict(prefixes, candidate, nextRange.start, nextRange.end)) {
|
|
|
return true;
|
|
|
}
|
|
|
}
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Combines the mappings contained in the provided map. A new combined map is returned as a result
|
|
|
* in case any combination occurred. Otherwise (if no combination occurred) the same map (same
|
|
|
* identity) is returned. Note that this method performs a 'single step' (i.e performs only one
|
|
|
* combination iteration).
|
|
|
*/
|
|
|
static SortedMap<String, String> combine(SortedMap<String, String> phonePrefixMap) {
|
|
|
String[] prefixes = createSortedPrefixArray(phonePrefixMap);
|
|
|
List<Range> ranges = createRanges(prefixes, phonePrefixMap);
|
|
|
Map<Integer /* range index */, Integer> combinedPrefixes = new HashMap<Integer, Integer>();
|
|
|
int rangeIndex = 0;
|
|
|
|
|
|
for (Range range : ranges) {
|
|
|
int prefixCandidate = Integer.parseInt(prefixes[range.start]) / 10;
|
|
|
if (prefixCandidate != 0 && !hasConflict(ranges, rangeIndex, prefixes, prefixCandidate)) {
|
|
|
combinedPrefixes.put(rangeIndex, prefixCandidate);
|
|
|
}
|
|
|
++rangeIndex;
|
|
|
}
|
|
|
if (combinedPrefixes.size() == 0) {
|
|
|
return phonePrefixMap;
|
|
|
}
|
|
|
SortedMap<String, String> combinedMap = new TreeMap<String, String>();
|
|
|
rangeIndex = 0;
|
|
|
for (Range range : ranges) {
|
|
|
Integer combinedRange = combinedPrefixes.get(rangeIndex++);
|
|
|
if (combinedRange != null) {
|
|
|
String firstPrefixOfRange = prefixes[range.start];
|
|
|
combinedMap.put(String.valueOf(combinedRange), phonePrefixMap.get(firstPrefixOfRange));
|
|
|
} else {
|
|
|
for (int i = range.start; i <= range.end; i++) {
|
|
|
String prefix = prefixes[i];
|
|
|
combinedMap.put(prefix, phonePrefixMap.get(prefix));
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
return combinedMap;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Combines the provided map associating phone number prefixes and descriptions.
|
|
|
*
|
|
|
* @return the combined map
|
|
|
*/
|
|
|
static SortedMap<String, String> combineMultipleTimes(SortedMap<String, String> phonePrefixMap) {
|
|
|
SortedMap<String, String> previousMap = null;
|
|
|
while (phonePrefixMap != previousMap) {
|
|
|
previousMap = phonePrefixMap;
|
|
|
phonePrefixMap = combine(phonePrefixMap);
|
|
|
}
|
|
|
return phonePrefixMap;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Combines the geocoding data read from the provided input stream and writes it as a result to
|
|
|
* the provided output stream. Uses the provided string as the line separator.
|
|
|
*/
|
|
|
public void run() throws IOException {
|
|
|
SortedMap<String, String> phonePrefixMap = parseInput();
|
|
|
phonePrefixMap = combineMultipleTimes(phonePrefixMap);
|
|
|
PrintWriter printWriter = new PrintWriter(new BufferedOutputStream(outputStream));
|
|
|
for (Map.Entry<String, String> mapping : phonePrefixMap.entrySet()) {
|
|
|
printWriter.printf("%s|%s%s", mapping.getKey(), mapping.getValue(), outputLineSeparator);
|
|
|
}
|
|
|
printWriter.flush();
|
|
|
}
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
if (args.length != 2) {
|
|
|
LOGGER.severe("usage: java -jar combine-geodata.jar /path/to/input /path/to/output");
|
|
|
System.exit(1);
|
|
|
}
|
|
|
try {
|
|
|
CombineGeoData combineGeoData =
|
|
|
new CombineGeoData(new FileInputStream(args[0]), new FileOutputStream(args[1]));
|
|
|
combineGeoData.run();
|
|
|
} catch (Exception e) {
|
|
|
LOGGER.severe(e.getMessage());
|
|
|
System.exit(1);
|
|
|
}
|
|
|
}
|
|
|
}
|