Browse Source

Project import generated by Copybara. (#2494)

PiperOrigin-RevId: 319856719

Co-authored-by: The libphonenumber Team <noreply@google.com>
pull/3882/head
David Beaumont 6 years ago
committed by GitHub
parent
commit
54ba70db86
67 changed files with 14323 additions and 2 deletions
  1. +1
    -2
      metadata/README.md
  2. +311
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/DigitSequence.java
  3. +65
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/MetadataKey.java
  4. +351
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/PrefixTree.java
  5. +752
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/RangeSpecification.java
  6. +1342
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/RangeTree.java
  7. +194
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/RangeTreeFactorizer.java
  8. +112
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/Types.java
  9. +99
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/i18n/PhoneRegion.java
  10. +60
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/i18n/SimpleLanguageTag.java
  11. +94
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/AltFormatSpec.java
  12. +146
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/AltFormatsSchema.java
  13. +132
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/CommentsSchema.java
  14. +236
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/CsvData.java
  15. +126
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ExamplesTableSchema.java
  16. +68
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FileBasedCsvLoader.java
  17. +637
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatSpec.java
  18. +96
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatsTableSchema.java
  19. +36
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/MetadataException.java
  20. +168
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/MetadataTableSchema.java
  21. +750
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/NumberingScheme.java
  22. +63
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/NumberingSchemes.java
  23. +88
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/OperatorsTableSchema.java
  24. +396
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/RangesTableSchema.java
  25. +228
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ShortcodesTableSchema.java
  26. +154
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/XmlRangesSchema.java
  27. +92
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Assignment.java
  28. +131
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Change.java
  29. +217
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Column.java
  30. +100
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/ColumnGroup.java
  31. +74
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvKeyMarshaller.java
  32. +241
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvParser.java
  33. +108
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvSchema.java
  34. +589
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTable.java
  35. +99
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTableCollector.java
  36. +100
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/DiffKey.java
  37. +186
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/DisjointRangeMap.java
  38. +116
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/MultiValue.java
  39. +74
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeException.java
  40. +215
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeKey.java
  41. +951
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeTable.java
  42. +169
    -0
      metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Schema.java
  43. +69
    -0
      metadata/src/main/proto/enums.proto
  44. +82
    -0
      metadata/src/main/proto/types.proto
  45. +134
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/DigitSequenceTest.java
  46. +213
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/PrefixTreeTest.java
  47. +308
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeSpecificationTest.java
  48. +101
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeTreeFactorizerTest.java
  49. +555
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeTreeTest.java
  50. +57
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/i18n/PhoneRegionTest.java
  51. +42
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/i18n/SimpleLanguageTagTest.java
  52. +82
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/model/AltFormatSpecTest.java
  53. +111
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/model/AltFormatsSchemaTest.java
  54. +156
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/model/CommentsSchemaTest.java
  55. +160
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/model/FormatSpecTest.java
  56. +70
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/AssignmentTest.java
  57. +71
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/ChangeTest.java
  58. +58
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/ColumnGroupTest.java
  59. +93
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/ColumnTest.java
  60. +177
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/CsvParserTest.java
  61. +275
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/CsvTableTest.java
  62. +132
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/RangeKeyTest.java
  63. +412
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/RangeTableTest.java
  64. +71
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/SchemaTest.java
  65. +132
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/testing/RangeTableSubject.java
  66. +118
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/testing/RangeTreeSubject.java
  67. +477
    -0
      metadata/src/test/java/com/google/i18n/phonenumbers/metadata/testing/TestNumberingScheme.java

+ 1
- 2
metadata/README.md View File

@ -24,6 +24,5 @@ inevitable.
Patches and pull requests cannot be accepted directly on this codebase, so if
you find an issue with these libraries, please open a new issue for it. However
we do not accept feature requests, or provide answeres or technical support for
we do not accept feature requests, or provide answers or technical support for
anything in this directory at this time.

+ 311
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/DigitSequence.java View File

@ -0,0 +1,311 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import com.google.common.base.CharMatcher;
import com.google.common.base.Preconditions;
import com.google.common.collect.DiscreteDomain;
import com.google.errorprone.annotations.Immutable;
import com.google.errorprone.annotations.concurrent.LazyInit;
/**
 * A small, fast, immutable representation of a phone number digit sequence. This class represents
 * contiguous sequences of digits in phone numbers, such as "123" or "000". It does not encode
 * semantic information such as the region code to which a number belongs or perform any semantic
 * validation. It can be thought of as equivalent to a String containing only the ASCII digits
 * {@code '0'} to {@code '9'}.
 *
 * <p>Internally a sequence is stored as its length plus its decimal value, which is why the
 * length is capped at {@link #MAX_DIGITS}.
 */
@Immutable
public final class DigitSequence implements Comparable<DigitSequence> {
  private static final CharMatcher ASCII_DIGITS = CharMatcher.inRange('0', '9');

  // IMPORTANT
  // This cannot be more than 18 to avoid overflowing a signed long (it must be signed due to the
  // calculation of the "distance" metric which can be +ve or -ve).
  //
  // If it does need to be raised, this whole class probably needs to be rethought. ITU recommends
  // a limit of 15 digits (not including country calling code) but there are currently 2 examples
  // in the metadata XML file which exceed this (Japan) where some non-international toll free
  // numbers (those starting with 0037 and 0036) can be up to 17 digits (still okay) in the current
  // metadata but there's a note saying that they may even extend to 21 digits!!
  //
  // An appropriate way to split this class would be to make a closed type hierarchy with 2
  // separate implementations, one using a long to encode the numbers and one using BigInteger (or
  // maybe just encoding digits in a string directly).
  // The good thing about this approach is that instances of the different implementations could
  // never be equal to each other. This is likely not a difficult refactoring, although the Domain
  // class will also need to be considered carefully and details like the "index()" value will have
  // to change completely between the classes.
  //
  /** The maximum number of digits which can be held in a digit sequence. */
  public static final int MAX_DIGITS = 18;

  // Simple lookup of powers-of-10 for all valid sequence lengths (0 - MAX_DIGITS).
  private static final long[] POWERS_OF_TEN = new long[MAX_DIGITS + 1];

  static {
    // 1, 10, 100, 1000, 10000 ...
    POWERS_OF_TEN[0] = 1;
    for (int n = 1; n < POWERS_OF_TEN.length; n++) {
      POWERS_OF_TEN[n] = 10 * POWERS_OF_TEN[n - 1];
    }
  }

  // A table of adjustment values to convert a digit sequence into an absolute index in the
  // integer domain, imposing a total ordering in which shorter sequences sort before longer ones
  // and sequences of equal length sort by value. The value of a digit sequence of length N is
  // adjusted by the number of digit sequences which are strictly shorter than N. For example the
  // digit sequence "0123" has length 4, and there are 1111 elements that come before all
  // 4-length sequences ("", "0"-"9", "00"-"99", "000"-"999"), so its index is
  // {@code 123 + 1111 = 1234}.
  // To calculate this value dynamically for any length N, offset=floor(10^N / 9).
  private static final long[] DOMAIN_OFFSET = new long[MAX_DIGITS + 1];

  static {
    // 0, 1, 11, 111, 1111 ... (element 0 keeps its default value of zero).
    for (int n = 1; n < DOMAIN_OFFSET.length; n++) {
      DOMAIN_OFFSET[n] = 10 * DOMAIN_OFFSET[n - 1] + 1;
    }
  }

  private static final DigitSequence EMPTY = new DigitSequence(0, 0L);

  // Shared instances for the ten single digit sequences, indexed by digit value.
  private static final DigitSequence[] SINGLETON_DIGITS = new DigitSequence[] {
    new DigitSequence(1, 0L),
    new DigitSequence(1, 1L),
    new DigitSequence(1, 2L),
    new DigitSequence(1, 3L),
    new DigitSequence(1, 4L),
    new DigitSequence(1, 5L),
    new DigitSequence(1, 6L),
    new DigitSequence(1, 7L),
    new DigitSequence(1, 8L),
    new DigitSequence(1, 9L),
  };

  // Simple helper to return {@code 10^n} for all valid sequence lengths.
  private static long pow10(int n) {
    return POWERS_OF_TEN[n];
  }

  /**
   * Returns the domain in which phone number digit sequences exist. This is needed when creating
   * canonical {@link com.google.common.collect.Range Ranges} of digit-sequences.
   */
  public static DiscreteDomain<DigitSequence> domain() {
    return Domain.INSTANCE;
  }

  private static final class Domain extends DiscreteDomain<DigitSequence> {
    private static final Domain INSTANCE = new Domain();
    private static final DigitSequence MIN = EMPTY;
    // The largest representable sequence: MAX_DIGITS nines.
    private static final DigitSequence MAX = nines(MAX_DIGITS);

    @Override
    public DigitSequence next(DigitSequence num) {
      long next = num.value + 1;
      if (next < pow10(num.length)) {
        return new DigitSequence(num.length, next);
      } else {
        // Rolled over "99..9", so move on to the all-zero sequence which is one digit longer.
        int len = num.length + 1;
        return (len <= MAX_DIGITS) ? new DigitSequence(len, 0) : null;
      }
    }

    @Override
    public DigitSequence previous(DigitSequence num) {
      long prev = num.value - 1;
      if (prev >= 0) {
        return new DigitSequence(num.length, prev);
      } else {
        // Stepped below "00..0", so move back to the all-nines sequence which is one digit shorter.
        int len = num.length - 1;
        return (len >= 0) ? new DigitSequence(len, pow10(len) - 1) : null;
      }
    }

    @Override
    public long distance(DigitSequence start, DigitSequence end) {
      // The indices get up to 19 digits but can't overflow Long.MAX_VALUE, so they can be safely
      // subtracted to get a signed long "distance" without risk of over-/under- flow.
      return end.index() - start.index();
    }

    @Override
    public DigitSequence minValue() {
      return MIN;
    }

    @Override
    public DigitSequence maxValue() {
      return MAX;
    }
  }

  /**
   * Returns the digit sequence of length one representing the given digit value.
   *
   * @throws IllegalArgumentException if {@code digit} is not in the range 0 to 9
   */
  public static DigitSequence singleton(int digit) {
    Preconditions.checkArgument(0 <= digit && digit <= 9, "invalid digit value: %s", digit);
    return SINGLETON_DIGITS[digit];
  }

  /**
   * Returns the empty digit sequence. This is useful in special cases where you need to build up
   * a digit sequence starting from nothing.
   */
  public static DigitSequence empty() {
    return EMPTY;
  }

  /**
   * Returns a digit sequence for the given string (e.g. "012345").
   *
   * @throws IllegalArgumentException if the string is longer than {@link #MAX_DIGITS} or contains
   *     anything other than the ASCII digits '0' to '9'
   */
  public static DigitSequence of(String digits) {
    Preconditions.checkArgument(digits.length() <= MAX_DIGITS,
        "Digit string too long: '%s'", digits);
    Preconditions.checkArgument(ASCII_DIGITS.matchesAllOf(digits),
        "Digit string contains non-digit characters: '%s'", digits);
    return digits.isEmpty() ? empty() : new DigitSequence(digits.length(), Long.parseLong(digits));
  }

  /**
   * Returns a digit sequence of {@code length} containing only the digit '0'. This is useful when
   * performing range calculations to determine the smallest digit sequence in a block.
   */
  public static DigitSequence zeros(int length) {
    return new DigitSequence(length, 0L);
  }

  /**
   * Returns a digit sequence of {@code length} containing only the digit '9'. This is useful when
   * performing range calculations to determine the largest digit sequence in a block.
   */
  public static DigitSequence nines(int length) {
    return new DigitSequence(length, pow10(length) - 1);
  }

  // The overall length of the digit sequence, including any leading zeros.
  private final int length;
  // The decimal value of the digit sequence (excluding leading zeros, obviously).
  private final long value;
  // Cached toString() representation (toString() of DigitSequence is used in comparisons for
  // sorting to achieve lexicographical ordering, which means it gets churned a lot).
  @LazyInit
  private String toString;

  // Called directly from RangeSpecification.
  DigitSequence(int length, long value) {
    // Don't check for -ve length as this should never happen and will blow up in pow10() anyway.
    Preconditions.checkArgument(length <= MAX_DIGITS,
        "Digit sequence too long [%s digits]", length);
    // This should not happen unless there's a code error, so nice user messages aren't needed.
    Preconditions.checkArgument(value >= 0 && value < pow10(length));
    this.length = length;
    this.value = value;
  }

  /** Returns if this sequence is empty (i.e. length == 0). */
  public boolean isEmpty() {
    return length == 0;
  }

  /** Returns the length of this digit sequence. */
  public int length() {
    return length;
  }

  /**
   * Returns the digit at index {@code n} in this digit sequence, starting from the most
   * significant digit.
   *
   * @throws IndexOutOfBoundsException if {@code n} is negative or not less than {@link #length()}
   */
  public int getDigit(int n) {
    Preconditions.checkElementIndex(n, length);
    return (int) (value / pow10((length - 1) - n) % 10);
  }

  /**
   * Returns the sub-sequence representing only the first {@code n} digits in this sequence. For
   * example, {@code "01234".first(3) == "012"}. Asking for all the digits returns this sequence.
   *
   * @throws IndexOutOfBoundsException if {@code n} is negative or greater than {@link #length()}
   */
  public DigitSequence first(int n) {
    // A count (not an element index) is being validated, so n == length must be accepted.
    Preconditions.checkPositionIndex(n, length);
    return (n != length) ? new DigitSequence(n, value / pow10(length - n)) : this;
  }

  /**
   * Returns the sub-sequence representing only the last {@code n} digits in this sequence. For
   * example, {@code "01234".last(3) == "234"}. Asking for all the digits returns this sequence.
   *
   * @throws IndexOutOfBoundsException if {@code n} is negative or greater than {@link #length()}
   */
  public DigitSequence last(int n) {
    // A count (not an element index) is being validated, so n == length must be accepted.
    Preconditions.checkPositionIndex(n, length);
    return (n != length) ? new DigitSequence(n, value % pow10(n)) : this;
  }

  /**
   * Returns a new sequence which extends this sequence by a single digit ({@code 0 <= digit <= 9}).
   *
   * @throws IllegalArgumentException if the digit is out of range or the result would be longer
   *     than {@link #MAX_DIGITS}
   */
  public DigitSequence extendBy(int digit) {
    Preconditions.checkArgument(0 <= digit && digit <= 9);
    return new DigitSequence(length + 1, (10 * value) + digit);
  }

  /**
   * Returns a new sequence which extends this sequence by the given value.
   *
   * @throws IllegalArgumentException if the result would be longer than {@link #MAX_DIGITS}
   */
  public DigitSequence extendBy(DigitSequence n) {
    Preconditions.checkNotNull(n);
    return new DigitSequence(length + n.length, (pow10(n.length) * value) + n.value);
  }

  /**
   * Returns the digit sequence immediately after this one, or {@code null} if this is the
   * maximum value.
   */
  public DigitSequence next() {
    return domain().next(this);
  }

  /**
   * Returns the digit sequence immediately before this one, or {@code null} if this is the
   * minimum value.
   */
  public DigitSequence previous() {
    return domain().previous(this);
  }

  /** Returns the absolute index of this digit sequence within the integer domain. */
  private long index() {
    return value + DOMAIN_OFFSET[length];
  }

  @Override
  public int compareTo(DigitSequence other) {
    return Long.compare(index(), other.index());
  }

  @Override
  public boolean equals(Object o) {
    // The index is unique for each (length, value) pair, so comparing it is sufficient.
    return (o instanceof DigitSequence) && index() == ((DigitSequence) o).index();
  }

  @Override
  public int hashCode() {
    return Long.hashCode(index());
  }

  @Override
  public String toString() {
    // This little dance is required (according to the docs for the LazyInit annotation) for lazy
    // initialization of non-volatile fields (yes, that's a double init in a single statement).
    String localVar = toString;
    if (localVar == null) {
      toString = localVar = zeroPadded(length, value);
    }
    return localVar;
  }

  // Renders the value as exactly "length" ASCII digits, left padded with '0'. This deliberately
  // avoids String.format(), which localizes digits according to the default locale and could
  // therefore produce non-ASCII output.
  private static String zeroPadded(int length, long value) {
    if (length == 0) {
      return "";
    }
    String digits = Long.toString(value);
    StringBuilder out = new StringBuilder(length);
    for (int n = digits.length(); n < length; n++) {
      out.append('0');
    }
    return out.append(digits).toString();
  }
}

+ 65
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/MetadataKey.java View File

@ -0,0 +1,65 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import com.google.auto.value.AutoValue;
import com.google.common.base.Preconditions;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import java.util.Comparator;
/**
 * Uniquely identifies the number metadata belonging to one region. A "geographical" region is
 * fully identified by its region code, whereas "non geographical" regions all share the region
 * "UN001" (world) and are told apart by their calling code.
 */
@AutoValue
public abstract class MetadataKey implements Comparable<MetadataKey> {
  // Primary sort key: the region.
  private static final Comparator<MetadataKey> BY_REGION =
      Comparator.comparing(MetadataKey::region);
  // Full ordering: region first, with the calling code breaking ties.
  private static final Comparator<MetadataKey> ORDERING =
      BY_REGION.thenComparing(MetadataKey::callingCode);

  /**
   * Creates the key identifying phone number data for {@code region} under the given calling
   * code. No check is made that the two belong together, so it is possible to build a key which
   * matches no data at all (e.g. region="US", calling code="44").
   *
   * @throws IllegalArgumentException if the region is neither the world region nor a two-letter
   *     region other than the unknown region, or if the calling code is empty
   */
  public static MetadataKey create(PhoneRegion region, DigitSequence callingCode) {
    // Dereferencing the arguments in these checks doubles as the null check.
    Preconditions.checkArgument(isAcceptableRegion(region));
    Preconditions.checkArgument(!callingCode.isEmpty());
    return new AutoValue_MetadataKey(region, callingCode);
  }

  // Accepts the world region, or any two-letter region which is not the "unknown" region.
  private static boolean isAcceptableRegion(PhoneRegion region) {
    if (region.equals(PhoneRegion.getWorld())) {
      return true;
    }
    return region.toString().length() == 2 && !region.equals(PhoneRegion.getUnknown());
  }

  /**
   * Returns the region of this key, which is {@link PhoneRegion#getWorld()} for non-geographical
   * regions.
   */
  public abstract PhoneRegion region();

  /** Returns the calling code of this key. */
  public abstract DigitSequence callingCode();

  @Override
  public int compareTo(MetadataKey other) {
    return ORDERING.compare(this, other);
  }

  // Used in human readable formatting during presubmit checks; be careful if you change it.
  @Override
  public final String toString() {
    return String.format("region=%s, calling code=+%s", region(), callingCode());
  }
}

+ 351
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/PrefixTree.java View File

@ -0,0 +1,351 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
import com.google.i18n.phonenumbers.metadata.RangeTree.SetOperations;
import java.util.ArrayList;
import java.util.List;
/**
* A variation of a {@link RangeTree} which represents a set of prefixes (as opposed to a set of
* ranges). While this implementation is backed by a {@code RangeTree} and has a similar serialized
* representation, it is a deliberately distinct type and should not be thought of as a subset of
* {@code RangeTree}. In particular, set operations are defined to work differently for
* {@code PrefixTree} due to its differing semantics and some set operations (e.g. subtraction) are
* not even well defined.
*/
public final class PrefixTree {
private static final PrefixTree EMPTY = new PrefixTree(RangeTree.empty());
/** Returns the "empty" prefix tree, which matches no ranges. */
public static PrefixTree empty() {
return EMPTY;
}
/**
 * Builds a prefix tree from the paths of the given ranges, cutting each path at the earliest
 * point at which it can terminate. For example, the ranges {@code {"1[0-3]", "1234", "56x"}}
 * yield the prefixes {@code {"1[0-3]", "56x"}}, because {@code "1[0-3]"} contains {@code "12"},
 * which is itself a prefix of {@code "1234"}. An empty range tree yields the empty prefix tree.
 */
public static PrefixTree from(RangeTree ranges) {
  if (ranges.isEmpty()) {
    return empty();
  }
  // First cut every path at its earliest termination point, then drop trailing "any digit" edges.
  RangeTree trimmed = TrimmingVisitor.trim(ranges);
  return new PrefixTree(removeTrailingAnyDigitPaths(trimmed));
}
/**
 * Builds a prefix tree holding every digit sequence of the given range specification. A single
 * range specification cannot overlap in the way that general range trees can, so unlike
 * {@link #from(RangeTree)}, this method will never throw {@code IllegalArgumentException}.
 */
public static PrefixTree from(RangeSpecification spec) {
  // A range specification only describes ranges of one length, so it is always a valid prefix.
  RangeTree singleSpecTree = RangeTree.from(spec);
  return from(singleSpecTree);
}
/**
* Returns the minimal prefix tree which includes all the paths in "include", and none of the
* paths in "exclude". For example:
* <pre> {@code
* minimal({ "123x", "456x" }, { "13xx", "459x" }, 0) == { "12", "456" }
* minimal({ "123x", "456x" }, {}, 0) == { "" }
* minimal({ "123x", "456x" }, {}, 1) == { "[14]" }
* }</pre>
*
* <p>A minimal length can be specified to avoid creating prefixes that are "too short" for some
* circumstances.
*
* <p>Caveat: In cases where the {@code include} and {@code exclude} ranges overlap, the shortest
* possible prefix is chosen. For example:
* <pre> {@code
* minimal({ "12", "1234", "56" }, { "123", "5678" }) == { "12", "56" }
* }</pre>
* This means that it may not always be true that {@code minimal(A, B).intersect(minimal(B, A))}
* is empty.
*
* @param include the ranges whose paths must all be captured by the returned prefix tree
* @param exclude the ranges whose paths must not be captured; must not intersect {@code include}
* @param minLength the minimum prefix length, which must not be negative
* @return the minimized prefix tree, or the empty prefix tree if {@code include} yields no
*     prefixes
* @throws IllegalArgumentException if {@code include} and {@code exclude} intersect, or if
*     {@code minLength} is negative
*/
public static PrefixTree minimal(RangeTree include, RangeTree exclude, int minLength) {
checkArgument(include.intersect(exclude).isEmpty(), "ranges must be disjoint");
checkArgument(minLength >= 0, "invalid minimum prefix length: %s", minLength);
PrefixTree prefix = PrefixTree.from(include);
if (prefix.isEmpty()) {
// This matches no input, not all input.
return prefix;
}
// Drop any excluded ranges which the prefix already captures; they overlap regardless, so
// there is nothing to be gained by keeping the prefix longer in order to avoid them.
exclude = exclude.subtract(prefix.retainFrom(exclude));
// This can contain only the empty sequence (i.e. match all input) if the original include set
// was something like "xxxxx". In that case the initial node is just the terminal.
RangeTree minimal;
DfaNode root = prefix.asRangeTree().getInitial();
if (prefix.isIdentity() || exclude.isEmpty()) {
// Either we already accept anything, or there is nothing to exclude.
minimal = emit(root, RangeSpecification.empty(), RangeTree.empty(), minLength);
} else {
// Otherwise walk the include and (remaining) exclude paths together, starting from an empty
// path and an empty result.
minimal = recursivelyMinimize(
root, RangeSpecification.empty(), exclude.getInitial(), RangeTree.empty(), minLength);
}
// No need to go via the static factory here, since that does a bunch of work we know cannot
// be necessary. The range tree here is a subset of an already valid prefix tree, so cannot
// contain "early terminating nodes" or "trailing any digit sequences".
return new PrefixTree(minimal);
}
private final RangeTree ranges;
private PrefixTree(RangeTree ranges) {
// Caller is responsible for ensuring that the ranges conform to expectations of a prefix tree.
this.ranges = ranges;
}
/**
* Returns a {@link RangeTree} containing the same digit sequences as this prefix tree. Prefix
* trees and range trees do not have the same semantics, but they do have the same serialized
* form (i.e. to serialize a prefix tree, you can just serialize the corresponding range tree).
*/
public RangeTree asRangeTree() {
return ranges;
}
/**
* Returns whether this prefix tree is empty. Filtering a {@link RangeTree} by the empty prefix
* tree always returns the empty range tree. The result of filtering a range tree is defined as
* containing only digit sequences which are prefixed by some digit sequence in the prefix tree.
* If the prefix tree is empty, no digit sequence can ever satisfy that requirement.
*/
public boolean isEmpty() {
return ranges.isEmpty();
}
/**
 * Returns whether this prefix tree matches every digit sequence. Filtering a {@link RangeTree}
 * keeps only the digit sequences which are prefixed by some digit sequence in the prefix tree.
 * The identity prefix tree contains the empty digit sequence, which prefixes every digit
 * sequence, so filtering by it returns the original range tree unchanged.
 */
public boolean isIdentity() {
  if (ranges.isEmpty()) {
    return false;
  }
  // The identity tree is the one whose initial node is already the terminal node.
  DfaNode initial = ranges.getInitial();
  return initial.equals(RangeTree.getTerminal());
}
/** Returns whether this prefix tree would retain the given digit sequence. */
public boolean prefixes(DigitSequence digits) {
  DfaNode current = ranges.getInitial();
  int index = 0;
  // Follow the digits through the DFA until they run out or no edge accepts the next digit.
  while (index < digits.length()) {
    DfaEdge edge = current.find(digits.getDigit(index));
    if (edge == null) {
      break;
    }
    current = edge.getTarget();
    index++;
  }
  // The sequence is prefixed only if the walk stopped on the terminal node.
  return current.equals(RangeTree.getTerminal());
}
/**
* Returns a subset of the given ranges, containing only ranges which are prefixed by an
* element in this prefix tree. For example:
* <pre> {@code
* RangeTree r = { "12xx", "1234x" }
* PrefixTree p = { "12[0-5]" }
* p.retainFrom(r) = { "12[0-5]x", "1234x"}
* }</pre>
* Note that if the prefix tree is empty, this method returns the empty range tree.
*/
public RangeTree retainFrom(RangeTree ranges) {
return SetOperations.INSTANCE.retainFrom(this, ranges);
}
/**
* Returns the union of two prefix trees. For prefix trees {@code p1}, {@code p2} and any range
* tree {@code R}, the union {@code P = p1.union(p2)} is defined such that:
* <pre> {@code
* P.retainFrom(R) = p1.retainFrom(R).union(p2.retainFrom(R))
* }</pre>
* If prefixes are the same length this is equivalent to {@link RangeTree#union(RangeTree)},
* but when prefixes overlap, only the more general (shorter) prefix is retained.
*/
public PrefixTree union(PrefixTree other) {
return SetOperations.INSTANCE.union(this, other);
}
/**
* Returns the intersection of two prefix trees. For prefix trees {@code p1}, {@code p2} and any
* range tree {@code R}, the intersection {@code P = p1.intersect(p2)} is defined such that:
* <pre> {@code
* P.retainFrom(R) = p1.retainFrom(R).intersect(p2.retainFrom(R))
* }</pre>
* If prefixes are the same length this is equivalent to {@link RangeTree#intersect(RangeTree)},
* but when prefixes overlap, only the more specific (longer) prefix is retained.
*/
public PrefixTree intersect(PrefixTree other) {
return SetOperations.INSTANCE.intersect(this, other);
}
/**
 * Returns this prefix tree cut down to at most {@code maxLength} digits. The result can be
 * shorter still when trimming causes trailing edges to collapse into "any digit" sequences.
 * For example:
 * <pre> {@code
 * { "12[0-4]5", "12[5-9]" }.trim(3) == "12"
 * { "7001", "70[1-9]", "7[1-9]" }.trim(3) == "7"
 * }</pre>
 */
public PrefixTree trim(int maxLength) {
  // Shorten every range specification individually, then rebuild the tree from the results.
  RangeTree shortened =
      RangeTree.from(
          ranges.asRangeSpecifications().stream()
              .map(spec -> spec.first(maxLength))
              .collect(toImmutableList()));
  return PrefixTree.from(shortened);
}
@Override
public int hashCode() {
return ranges.hashCode();
}
@Override
public boolean equals(Object o) {
  if (!(o instanceof PrefixTree)) {
    return false;
  }
  // Two prefix trees are equal exactly when their backing range trees are equal.
  PrefixTree that = (PrefixTree) o;
  return ranges.equals(that.ranges);
}
@Override
public String toString() {
return ranges.toString();
}
// Visitor which records every path of a range tree up to the first node at which it can
// terminate.
private static final class TrimmingVisitor implements DfaVisitor {
  /** Returns a range tree holding the paths of {@code ranges}, each cut at its first terminus. */
  static RangeTree trim(RangeTree ranges) {
    if (ranges.isEmpty()) {
      return ranges;
    }
    if (ranges.getInitial().canTerminate()) {
      // Not the "empty range tree" (which matches no input), but the range tree containing the
      // empty range specification (which matches only the empty digit sequence).
      return RangeTree.from(RangeSpecification.empty());
    }
    TrimmingVisitor visitor = new TrimmingVisitor();
    ranges.accept(visitor);
    return RangeTree.from(visitor.paths);
  }

  // Every trimmed path collected so far.
  private final List<RangeSpecification> paths = new ArrayList<>();
  // The path from the initial node to the edge currently being visited.
  private RangeSpecification path = RangeSpecification.empty();

  @Override
  public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
    RangeSpecification parentPath = path;
    path = parentPath.extendByMask(edge.getDigitMask());
    if (!target.canTerminate()) {
      // Still inside a path, so keep descending.
      target.accept(this);
    } else {
      // Earliest point of termination reached; record the path and go no deeper.
      paths.add(path);
    }
    // Restore the path so that sibling edges start again from the parent.
    path = parentPath;
  }
}
// Note: Calling "getPrefix()" on each range specification is NOT enough on its own, because
// ranges which are shorter than the others cause problems. Consider { "7[1-9]", "70x" }, which
// should result in "7". Calling "getPrefix()" on each and merging would give "7x" instead.
//
// One fix would be to create prefix trees repeatedly (removing trailing "any digit" sequences
// each time) until the result stops changing.
//
// The simpler fix, used here, is to pad every shorter range specification up to the maximum
// length before merging. In the example above "7[1-9]" becomes "7[1-9]x", which merges with
// "70x" to give "7xx", from which the correct prefix can then be extracted.
private static RangeTree removeTrailingAnyDigitPaths(RangeTree ranges) {
  if (ranges.isEmpty()) {
    return ranges;
  }
  RangeTree padded = ranges;
  // Padding is a no-op when "ranges" matches only one length, so skip it in that case.
  if (ranges.getLengths().size() > 1) {
    int maxLength = ranges.getLengths().last();
    padded =
        ranges.map(
            spec ->
                spec.length() < maxLength
                    ? spec.extendByLength(maxLength - spec.length())
                    : spec);
  }
  // With everything merged, the correct prefixes can be extracted as the final step.
  return padded.map(RangeSpecification::getPrefix);
}
/**
* Recursively determines the next level of prefix minimization. The algorithm follows as much
* of the "included" path as possible (node), potentially splitting into several sub-recursive
* steps if the current included edge overlaps with multiple "excluded" paths. Once a path no
* longer overlaps with the exclude paths, it is added to the result. Paths are also added to
* the result if they terminate while still overlapping the excluded paths.
*
* @param node the current node on the "included" path whose outgoing edges are processed
* @param path the range specification accumulated for the edges followed to reach {@code node}
* @param exclude the current node on the "excluded" path, followed in step with {@code node}
* @param minimal the result accumulated so far
* @param minLength the minimum path length, passed through to {@code emit()}
* @return {@code minimal} unioned with every path emitted at or below {@code node}
*/
private static RangeTree recursivelyMinimize(
DfaNode node, RangeSpecification path, DfaNode exclude, RangeTree minimal, int minLength) {
for (DfaEdge edge : node.getEdges()) {
// Digits on this included edge which have not yet been claimed by an excluded edge.
int mask = edge.getDigitMask();
DfaNode target = edge.getTarget();
// This algorithm only operates on the DFA of a prefix tree (not a general range tree). As
// such the only terminating node we can reach is the terminal node itself. If we hit that
// from the current edge, just emit it and continue on to the next edge.
if (target.equals(RangeTree.getTerminal())) {
minimal = minimal.union(RangeTree.from(path.extendByMask(mask)));
continue;
}
checkState(!target.canTerminate(), "invalid DFA state for prefix tree at: %s", path);
// Otherwise recurse on every "exclude" path, using the intersection of the "include" and
// "exclude" masks. Anything left on the include mask which didn't overlap any of the excluded
// edges can be emitted. This also works at the end of the exclude paths (exclude == TERMINAL)
// since that has no outgoing edges (so the entire include path is emitted).
for (DfaEdge ex : exclude.getEdges()) {
int m = ex.getDigitMask() & mask;
if (m != 0) {
// Remove the overlapping digits from the include mask before recursing on them.
mask &= ~m;
minimal =
recursivelyMinimize(target, path.extendByMask(m), ex.getTarget(), minimal, minLength);
}
}
// The mask identifies edges which are now outside the exclude tree, and thus safe to emit.
if (mask != 0) {
// Emitting an included path may involve emitting some of the sub-tree below it in order
// to make up the minimal length (we can't do this for the terminating case above).
minimal = emit(target, path.extendByMask(mask), minimal, minLength);
}
}
return minimal;
}
/**
 * Adds the given path to the result, first extending it through the sub-tree below {@code node}
 * if it is shorter than the minimum length. Extension stops as soon as the path reaches
 * {@code minLength} digits or the terminal node is reached, whichever happens first.
 *
 * @param node the node reached by following {@code path}
 * @param path the range specification accumulated so far
 * @param minimal the result accumulated so far
 * @param minLength the length a path must reach before it is emitted (unless it terminates)
 * @return {@code minimal} unioned with every path emitted at or below {@code node}
 */
private static RangeTree emit(
    DfaNode node, RangeSpecification path, RangeTree minimal, int minLength) {
  boolean longEnough = path.length() >= minLength;
  if (longEnough || node.equals(RangeTree.getTerminal())) {
    // Nothing more to extend: the path itself is emitted.
    return minimal.union(RangeTree.from(path));
  }
  RangeTree result = minimal;
  for (DfaEdge edge : node.getEdges()) {
    RangeSpecification extended = path.extendByMask(edge.getDigitMask());
    result = result.union(emit(edge.getTarget(), extended, result, minLength));
  }
  return result;
}
}

+ 752
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/RangeSpecification.java View File

@ -0,0 +1,752 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain;
import static java.lang.Integer.numberOfLeadingZeros;
import static java.lang.Integer.numberOfTrailingZeros;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
* A compact representation of a disjoint set of ranges of digit sequences. This is a compact way
* to represent one or many ranges of digit sequences which share the same length. Examples include:
* <pre>{@code
* "01234" --> the singleton range containing only the digit sequence "01234"
* "012xx" --> the contiguous digit sequence range ["01200".."01299"]
* "012[3-5]6xx" --> the disjoint set of contiguous digit sequence ranges
* ["0123600".."0123699"], ["0124600".."0124699"], ["0125600".."0125699"]
* }</pre>
* Note that the sets of contiguous ranges defined by a {@code RangeSpecification} are always
* mutually disjoint.
*
* <p>Range specifications have a natural prefix based lexicographical ordering (based on the
* most-significant point at which a difference appears), but if you are comparing a disjoint set
* of range specifications (e.g. from a {@link RangeTree}) then it can be more intuitive to use an
* ordering based on the minimum digit sequence, but note this approach fails if the range
* specifications can overlap (e.g. comparing "1xx" and "100").
*/
public final class RangeSpecification implements Comparable<RangeSpecification> {
/** The mask of all possible digits. */
public static final char ALL_DIGITS_MASK = (1 << 10) - 1;
private static final RangeSpecification EMPTY = new RangeSpecification("");
/** Returns the empty range specification, which matches only the empty digit sequence. */
public static RangeSpecification empty() {
return EMPTY;
}
/** Returns the range specification of length one which matches any of the given digits. */
public static RangeSpecification singleton(Iterable<Integer> digits) {
int mask = 0;
for (int digit : digits) {
checkArgument(0 <= digit && digit <= 9, "bad digit value '%s'", digit);
mask |= (1 << digit);
}
return new RangeSpecification(String.valueOf((char) mask));
}
/** Returns a new range specification which matches only the given non-empty digit sequence. */
public static RangeSpecification from(DigitSequence s) {
if (s.length() == 0) {
return RangeSpecification.empty();
}
char[] masks = new char[s.length()];
for (int n = 0; n < masks.length; n++) {
masks[n] = (char) (1 << s.getDigit(n));
}
return new RangeSpecification(new String(masks));
}
/** Returns a new range specification which matches any digit sequence of the specified length. */
public static RangeSpecification any(int length) {
checkArgument(length >= 0);
if (length == 0) {
return RangeSpecification.empty();
}
char[] masks = new char[length];
Arrays.fill(masks, ALL_DIGITS_MASK);
return new RangeSpecification(new String(masks));
}
/**
* Parses the string form of a range specification (e.g. "1234[57-9]xxx"). This must be
* correctly formed, including having all ranges be well formed (e.g. not "[33]", "[3-3]" or
* "[6-4]").
*
* <p>Note that non-canonical ranges are permitted if the digits are in order (e.g. "[1234]",
* "[4-5]" or "[0-9]" but not "[4321]"). The returned range specification is canonical (e.g.
* {@code parse("12[34569]").toString() == "12[3-69]"}).
*
* <p>The empty string is parsed as the empty range specification.
*
* <p>The use of single ASCII underscores ("_") to group ranges and aid readability is supported
* during parsing but is not retained in the parsed result (e.g.
* {@code parse("12_34[5-8]_xxx_xxx").toString() == "1234[5-8]xxxxxx"}). Note that underscore may
* not be present inside ranges (e.g. "1_4") or at the ends of the range (e.g. "123xxx_").
*/
public static RangeSpecification parse(String s) {
if (s.isEmpty()) {
return empty();
}
checkArgument(!s.startsWith("_") && !s.endsWith("_"), "cannot start/end with '_': %s", s);
StringBuilder bitmasks = new StringBuilder();
boolean lastCharWasUnderscore = false;
for (int n = 0; n < s.length(); n++) {
char c = s.charAt(n);
switch (c) {
case '_':
checkArgument(!lastCharWasUnderscore, "cannot have multiple '_' in a row: %s", s);
lastCharWasUnderscore = true;
// Continue the for-loop rather than breaking out the switch to avoid resetting the flag.
continue;
case 'x':
bitmasks.append(ALL_DIGITS_MASK);
break;
case '[':
n += 1;
int end = s.indexOf(']', n);
checkArgument(end != -1, "unclosed range in specification: %s", s);
checkArgument(end > n, "empty range in specification: %s", s);
bitmasks.append(parseRange(s, n, end));
n = end;
break;
default:
checkArgument('0' <= c && c <= '9',
"bad digit value '%s' in range specification: %s", c, s);
bitmasks.append((char) (1 << (c - '0')));
break;
}
lastCharWasUnderscore = false;
}
return new RangeSpecification(bitmasks.toString());
}
private static char parseRange(String s, int start, int end) {
int mask = 0;
for (int n = start; n < end;) {
char c = s.charAt(n++);
checkArgument('0' <= c && c <= '9',
"bad digit value '%s' in range specification: %s", c, s);
int shift = (c - '0');
// check that this bit and all above it are zero (to ensure correct ordering).
checkArgument(mask >> shift == 0, "unordered range in specification: %s", s);
if (n == end || s.charAt(n) != '-') {
// Single digit not in a range.
mask |= 1 << shift;
continue;
}
n++;
checkArgument(n < end, "unclosed range in specification: %s", s);
c = s.charAt(n++);
checkArgument('0' <= c && c <= '9',
"bad digit value '%s' in range specification: %s", c, s);
int rshift = (c - '0');
checkArgument(rshift > shift, "unordered range in specification: %s", s);
// Set bits from shift to rshift inclusive (e.g. 11111 & ~11 = 11100).
mask |= ((1 << (rshift + 1)) - 1) & ~((1 << shift) - 1);
}
return (char) mask;
}
/**
* Returns the canonical representation of the given ranges. The number of range specifications
* in the returned instance may be higher or lower than the number of given ranges.
* <p>
* NOTE: This is only used by RangeTree for generating a RangeTree from a RangeSet, and is not
* suitable as a public API (one day we might generate the RangeTree directly and be able to
* delete this code).
*/
static ImmutableList<RangeSpecification> from(RangeSet<DigitSequence> ranges) {
List<RangeSpecification> specs = new ArrayList<>();
Set<Range<DigitSequence>> s = ranges.asRanges();
checkArgument(!s.isEmpty(), "empty range set not permitted");
// Make sure are ranges we use are canonicalized over the domain of DigitSequences (so Range
// operations (e.g. isConnected()) work as expected. See Range for more on why this matters.
Range<DigitSequence> cur = s.iterator().next().canonical(domain());
checkArgument(!cur.contains(DigitSequence.empty()),
"empty digit sequence not permitted in range set");
for (Range<DigitSequence> next : Iterables.skip(ranges.asRanges(), 1)) {
next = next.canonical(domain());
if (cur.isConnected(next)) {
// Even though 'cur' and 'next' are both canonicalized, it's not guaranteed that they are
// closed-open (singleton ranges are fully closed and any range containing the maximum
// value must be closed. To "union" the two ranges we must also preserve the bound types.
cur = Range.range(
cur.lowerEndpoint(), cur.lowerBoundType(),
next.upperEndpoint(), next.upperBoundType())
.canonical(domain());
continue;
}
addRangeSpecsOf(cur, specs);
cur = next;
}
addRangeSpecsOf(cur, specs);
return ImmutableList.sortedCopyOf(Comparator.comparing(RangeSpecification::min), specs);
}
/** Adds the canonical minimal range specifications for a single range to the given list. */
private static void addRangeSpecsOf(Range<DigitSequence> r, List<RangeSpecification> specs) {
// Given range is already canonical but may span multiple lengths. It's easier to view this
// as a contiguous set when finding first/last elements however to avoid worrying about bound
// types. A contiguous set is not an expensive class to create.
ContiguousSet<DigitSequence> s = ContiguousSet.create(r, domain());
DigitSequence start = s.first();
DigitSequence end = s.last();
while (start.length() < end.length()) {
// Add <start> to "999..." for the current block length (the max domain value is all 9's).
DigitSequence blockEnd = DigitSequence.nines(start.length());
addRangeSpecs(start, blockEnd, specs);
// Reset the start to the next length up (i.e. the "000..." sequence that's one longer).
start = blockEnd.next();
}
// Finally and the range specs up to (and including) the end value.
addRangeSpecs(start, end, specs);
}
// Adds canonical minimal range specifications for the range of same-length digit sequences.
private static void addRangeSpecs(
DigitSequence start, DigitSequence end, List<RangeSpecification> specs) {
int length = start.length();
checkArgument(end.length() == length);
// Masks contains a running total of the bitmasks we want to convert to RangeSpecifications.
// As processing proceeds, the mask array is reused. This is because the prefix used for
// successive range specifications is always a subset of the previous specifications and the
// trailing part of the array always fills up with the range mask for 'x' (i.e. [0-9]).
int[] masks = new int[length];
// Stage 1:
// Starting from the last digit in the 'start' sequence, work up until we find something that
// is not a '0'. This is the first digit that needs to be adjusted to create a range
// specification covering it and the digits 'below' it. For example, the first specification
// for the range ["1200".."9999"] is "1[2-9]xx".
// Once a specification is emitted, the start value is adjusted to the next digit sequence
// immediately above the end of the emitted range, so after emitting "1[2-9]xx", start="2000".
// Once each range specification is emitted, we continue working 'up' the digit sequence until
// the next calculated start value exceeds the 'end' of our range. This specification cannot
// be emitted and signals the end of stage 1.
setBitmasks(masks, start);
for (int n = previousNon(0, start, length); n != -1; n = previousNon(0, start, n)) {
int loDigit = start.getDigit(n);
DigitSequence prefix = start.first(n);
DigitSequence blockEnd = prefix.extendBy(DigitSequence.nines(length - n));
if (blockEnd.compareTo(end) > 0) {
// The end of this block would exceed the end of the main range, so we must stop.
break;
}
// The bitmasks we want is:
// <first (n-1) digits of 'start'> [loDigit..9] <any digits mask...>
masks[n] = bitmaskUpFrom(loDigit);
fillBitmasksAfter(masks, n);
specs.add(RangeSpecification.fromBitmasks(masks));
// Adjust the range start now we have emitted the range specification.
start = blockEnd.next();
}
// Stage 2:
// Very similar to stage 1, but work up from the last digit in the 'end' sequence. The
// difference now is that we look for the first digit that's not '9' and generate ranges that
// go down to the start of the range, not up to the end. Thus for ["0000", "1299"] the first
// specification generated is "1[0-2]xx", which is emitted at the end of the list.
int midIdx = specs.size();
setBitmasks(masks, end);
for (int n = previousNon(9, end, length); n != -1; n = previousNon(9, end, n)) {
int hiDigit = end.getDigit(n);
DigitSequence prefix = end.first(n);
DigitSequence blockStart = prefix.extendBy(DigitSequence.zeros(length - n));
if (blockStart.compareTo(start) < 0) {
// The start of this block would precede the start of the main range, so we must stop.
break;
}
// The bitmasks we want is:
// <first (n-1) digits of 'end'> [0..hiDigit] <any digits mask...>
masks[n] = bitmaskDownFrom(hiDigit);
fillBitmasksAfter(masks, n);
specs.add(midIdx, RangeSpecification.fromBitmasks(masks));
// Adjust the range end now we have emitted the range specification.
end = blockStart.previous();
}
// Stage 3: Having emitted the first and last set of range specifications, it only remains to
// emit the "center" specification in the middle of the list. This is special as neither bound
// is the end of a block. In previous stages, all partial ranges are either "up to 9" or
// "down to zero". For example: ["1234".."1789"] has the center range "1[3-6]xx", and
// ["1234".."1345"] has no center range at all.
if (start.compareTo(end) < 0) {
// Find the last digit before start and end combine (ie, 1200, 1299 --> 12xx --> n=1). We
// know that 'start' and 'end' are the same length and bound a range like:
// <prefix> [X..Y] [000..999]
// but X or Y could be 0 or 9 respectively (just not both).
//
// Note that we don't even both to test the first digit in the sequences because if 'start'
// and 'end' span a full range (e.g. [000.999]) we can just use the same code to fill the
// masks correctly anyway.
int n = start.length();
while (--n > 0 && start.getDigit(n) == 0 && end.getDigit(n) == 9) {}
// Bitwise AND the masks for [X..9] and [0..Y] to get the mask for [X..Y].
// Note that the "masks" array already contains the correct prefix digits up to (n-1).
masks[n] = bitmaskUpFrom(start.getDigit(n)) & bitmaskDownFrom(end.getDigit(n));
fillBitmasksAfter(masks, n);
specs.add(midIdx, RangeSpecification.fromBitmasks(masks));
}
}
// Sets the values in the given array to correspond to the digits in the given sequence. If a
// range specification were made from the resulting array it would match only that digit sequence.
private static void setBitmasks(int[] masks, DigitSequence s) {
for (int n = 0; n < s.length(); n++) {
masks[n] = 1 << s.getDigit(n);
}
}
/**
* Creates a range specification from a given array of integer masks. The Nth element of the
* array corresponds to the Nth element in the range specification, and mask values must be
* non-zero and have only bits 0 to 9 set.
*/
private static RangeSpecification fromBitmasks(int[] bitmasks) {
checkArgument(bitmasks.length <= DigitSequence.MAX_DIGITS,
"range specification too large");
StringBuilder s = new StringBuilder(bitmasks.length);
s.setLength(bitmasks.length);
for (int n = 0; n < bitmasks.length; n++) {
int mask = bitmasks[n];
checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "invalid bitmask: %s", mask);
s.setCharAt(n, (char) mask);
}
return new RangeSpecification(s.toString());
}
// Fills the bitmasks after the given index with the "all digits" mask (i.e. matching [0-9]).
// This can accept -1 as the index since it always pre-increments before using it.
private static void fillBitmasksAfter(int[] masks, int n) {
// Because of the iterative way the mask array is handled, we can stop filling when we hit
// ALL_DIGITS_MASK because everything past that must already be filled.
while (++n < masks.length && masks[n] != ALL_DIGITS_MASK) {
masks[n] = ALL_DIGITS_MASK;
}
}
// Starting at digit-N, returns the index of the nearest preceding digit that's not equal to the
// given value (or -1 if no such digit exists).
private static int previousNon(int digit, DigitSequence s, int n) {
while (--n >= 0 && s.getDigit(n) == digit) {}
return n;
}
/** Returns the bitmask for the range {@code [n-9]}. */
private static int bitmaskUpFrom(int n) {
return (-1 << n) & ALL_DIGITS_MASK;
}
/** Returns the bitmask for the range {@code [0-n]}. */
private static int bitmaskDownFrom(int n) {
return ALL_DIGITS_MASK >>> (9 - n);
}
// String containing one bitmasks per character (bits 0..9).
private final String bitmasks;
// Minimum and maximum sequences (inclusive) which span the ranges defined by this specification.
// Caching this is deliberate, since we sort disjoint ranges using the minimum value. It might
// not be so useful to cache the maximum value though.
private final DigitSequence min;
private final DigitSequence max;
// Total number of sequences matched by this specification.
private final long sequenceCount;
private RangeSpecification(String bitmasks) {
int length = bitmasks.length();
checkArgument(length <= DigitSequence.MAX_DIGITS,
"Range specification too long (%s digits)", length);
this.bitmasks = bitmasks;
long minValue = 0;
long maxValue = 0;
long sequenceCount = 1;
for (int n = 0; n < length; n++) {
int mask = bitmasks.charAt(n);
checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "invalid bitmask: %s", mask);
minValue = (minValue * 10) + numberOfTrailingZeros(mask);
maxValue = (maxValue * 10) + (31 - numberOfLeadingZeros(mask));
sequenceCount *= Integer.bitCount(mask);
}
this.min = new DigitSequence(length, minValue);
this.max = new DigitSequence(length, maxValue);
this.sequenceCount = sequenceCount;
}
/**
* Returns the number of digits that this specification can match. This is the length of all
* digit sequences which can match this specification.
*/
public int length() {
return bitmasks.length();
}
/** Returns the smallest digit sequence matched by this range. */
public DigitSequence min() {
return min;
}
/** Returns the largest digit sequence matched by this range. */
public DigitSequence max() {
return max;
}
/** Returns the total number of digit sequences matched by (contained in) this specification. */
public long getSequenceCount() {
return sequenceCount;
}
/**
* Returns the bitmask of the Nth range in this specification. Bit-X (0<= X <= 9) corresponds to
* the digit with value X. As every range in a specification must match at least one digit, this
* mask can never be zero.
*/
public int getBitmask(int n) {
return bitmasks.charAt(n);
}
/**
* Returns whether the given digit sequence is in one of the ranges specified by this instance.
* This is more efficient that obtaining the associated {@code RangeSet} and checking that.
*/
public boolean matches(DigitSequence digits) {
if (digits.length() != length()) {
return false;
}
for (int n = 0; n < length(); n++) {
if ((bitmasks.charAt(n) & (1 << digits.getDigit(n))) == 0) {
return false;
}
}
return true;
}
// Returns the next sequence in forward order which is contained by a range defined by this
// range specification, or null if none exists. The given sequence must not be matched by this
// specification.
private DigitSequence nextRangeStart(DigitSequence s) {
// Easy length based checks (this is where the fact that range specification only define ranges
// of the same length really simplifies things).
if (s.length() < length()) {
return min();
} else if (s.length() > length()) {
return null;
}
// Algorithm:
// 1) Find the highest digit that isn't in the corresponding bitmask for the range.
// 2) Try and increase the digit value until it's inside the next available range.
// 3) If that fails, move back up the sequence and increment the next digit up.
// 4) Repeat until a digit can be adjusted to start a new range, or all digits are exhausted.
// If all digits exhausted, the sequence was above all ranges in this specification.
// Otherwise return a new sequence using the unchanged prefix of the original sequence, the
// newly adjusted digit and the trailing digits of the minimal sequence.
for (int n = 0; n < length(); n++) {
int d = s.getDigit(n);
int mask = bitmasks.charAt(n);
if ((mask & (1 << d)) != 0) {
continue;
}
while (true) {
// Digit 'd' is either outside the range mask (first time though the loop) or inside a
// range. Either way we want to find the next digit above it which is inside a range.
// First increment 'd', and then find the next set bit in the mask at or above that point.
// Not extra check is needed at the end of ranges because numberOfTrailingZeros(0)==32
// which neatly ensures that the new value of 'd' must be out-of-range.
// If mask=[3-58]: d=1-->d'=3, d=4-->d'=5, d=5-->d'=8, d=8-->d'>9
d++;
d += numberOfTrailingZeros(mask >>> d);
if (d <= 9) {
// Found the value of the largest digit which can be adjusted to start the next range.
// Everything higher than this digit is the same as the original sequence and everything
// lower that this digit is the same as the corresponding digit in the minimal value.
return s.first(n).extendBy(d).extendBy(min.last((length() - n) - 1));
}
// No more bits available in this range, so go back up to the previous range.
if (--n < 0) {
// The sequence was above the last element in the set.
// Example: Range Spec: 1[2-8][3-8]456, Sequence: 188457
return null;
}
d = s.getDigit(n);
mask = bitmasks.charAt(n);
}
}
// If we finish the outer loop the given sequence was in a range (which is an error).
throw new IllegalArgumentException(
"Digit sequence '" + s + "' is in the range specified by: " + this);
}
// Given a sequence inside a range defined by this specification, return the highest sequence
// in the current range (possibly just the given sequence).
private DigitSequence currentRangeEnd(DigitSequence s) {
// Build up a value representing the trailing digits (which must always be 9's).
long nines = 0;
for (int n = length() - 1; n >= 0; n--, nines = (10 * nines) + 9) {
int mask = bitmasks.charAt(n);
if (mask == ALL_DIGITS_MASK) {
continue;
}
// The new digit is the top of the current range that the current sequence digit is in.
int d = nextUnsetBit(mask, s.getDigit(n)) - 1;
DigitSequence end =
s.first(n).extendBy(d).extendBy(new DigitSequence((length() - n) - 1, nines));
// Edge case for cases like "12[34][09]x" where "1239x" and "1240x" abut. This adjustment
// will happen at most once because the second range cannot also include an upper bound
// ending at '9', since otherwise (mask == ALL_DIGITS_MASK) at this position. The next
// sequence must be terminated with zeros starting at the current position having "rolled
// over" on the digit above.
if (d == 9) {
DigitSequence next = end.next();
if (matches(next)) {
d = nextUnsetBit(mask, 0) - 1;
end = next.first(n).extendBy(d).extendBy(new DigitSequence((length() - n) - 1, nines));
}
}
return end;
}
// The range specification is entirely 'x', which means it's a single range.
return max;
}
/**
* Returns a generating iterator which iterates in forward order over the disjoint ranges defined
* by this specification. This is not actually as useful as you might expect because in a lot of
* cases you would be dealing with a sequence of range specifications and it's not true that all
* ranges from multiple specifications are disjoint.
*/
Iterable<Range<DigitSequence>> asRanges() {
return () -> new Iterator<Range<DigitSequence>>() {
// Start is always in a range.
private DigitSequence start = min;
@Override
public boolean hasNext() {
return start != null;
}
@Override
public Range<DigitSequence> next() {
DigitSequence end = currentRangeEnd(start);
Range<DigitSequence> r = Range.closed(start, end).canonical(DigitSequence.domain());
start = nextRangeStart(end.next());
return r;
}
};
}
/**
* Returns a new range specification which is extended by the given mask value. For example:
* <pre>{@code
* "0123[4-6]".extendByMask(7) == "0123[4-6][0-2]"
* }</pre>
*/
public RangeSpecification extendByMask(int mask) {
checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "bad mask value '%s'", mask);
return new RangeSpecification(bitmasks + ((char) mask));
}
/**
* Returns a new range specification which is extended by the given specification. For example:
* <pre>{@code
* "0123[4-6]".extendBy("7[89]") == "0123[4-6]7[89]"
* }</pre>
*/
public RangeSpecification extendBy(RangeSpecification extra) {
return new RangeSpecification(bitmasks + extra.bitmasks);
}
/**
* Returns a new range specification which is extended by a sequence of any digits of the given
* length. For example:
* <pre>{@code
* "012".extendByLength(4) == "012xxxx"
* }</pre>
*/
public RangeSpecification extendByLength(int length) {
return this.extendBy(any(length));
}
/**
* Returns a range specification containing only the first {@code n} digits. If the given length
* is the same or greater than the specification's length, this specification is returned.
* For example:
* <pre>{@code
* "01[2-4]xx".first(8) == "01[2-4]xx" (same instance)
* "01[2-4]xx".first(5) == "01[2-4]xx" (same instance)
* "01[2-4]xx".first(3) == "01[2-4]"
* "01[2-4]xx".first(0) == "" (the empty specification)
* }</pre>
*/
public RangeSpecification first(int n) {
checkArgument(n >= 0);
if (n == 0) {
return empty();
}
return n < length() ? new RangeSpecification(bitmasks.substring(0, n)) : this;
}
/**
* Returns a range specification containing only the last {@code n} digits. If the given length
* is the same or greater than the specification's length, this specification is returned.
* For example:
* <pre>{@code
* "01[2-4]xx".last(8) == "01[2-4]xx" (same instance)
* "01[2-4]xx".last(5) == "01[2-4]xx" (same instance)
* "01[2-4]xx".last(3) == "[2-4]xx"
* "01[2-4]xx".last(0) == "" (the empty specification)
* }</pre>
*/
public RangeSpecification last(int n) {
checkArgument(n >= 0);
if (n == 0) {
return empty();
}
return n < length() ? new RangeSpecification(bitmasks.substring(length() - n)) : this;
}
/**
* Returns a range specification with any trailing "any digit" sequence removed. For example:
* <pre>{@code
* "0123".getPrefix() == "0123" (same instance)
* "0123xx".getPrefix() == "0123"
* "xxx".getPrefix() == "" (the empty specification)
* }</pre>
*/
public RangeSpecification getPrefix() {
int length = length();
while (length > 0 && getBitmask(length - 1) == ALL_DIGITS_MASK) {
length--;
}
return first(length);
}
@Override
public int compareTo(RangeSpecification other) {
int length = Math.min(length(), other.length());
for (int i = 0; i < length; i++) {
int mask = getBitmask(i);
int otherMask = other.getBitmask(i);
if (mask == otherMask) {
continue;
}
int commonBits = mask & otherMask;
mask -= commonBits;
otherMask -= commonBits;
// At least one mask is still non-zero and they don't overlap.
//
// The mask with the lowest set bit is the smaller mask in the ordering, since that bit
// distinguishes a smaller prefix than can never exist in the other specification.
// Testing the number of trailing zeros is equivalent to finding the lowest set bit.
return Integer.compare(numberOfTrailingZeros(mask), numberOfTrailingZeros(otherMask));
}
return Integer.compare(length(), other.length());
}
@Override
public boolean equals(Object o) {
return (o instanceof RangeSpecification) && bitmasks.equals(((RangeSpecification) o).bitmasks);
}
@Override
public int hashCode() {
return bitmasks.hashCode();
}
/**
* If you want lexicographical ordering of range specifications, don't use this method, use the
* {@code min().toString()}. This works assuming the ranges being compared are disjoint.
*/
@Override
public String toString() {
// Consider caching if it turns out that we are serializing a lot of these.
StringBuilder s = new StringBuilder();
for (int n = 0; n < bitmasks.length(); n++) {
appendMask(bitmasks.charAt(n), s);
}
return s.toString();
}
/** Returns the string representation of a single bit-mask. */
public static String toString(int bitMask) {
checkArgument(bitMask > 0 && bitMask < (1 << 10), "bad mask value: %s", bitMask);
return appendMask(bitMask, new StringBuilder()).toString();
}
// Appends the textual form of a single bit-mask to the given buffer and returns that buffer:
//   - the "all digits" mask is written as 'x',
//   - a mask accepted by hasOneBit() is written as the digit of its lowest set bit,
//   - anything else is written as a bracketed set of digits and digit runs (e.g. "[013-589]").
static StringBuilder appendMask(int mask, StringBuilder out) {
  if (mask == ALL_DIGITS_MASK) {
    return out.append('x');
  }
  if (hasOneBit(mask)) {
    return out.append(asChar(numberOfTrailingZeros(mask)));
  }
  out.append('[');
  // Work on a copy from which each processed run of consecutive bits is cleared in turn.
  int remaining = mask;
  while (remaining != 0) {
    int loBit = numberOfTrailingZeros(remaining);
    // The first digit of a run is always written.
    out.append(asChar(loBit));
    int hiBit = nextUnsetBit(remaining, loBit);
    int runLength = hiBit - loBit;
    if (runLength > 1) {
      // Stylistically prefer "[34]" to "[3-4]" for compactness, so only runs of three or more
      // digits get a hyphen.
      if (runLength > 2) {
        out.append('-');
      }
      out.append(asChar(hiBit - 1));
    }
    // Drop every bit below hiBit (i.e. the run just written) before looking for the next run.
    remaining &= ~((1 << hiBit) - 1);
  }
  return out.append(']');
}
// Maps a digit value in the range [0-9] to its ASCII character ('0' to '9'). No range check is
// performed on the given value.
private static char asChar(int digit) {
  int codePoint = digit + '0';
  return (char) codePoint;
}
// Determines if the given bit-mask has only one bit set. Note that a zero mask (no bits set) is
// also reported as true, so callers are expected to pass non-zero masks.
private static boolean hasOneBit(int mask) {
  return Integer.bitCount(mask) <= 1;
}
// Returns the index of the lowest unset bit in the mask which is at, or above, the given bit.
private static int nextUnsetBit(int mask, int bit) {
  // Worked example for the mask of [013-589] with bit=3:
  //           v-- bit=3
  //   01100111011   mask
  //   00000000111   lowerBits = (1 << 3) - 1
  //   01100111111   filled = mask | lowerBits
  //   10011000000   ~filled
  //       ^-- result=6 (the lowest set bit of ~filled)
  int lowerBits = (1 << bit) - 1;
  int filled = mask | lowerBits;
  return numberOfTrailingZeros(~filled);
}
}

+ 1342
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/RangeTree.java
File diff suppressed because it is too large
View File


+ 194
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/RangeTreeFactorizer.java View File

@ -0,0 +1,194 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES;
import com.google.common.collect.ImmutableList;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
import java.util.ArrayList;
import java.util.List;
/**
* Factor a range tree into a sequence of trees which attempts to minimize overall complexity in
* the face of non-determinism. This can be used to reduce the size of any generated regular
* expressions.
*/
public final class RangeTreeFactorizer {
/** Strategies to control how merging is achieved when building factors. */
public enum MergeStrategy {
/**
* Edges are only merged if they accept exactly the same set of digits. If the existing factor
* contains "[0-5]" it will not be merged with the candidate edge "[0-8]".
*/
REQUIRE_EQUAL_EDGES,
/**
* Edges can be merged if the candidate edge accepts more digits than the existing edge. If the
* existing factor contains "[0-5]" and the candidate edge is "[0-8]", the candidate edge is
* split so that "[0-5]" is merged as normal and an additional edge "[6-8]" is branched off.
*/
ALLOW_EDGE_SPLITTING,
}
/**
* Factors the given range tree.
* <p>
* Paths are processed longest-first, and a path belongs in a particular "factor" if it can be
* added without "causing a split" in the existing factor. For example, given an existing factor
* {@code {"12[3-6]x", "45xx"}}:
* <ul>
* <li>The path "12[3-6]" can be added, since it is a prefix of one of the existing paths in
* the DFA.
* <li>The path "13xx" can be added since it forms a new branch in the DFA, which does not
* affect any existing branches ("13..." is disjoint with "12...").
* <li>The path "12[34]" cannot be added since it would "split" the existing path
* "12[3-6]x" in the DFA ("[34]" is a subset of "[3-6]").
* <li>Depending on the merge strategy, the path "12[0-6]x" might be added ("[0-6]" is a
* superset of "[3-6]"). See {@link MergeStrategy} for more information.
* </ul>
*
* @param ranges the range tree to be factored.
* @param strategy controls when an edge being merged is considered compatible with an edge
* already in a factor.
* @return the factors, in the order they were created (starting with the factor seeded from
* the longest paths); if {@code ranges} has only one length it is returned as the only factor.
*/
public static ImmutableList<RangeTree> factor(RangeTree ranges, MergeStrategy strategy) {
// If only one length on all paths, the DFA is already "factored".
if (ranges.getLengths().size() == 1) {
return ImmutableList.of(ranges);
}
List<RangeTree> factors = new ArrayList<>();
// Start with the "naive" factors (splitting by length) from longest to shortest.
for (int n : ranges.getLengths().descendingSet()) {
factors.add(ranges.intersect(RangeTree.from(RangeSpecification.any(n))));
}
// Now attempt to merge as much of each of the shorter factors as possible into the longer ones.
// In each loop we subsume a candidate factor into previous factors, either in whole or in part.
int index = 1;
while (index < factors.size()) {
// Merge (as much as possible) each "naive" factor into earlier factors.
RangeTree r = factors.get(index);
for (int n = 0; n < index && !r.isEmpty(); n++) {
RangeTree merged = new RangeTreeFactorizer(factors.get(n), strategy).mergeFrom(r);
factors.set(n, merged);
// Calculate the ranges which haven't yet been merged into any earlier factor.
r = r.subtract(merged);
}
if (r.isEmpty()) {
// All ranges merged, so remove the original factor (index now references the next factor).
factors.remove(index);
} else {
// We have some un-factorable ranges which are kept to start a new factor.
factors.set(index, r);
index++;
}
}
return ImmutableList.copyOf(factors);
}
// This is modified as paths are added.
private RangeTree factor;
private final MergeStrategy strategy;
// Creates a factorizer which merges paths into (a copy of the reference to) the given factor.
RangeTreeFactorizer(RangeTree factor, MergeStrategy strategy) {
this.factor = checkNotNull(factor);
this.strategy = strategy;
}
// Merges whichever paths of the given ranges are compatible with the current factor into it,
// walking both trees from their initial nodes, and returns the (possibly enlarged) factor.
RangeTree mergeFrom(RangeTree ranges) {
recursivelyMerge(ranges.getInitial(), factor.getInitial(), RangeSpecification.empty());
return factor;
}
// Handles one step of the parallel walk: if the source node can terminate, the path walked so
// far is added to the factor, otherwise the out-edges of the source node are compared against
// the out-edges of the corresponding node in the factor.
void recursivelyMerge(DfaNode srcNode, DfaNode dstNode, RangeSpecification path) {
if (srcNode.canTerminate()) {
factor = factor.union(RangeTree.from(path));
} else {
srcNode.accept(new FactoringVisitor(dstNode, path));
}
}
// Visits the out-edges of a source node, comparing each against all out-edges of "dstNode" (the
// node reached by following the same path in the factor).
private final class FactoringVisitor implements DfaVisitor {
private final RangeSpecification path;
private final DfaNode dstNode;
// True if we encountered a situation when an edge we are merging (srcMask) has a partial
// overlap with the existing edge (dstMask) (e.g. merging "[0-6]" into "[4-9]"). This is
// distinct from the case where the existing edge is a subset of the edge being merged (e.g.
// merging "[0-6]" into "[2-4]", where the edge being merged can be split into "[0156]" and
// "[2-4]"). In either strategy, a partial overlap will prevent merging.
private boolean partialOverlap = false;
// Records the union of all edge ranges visited for the current node. This is used to determine
// the remaining edges that must be added after visiting the existing factor (especially in the
// case of ALLOW_EDGE_SPLITTING).
private int allDstMask = 0;
FactoringVisitor(DfaNode dstNode, RangeSpecification path) {
this.dstNode = dstNode;
this.path = path;
}
@Override
public void visit(DfaNode source, DfaEdge srcEdge, DfaNode srcTarget) {
int srcMask = srcEdge.getDigitMask();
dstNode.accept((s, dstEdge, dstTarget) -> {
int dstMask = dstEdge.getDigitMask();
if ((strategy == REQUIRE_EQUAL_EDGES) ? (dstMask == srcMask) : (dstMask & ~srcMask) == 0) {
// The set of digits accepted by the edge being merged (srcMask) is equal-to or a superset
// of the digits of the edge in the factor we are merging into. The path is extended by
// the destination edge because during recursion we only follow paths already in the
// factor.
recursivelyMerge(srcTarget, dstTarget, path.extendByMask(dstMask));
} else {
partialOverlap |= (dstMask & srcMask) != 0;
}
allDstMask |= dstMask;
});
if (!partialOverlap) {
// Work out the digits that weren't in any of the edges of the factor we were processing
// and merge the sub-tree under that edge into the current factor. For REQUIRE_EQUAL_EDGES
// the extraMask is always either srcMask or 0 (since the edge was either added in full,
// or disjoint with all the existing edges). For ALLOW_EDGE_SPLITTING it's the remaining
// range that wasn't merged with any of the existing paths.
int extraMask = srcMask & ~allDstMask;
if (extraMask != 0) {
new MergingVisitor(path).recurse(srcTarget, extraMask);
}
}
}
}
// Adds the paths of a source sub-tree to the factor, each prefixed with "path". Unlike
// FactoringVisitor, no comparison with existing edges of the factor is made.
private final class MergingVisitor implements DfaVisitor {
private final RangeSpecification path;
MergingVisitor(RangeSpecification path) {
this.path = checkNotNull(path);
}
// Extends the path by the given mask and then either adds it to the factor (if the node can
// terminate) or continues down each out-edge of the node.
void recurse(DfaNode node, int mask) {
RangeSpecification newPath = path.extendByMask(mask);
if (node.canTerminate()) {
factor = factor.union(RangeTree.from(newPath));
} else {
node.accept(new MergingVisitor(newPath));
}
}
@Override
public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
recurse(target, edge.getDigitMask());
}
}
}

+ 112
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/Types.java View File

@ -0,0 +1,112 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.base.CaseFormat.LOWER_CAMEL;
import static com.google.common.base.CaseFormat.UPPER_UNDERSCORE;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.MOBILE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PAGER;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PERSONAL_NUMBER;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PREMIUM_RATE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.SHARED_COST;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.TOLL_FREE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UAN;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.VOICEMAIL;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.VOIP;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_FIXED_LINE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_MOBILE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PAGER;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PERSONAL_NUMBER;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PREMIUM_RATE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_SHARED_COST;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_TOLL_FREE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UAN;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UNKNOWN;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_VOICEMAIL;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_VOIP;
import static java.util.function.Function.identity;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableSet;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType;
import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType;
import java.util.Optional;
import java.util.stream.Stream;
/** Static utility for conversion of number types. */
public final class Types {
  // Prefixes stripped from the enum constant names when deriving XML element names.
  private static final String XML_TYPE_PREFIX = "XML_";
  private static final String SHORTCODE_TYPE_PREFIX = "SC_";

  // Maps XML element names (e.g. "fixedLine") to their range type. The XML_UNKNOWN and
  // UNRECOGNIZED values are deliberately left out of this map.
  private static final ImmutableBiMap<String, XmlNumberType> XML_TYPE_MAP =
      Stream.of(XmlNumberType.values())
          .filter(t -> !(t == XML_UNKNOWN || t == XmlNumberType.UNRECOGNIZED))
          .collect(toImmutableBiMap(Types::toXmlName, identity()));

  // Map the subset of XmlNumberType values which correspond to valid number types. Note that while
  // FIXED_LINE and MOBILE exist in both types, and can be converted, their semantics change.
  private static final ImmutableBiMap<XmlNumberType, ValidNumberType> XML_TO_SCHEMA_TYPE_MAP =
      ImmutableBiMap.<XmlNumberType, ValidNumberType>builder()
          .put(XML_FIXED_LINE, FIXED_LINE)
          .put(XML_MOBILE, MOBILE)
          .put(XML_PAGER, PAGER)
          .put(XML_TOLL_FREE, TOLL_FREE)
          .put(XML_PREMIUM_RATE, PREMIUM_RATE)
          .put(XML_SHARED_COST, SHARED_COST)
          .put(XML_PERSONAL_NUMBER, PERSONAL_NUMBER)
          .put(XML_VOIP, VOIP)
          .put(XML_UAN, UAN)
          .put(XML_VOICEMAIL, VOICEMAIL)
          .build();

  /** Returns the set of valid XML type names. */
  public static ImmutableSet<String> getXmlNames() {
    return XML_TYPE_MAP.keySet();
  }

  /**
   * Returns the XML element name based on the given XML range type.
   *
   * @throws IllegalStateException if the name of the given type does not start with "XML_".
   */
  public static String toXmlName(XmlNumberType type) {
    return lowerCamelWithoutPrefix(type.name(), XML_TYPE_PREFIX, type);
  }

  /**
   * Returns the XML element name based on the given XML shortcode type.
   *
   * @throws IllegalStateException if the name of the given type does not start with "SC_".
   */
  public static String toXmlName(XmlShortcodeType type) {
    return lowerCamelWithoutPrefix(type.name(), SHORTCODE_TYPE_PREFIX, type);
  }

  // Strips the expected prefix from an UPPER_UNDERSCORE enum name and converts what is left to
  // lowerCamel form. The type itself is only used when reporting a missing prefix.
  private static String lowerCamelWithoutPrefix(String enumName, String prefix, Object type) {
    checkState(enumName.startsWith(prefix), "Bad type: %s", type);
    String suffix = enumName.substring(prefix.length());
    return UPPER_UNDERSCORE.to(LOWER_CAMEL, suffix);
  }

  /**
   * Returns the XML range type based on the given case-sensitive XML element name (e.g.
   * "fixedLine"), or empty if the name is not one of {@link #getXmlNames()}.
   */
  public static Optional<XmlNumberType> forXmlName(String xmlName) {
    XmlNumberType type = XML_TYPE_MAP.get(xmlName);
    return Optional.ofNullable(type);
  }

  /** Returns the {@code ValidNumberType} equivalent of the given XML range type (if it exists). */
  public static Optional<ValidNumberType> toSchemaType(XmlNumberType rangeType) {
    ValidNumberType schemaType = XML_TO_SCHEMA_TYPE_MAP.get(rangeType);
    return Optional.ofNullable(schemaType);
  }

  /** Returns the {@code XmlNumberType} equivalent of the given schema range type (if it exists). */
  public static Optional<XmlNumberType> toXmlType(ValidNumberType schemaType) {
    XmlNumberType xmlType = XML_TO_SCHEMA_TYPE_MAP.inverse().get(schemaType);
    return Optional.ofNullable(xmlType);
  }

  private Types() {}
}

+ 99
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/i18n/PhoneRegion.java View File

@ -0,0 +1,99 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.i18n;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static java.util.Comparator.comparing;
import static java.util.Comparator.naturalOrder;
import com.google.auto.value.AutoValue;
import com.ibm.icu.util.ULocale;
import java.util.Comparator;
import java.util.regex.Pattern;
/**
 * A simple type-safe identifier for CLDR regions for phone numbers. Only basic checking of regions
 * is performed, but this should be fine since the set of input regions is tightly controlled.
 *
 * <p>The metadata tooling makes only minimal use of the semantics of region codes, relying on
 * them mainly as key values, and never tries to canonicalize or modify them.
 */
@AutoValue
public abstract class PhoneRegion implements Comparable<PhoneRegion> {
  // We limit the non XX region codes to just "world" for this project. Note that only upper-case
  // letters are accepted.
  private static final Pattern VALID_CODE = Pattern.compile("[A-Z]{2}|001");

  // Compares region codes by length first and then by their natural (String) order, since we
  // want "ZZ" < "001" in the ordering.
  private static final Comparator<PhoneRegion> ORDERING =
      comparing(r -> r.locale().getCountry(),
          comparing(String::length).thenComparing(naturalOrder()));

  private static final PhoneRegion UNKNOWN = of("ZZ");
  private static final PhoneRegion WORLD = of("001");

  /** Returns the "world" region (001). */
  public static PhoneRegion getWorld() {
    return PhoneRegion.WORLD;
  }

  /** Returns the "unknown" region (ZZ). */
  public static PhoneRegion getUnknown() {
    return PhoneRegion.UNKNOWN;
  }

  /**
   * Returns the region identified by the given CLDR String representation. The code is matched
   * case-sensitively and must be either two upper-case ASCII letters (e.g. "US") or "001".
   *
   * @throws IllegalArgumentException if {@code cldrCode} is not a structurally valid region code.
   */
  public static PhoneRegion of(String cldrCode) {
    checkArgument(VALID_CODE.matcher(cldrCode).matches(), "invalid region code: %s", cldrCode);
    return new AutoValue_PhoneRegion(new ULocale.Builder().setRegion(cldrCode).build());
  }

  /** Orders regions by the length of their code and then by the code itself. */
  @Override
  public int compareTo(PhoneRegion other) {
    return ORDERING.compare(this, other);
  }

  /** Returns the string representation for the region (either a two-letter or three-digit code). */
  @Override public final String toString() {
    String s = locale().getCountry();
    // NOTE(review): this is really a state check, but the exception type thrown here is kept
    // as-is so that existing callers are unaffected.
    checkArgument(!s.isEmpty(), "invalid (empty) country: %s", locale());
    return s;
  }

  // Visible for AutoValue only.
  abstract ULocale locale();

  /**
   * Return an English identifier for the region in the form {@code "<region name> (<cldr code>)"}.
   * If the English name is not available, then {@code "Region: <cldr code>"} is returned. This
   * string is only suitable for use in comments.
   *
   * @throws IllegalStateException if this method is called on the "world" region.
   */
  public String getEnglishNameForXmlComments() {
    checkState(!equals(getWorld()), "cannot ask for display name of 'world' region");
    String regionStr = locale().getCountry();
    // Use "US" so we get "en_US", and not just "en", since the policy is to use the name as it
    // would appear in America.
    String displayCountry = locale().getDisplayCountry(ULocale.US);
    // A display name which is empty, or just echoes the region code, is treated as unavailable.
    return !displayCountry.isEmpty() && !displayCountry.equals(regionStr)
        ? String.format("%s (%s)", displayCountry, regionStr)
        : String.format("Region: %s", regionStr);
  }
}

+ 60
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/i18n/SimpleLanguageTag.java View File

@ -0,0 +1,60 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.i18n;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.auto.value.AutoValue;
import java.util.regex.Pattern;
/**
 * A simple type-safe identifier for BCP 47 language tags containing only language code and an
 * optional script (e.g. "en" or "zh-Hant"). This class does no canonicalization on the values
 * it's given, apart from normalizing the separator to a hyphen.
 *
 * <p>We can't really use {@code Locale} here because there's an issue whereby the JDK deliberately
 * uses deprecated language tags and would, for example, convert "id" (Indonesian) to "in", which
 * is at odds with BCP 47. See {@link java.util.Locale#forLanguageTag(String) forLanguageTag()} for
 * more information.
 *
 * <p>The metadata tooling makes only minimal use of the semantics of language codes, relying on
 * them mainly as key values, and never tries to canonicalize or modify them (i.e. it is possible
 * that a language code used for this data may end up being non-canonical). It is up to any library
 * which loads the metadata at runtime to ensure that its mappings to the data account for current
 * canonicalization.
 */
@AutoValue
public abstract class SimpleLanguageTag {
  // Two or three lower-case letters, optionally followed by '-' or '_' and a four letter script
  // in title case. This can be extended or modified to use Locale as necessary.
  private static final Pattern SIMPLE_TAG = Pattern.compile("[a-z]{2,3}(?:[-_][A-Z][a-z]{3})?");

  /**
   * Returns a language tag instance for the given string with minimal structural checking. If the
   * given tag uses {@code '_'} for separating language and script it's converted into {@code '-'}.
   *
   * @throws IllegalArgumentException if the given string is not a structurally valid simple tag.
   */
  public static SimpleLanguageTag of(String lang) {
    boolean wellFormed = SIMPLE_TAG.matcher(lang).matches();
    checkArgument(wellFormed, "invalid language tag: %s", lang);
    String normalized = lang.replace('_', '-');
    return new AutoValue_SimpleLanguageTag(normalized);
  }

  // Visible for AutoValue only.
  abstract String lang();

  /** Returns the (hyphen separated) language tag string. */
  @Override
  public final String toString() {
    return lang();
  }
}

+ 94
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/AltFormatSpec.java View File

@ -0,0 +1,94 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.auto.value.AutoValue;
import com.google.auto.value.extension.memoized.Memoized;
import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatGroup;
import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate;
import java.util.Optional;
/**
 * An alternate format, used to describe less common ways we believe a phone number can be
 * formatted in a region. These can be derived from an "alias" in the formats table, or as
 * "historical" formats which are not associated with any specific current format.
 *
 * <p>Note that alternate formats can be defined with the same template, and they are merged
 * together to produce a canonical map in which the format template is the key.
 */
@AutoValue
public abstract class AltFormatSpec {
  private static final CharMatcher OPT_DIGIT = CharMatcher.is('*');
  private static final CharMatcher ANY_DIGIT = CharMatcher.is('X');
  private static final CharMatcher ALLOWED_TEMPLATE_CHARS = CharMatcher.anyOf("X* ");

  /**
   * Creates an alternate format from a simple grouping template and a prefix.
   *
   * @param template a template whose specifier contains only 'X', '*' and space, and which has no
   *     XML prefix.
   * @param prefix the prefix of the ranges to which the format applies (possibly empty).
   * @param parent the ID of the format for which this is an alternative.
   * @param comment an optional free-form comment.
   * @throws IllegalArgumentException if the template is not a simple grouping template, or if the
   *     prefix is not valid for it.
   */
  public static AltFormatSpec create(
      FormatTemplate template, RangeSpecification prefix, String parent, Optional<String> comment) {
    // As only a limited set of chars is allowed, we know things like national prefix or carrier
    // codes cannot be present. We're just interested in basic grouping like "XXX XXX**".
    String spec = template.getSpecifier();
    boolean isSimpleTemplate =
        ALLOWED_TEMPLATE_CHARS.matchesAllOf(spec) && !template.getXmlPrefix().isPresent();
    checkArgument(isSimpleTemplate, "invalid alternate format template: %s", template);

    // Prefix must be no longer than the minimum template length and must be equal to its own
    // getPrefix() value (i.e. not contain any trailing 'x').
    boolean isValidPrefix =
        prefix.length() <= template.minLength() && prefix.equals(prefix.getPrefix());
    checkArgument(isValidPrefix,
        "invalid prefix '%s' for alternate format template: %s", prefix, template);

    // If variable length, the spec must have room for the prefix before the '*' characters.
    if (!OPT_DIGIT.matchesNoneOf(spec)) {
      String beforeOptionalDigits = spec.substring(0, OPT_DIGIT.indexIn(spec));
      checkArgument(prefix.length() <= ANY_DIGIT.countIn(beforeOptionalDigits),
          "invalid prefix '%s' for alternate format template: %s", prefix, template);
    }
    return new AutoValue_AltFormatSpec(template, prefix, parent, comment);
  }

  /** Return the alternate format template containing only simple grouping (e.g. "XXX XXX**"). */
  public abstract FormatTemplate template();

  /**
   * Returns the prefix for this alternate format which (along with the template length) defines
   * the bounds over which this format can apply.
   */
  public abstract RangeSpecification prefix();

  /** Returns the ID of the format for which this specifier is an alternative. */
  public abstract String parentFormatId();

  /** Returns the arbitrary comment, possibly containing newlines, for this format. */
  public abstract Optional<String> comment();

  /** Returns the format specifier as used in the CSV representation (e.g. "20 XXX XXX"). */
  @Memoized
  public String specifier() {
    RangeSpecification prefix = prefix();
    StringBuilder out = new StringBuilder();
    // Index of the current digit across all groups (used to look up the prefix bitmasks).
    int digitIdx = 0;
    for (FormatGroup group : template().getGroups()) {
      for (int i = 0; i < group.maxLength(); i++) {
        if (digitIdx < prefix.length()) {
          // Digits covered by the prefix are written explicitly; uppercasing is so that
          // 'x' --> 'X'.
          String digits = RangeSpecification.toString(prefix.getBitmask(digitIdx));
          out.append(Ascii.toUpperCase(digits));
        } else if (i < group.minLength()) {
          out.append("X");
        } else {
          out.append("*");
        }
        digitIdx++;
      }
      out.append(" ");
    }
    // Remove the separator which was added after the final group.
    out.setLength(out.length() - 1);
    return out.toString();
  }
}

+ 146
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/AltFormatsSchema.java View File

@ -0,0 +1,146 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper;
import static java.util.function.Function.identity;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Ascii;
import com.google.common.collect.ImmutableList;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate;
import com.google.i18n.phonenumbers.metadata.table.CsvParser;
import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper;
import com.google.i18n.phonenumbers.metadata.table.CsvTable;
import com.google.i18n.phonenumbers.metadata.table.CsvTableCollector;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Stream;
import javax.annotation.Nullable;
/**
 * The schema of the "AltFormats" table with rows identified by an "alternate format specifier":
 * <ol>
 *   <li>{@link #PARENT}: The ID of the "main" format that this is an alternate of.
 *   <li>{@link #COMMENT}: Freeform comment text.
 * </ol>
 *
 * <p>Row keys are serialized via the marshaller and produce the leading column:
 * <ol>
 *   <li>{@code Format}: The alternate format specifier including prefix and grouping information
 *       (e.g. "20 XXXX XXXX").
 * </ol>
 */
public final class AltFormatsSchema {
  private static final String FORMAT = "Format";
  private static final String PARENT = "Parent Format";
  private static final String COMMENT = "Comment";

  /** The expected column titles, in order, of the alternate formats table. */
  public static final ImmutableList<String> HEADER = ImmutableList.of(FORMAT, PARENT, COMMENT);

  private static final CsvParser CSV_PARSER = CsvParser.withSeparator(';').trimWhitespace();
  // Rejects any header row which is not exactly HEADER.
  private static final RowMapper ROW_MAPPER =
      rowMapper(h -> checkArgument(h.equals(HEADER), "unexpected alt-format header: %s", h));

  /**
   * Loads the alternate formats from a given file path, returning an empty list if the file does
   * not exist. An {@link IOException} raised while reading is rethrown wrapped in a
   * {@link RuntimeException}.
   */
  public static ImmutableList<AltFormatSpec> loadAltFormats(Path path) {
    if (Files.exists(path)) {
      try (Reader csv = Files.newBufferedReader(path)) {
        return importAltFormats(csv);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
    return ImmutableList.of();
  }

  // Reads alternate formats from CSV text, unescaping each value as single line CSV text. The
  // given reader is closed by this method.
  @VisibleForTesting
  static ImmutableList<AltFormatSpec> importAltFormats(Reader csv) throws IOException {
    List<AltFormatSpec> parsed = new ArrayList<>();
    Consumer<Stream<String>> rowCallback = getRowCallback(parsed);
    try (BufferedReader r = new BufferedReader(csv)) {
      CSV_PARSER.parse(r.lines(),
          row -> rowCallback.accept(row.map(CsvTable::unescapeSingleLineCsvText)));
    }
    return ImmutableList.copyOf(parsed);
  }

  /**
   * Imports alternate formats from a supplier of rows. The first row supplied must be the header
   * row, and the end of the data is signalled by the supplier returning {@code null}.
   */
  public static ImmutableList<AltFormatSpec> importAltFormats(Supplier<List<String>> rows) {
    List<AltFormatSpec> parsed = new ArrayList<>();
    Consumer<Stream<String>> rowCallback = getRowCallback(parsed);
    // Expect header row always.
    List<String> header = rows.get();
    rowCallback.accept(header.stream());
    for (List<String> row = rows.get(); row != null; row = rows.get()) {
      rowCallback.accept(row.stream());
    }
    return ImmutableList.copyOf(parsed);
  }

  // Returns a callback which parses each (non-header) row it is given and adds the resulting
  // alternate format to the given list.
  private static Consumer<Stream<String>> getRowCallback(List<AltFormatSpec> altFormats) {
    return ROW_MAPPER.mapTo(
        row -> altFormats.add(parseAltFormat(row.get(FORMAT), row.get(PARENT), row.get(COMMENT))));
  }

  /**
   * Parses an alternate format from the values in a row of the alternate formats table.
   *
   * @param altId the alternate format specifier (e.g. "1X [2-8]XXX** XXX").
   * @param parent the ID of the parent format.
   * @param comment the comment for the format, or null if there is none.
   */
  public static AltFormatSpec parseAltFormat(
      String altId, String parent, @Nullable String comment) {
    // "1X [2-8]XXX** XXX" --> "XX XXXX** XXX"
    String templateSpec = altId.replaceAll("[0-9]|\\[[-0-9]+\\]", "X");
    FormatTemplate template = FormatTemplate.parse(templateSpec);
    // "1X [2-8]XXX** XXX" --> "1X [2-8]" --> "1X[2-8]" --> "1x[2-8]"
    // The prefix here can (and often will be) the empty string.
    // This fails if '*' is ever left in the specification, but that really should not happen.
    String withoutTrailingGroups = altId.replaceAll("[X* ]*$", "");
    String prefixSpec = Ascii.toLowerCase(whitespace().removeFrom(withoutTrailingGroups));
    RangeSpecification prefix = RangeSpecification.parse(prefixSpec);
    return AltFormatSpec.create(template, prefix, parent, Optional.ofNullable(comment));
  }

  /** Exports alternate formats to a collector (potentially escaping fields for CSV). */
  public static void export(
      List<AltFormatSpec> altFormats, Consumer<Stream<String>> collector, boolean toCsv) {
    collector.accept(HEADER.stream());
    // Only the comment is escaped (and only for CSV); an absent comment is written as "".
    Function<String, String> escapeFn = toCsv ? CsvTable::escapeForSingleLineCsv : identity();
    for (AltFormatSpec f : altFormats) {
      String specifier = f.specifier();
      String parentId = f.parentFormatId();
      String comment = f.comment().map(escapeFn).orElse("");
      collector.accept(Stream.of(specifier, parentId, comment));
    }
  }

  /**
   * Helper method to write alternate formats in same CSV format as CsvTable.
   *
   * @return false if there were no formats (in which case nothing is written), otherwise true.
   */
  public static boolean exportCsv(Writer csv, List<AltFormatSpec> altFormats) {
    boolean hasFormats = !altFormats.isEmpty();
    if (hasFormats) {
      CsvTableCollector collector = new CsvTableCollector(true);
      export(altFormats, collector, true);
      collector.writeCsv(csv);
    }
    return hasFormats;
  }

  private AltFormatsSchema() {}
}

+ 132
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/CommentsSchema.java View File

@ -0,0 +1,132 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper;
import static java.util.Comparator.comparing;
import static java.util.function.Function.identity;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor;
import com.google.i18n.phonenumbers.metadata.table.CsvParser;
import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper;
import com.google.i18n.phonenumbers.metadata.table.CsvTable;
import com.google.i18n.phonenumbers.metadata.table.CsvTableCollector;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Stream;
/**
 * The data schema for handling XML comments. Note that, unlike other "table" schemas, this does
 * not represent comments in the form of a CsvTable. This is because comment anchors can appear
 * multiple times in the CSV file (so there's no unique key). This is not an issue since the
 * internal data representation handles this, but it just means that code cannot be reused as much.
 */
public class CommentsSchema {
  private static final String REGION = "Region";
  private static final String LABEL = "Label";
  private static final String COMMENT = "Comment";

  /** The column titles (in order) expected as the first row of any comments data. */
  public static final ImmutableList<String> HEADER = ImmutableList.of(REGION, LABEL, COMMENT);

  // Imported and exported comments are always ordered by their anchor.
  private static final Comparator<Comment> ORDERING = comparing(Comment::getAnchor);

  private static final CsvParser CSV_PARSER = CsvParser.withSeparator(';').trimWhitespace();

  // Rejects input whose header row is not exactly HEADER.
  private static final RowMapper ROW_MAPPER =
      rowMapper(h -> checkArgument(h.equals(HEADER), "unexpected comment header: %s", h));

  /**
   * Loads the comments from a given file path.
   *
   * @return the comments sorted by anchor, or an empty list if the file does not exist.
   * @throws RuntimeException wrapping any {@link IOException} raised while reading the file.
   */
  public static ImmutableList<Comment> loadComments(Path path) {
    if (!Files.exists(path)) {
      return ImmutableList.of();
    }
    try (Reader csv = Files.newBufferedReader(path)) {
      return importComments(csv);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Parses comments from ';'-separated CSV text, unescaping each cell. The given reader is wrapped
   * in a {@link BufferedReader} which is closed before this method returns.
   */
  @VisibleForTesting
  static ImmutableList<Comment> importComments(Reader csv) throws IOException {
    List<Comment> comments = new ArrayList<>();
    Consumer<Stream<String>> rowCallback = getRowCallback(comments);
    try (BufferedReader r = new BufferedReader(csv)) {
      CSV_PARSER.parse(r.lines(),
          row -> rowCallback.accept(row.map(CsvTable::unescapeSingleLineCsvText)));
    }
    return ImmutableList.sortedCopyOf(ORDERING, comments);
  }

  /**
   * Imports comments from a supplier of already-split rows. The first row supplied must be the
   * header row; subsequent rows are read until the supplier returns {@code null}.
   *
   * @param rows supplies one row per call, and {@code null} once the input is exhausted.
   * @return the imported comments sorted by anchor.
   * @throws IllegalArgumentException if no header row is supplied, or the header is unexpected.
   */
  public static ImmutableList<Comment> importComments(Supplier<List<String>> rows) {
    List<Comment> comments = new ArrayList<>();
    Consumer<Stream<String>> rowCallback = getRowCallback(comments);
    // Expect header row always. Since null is the end-of-input marker, an empty supplier would
    // otherwise fail here with an uninformative NullPointerException.
    List<String> header = rows.get();
    checkArgument(header != null, "missing header row in comments data");
    rowCallback.accept(header.stream());
    List<String> row;
    while ((row = rows.get()) != null) {
      rowCallback.accept(row.stream());
    }
    return ImmutableList.sortedCopyOf(ORDERING, comments);
  }

  /**
   * Returns a row consumer which validates the header and appends one comment to the given list
   * for every row which has a value in the comment column (other rows are ignored).
   */
  private static Consumer<Stream<String>> getRowCallback(List<Comment> comments) {
    return ROW_MAPPER.mapTo(row -> {
      if (row.containsKey(COMMENT)) {
        comments.add(
            Comment.fromText(
                Anchor.of(PhoneRegion.of(row.get(REGION)), row.get(LABEL)),
                row.get(COMMENT)));
      }
    });
  }

  /**
   * Exports comments to a collector (potentially escaping the comment text for CSV). The header
   * row is emitted first, followed by one row per comment, sorted by anchor.
   *
   * @param comments the comments to export (in any order).
   * @param collector receives each row as a stream of cell values.
   * @param toCsv if true, comment text is escaped for single-line CSV.
   */
  public static void export(
      List<Comment> comments, Consumer<Stream<String>> collector, boolean toCsv) {
    collector.accept(HEADER.stream());
    Function<String, String> escapeFn = toCsv ? CsvTable::escapeForSingleLineCsv : identity();
    comments.stream()
        .sorted(ORDERING)
        .forEach(c -> collector.accept(Stream.of(
            c.getAnchor().region().toString(), c.getAnchor().label(), escapeFn.apply(c.toText()))));
  }

  /**
   * Helper method to write comments in same CSV format as CsvTable.
   *
   * @return false (writing nothing) if there are no comments, otherwise true.
   */
  public static boolean exportCsv(Writer csv, List<Comment> comments) {
    if (comments.isEmpty()) {
      return false;
    }
    CsvTableCollector collector = new CsvTableCollector(true);
    export(comments, collector, true);
    collector.writeCsv(csv);
    return true;
  }
}

+ 236
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/CsvData.java View File

@ -0,0 +1,236 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata;
import com.google.auto.value.AutoValue;
import com.google.auto.value.extension.memoized.Memoized;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Iterables;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey;
import com.google.i18n.phonenumbers.metadata.model.MetadataTableSchema.Regions;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment;
import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeKey;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.table.CsvTable;
import com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode;
import com.google.i18n.phonenumbers.metadata.table.DiffKey;
import com.google.i18n.phonenumbers.metadata.table.DiffKey.Status;
import com.google.i18n.phonenumbers.metadata.table.RangeKey;
import com.google.i18n.phonenumbers.metadata.table.RangeTable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
/**
 * All CSV based tables and legacy XML for a single calling code. This is the data from which all
 * legacy data can be reconstructed (metadata XML, carrier/geocode/timezone mappings).
 *
 * <p>This is loaded at once, possibly from multiple files, since conversion to legacy formats
 * often requires more than one of these data structures.
 */
@AutoValue
public abstract class CsvData {
  /** CSV data loading API. */
  public interface CsvDataProvider {
    /** Loads the top-level metadata table, which contains data for all supported calling codes. */
    CsvTable<DigitSequence> loadMetadata() throws IOException;

    /** Loads the CSV data for a single calling code. */
    CsvData loadData(DigitSequence cc) throws IOException;
  }

  /**
   * Creates a single CsvData instance, either directly or from a provider. The given metadata
   * table is filtered down to the single row relating to the specified calling code.
   *
   * <p>Before the instance is built, the inputs are validated via {@code checkMetadata}: the
   * calling code must be present in the metadata, the regions of the ranges and shortcodes tables
   * must be consistent with the metadata, and no rows in the ranges table (or in the shortcodes
   * table for any one region) may overlap.
   */
  public static CsvData create(
      DigitSequence cc,
      CsvTable<DigitSequence> allMetadata,
      CsvTable<RangeKey> ranges,
      CsvTable<ShortcodeKey> shortcodes,
      CsvTable<ExampleNumberKey> examples,
      CsvTable<String> formats,
      ImmutableList<AltFormatSpec> altFormats,
      CsvTable<String> operators,
      ImmutableList<Comment> comments) {
    // Row keys are unique, so the filtered table holds either zero rows or exactly one.
    CsvTable<DigitSequence> singleRowMetadata =
        allMetadata.toBuilder().filterRows(key -> key.equals(cc)).build();
    boolean hasCallingCode = !singleRowMetadata.getKeys().isEmpty();
    checkMetadata(hasCallingCode, "no such calling code %s in metadata", cc);
    checkRegions(singleRowMetadata, ranges, shortcodes);
    checkNoOverlappingRows(ranges);
    checkNoOverlappingShortcodeRows(shortcodes);
    return new AutoValue_CsvData(
        cc,
        singleRowMetadata,
        ranges,
        shortcodes,
        examples,
        formats,
        altFormats,
        operators,
        comments);
  }

  /** Verifies that no two rows of the ranges table cover a common number. */
  private static void checkNoOverlappingRows(CsvTable<RangeKey> csv) {
    RangeTree seen = RangeTree.empty();
    for (RangeKey rowKey : csv.getKeys()) {
      RangeTree rowRanges = rowKey.asRangeTree();
      boolean disjoint = seen.intersect(rowRanges).isEmpty();
      checkMetadata(disjoint, "overlapping row in CSV: %s", rowKey);
      seen = seen.union(rowRanges);
    }
  }

  /** Verifies that, within each region, no two rows of the shortcodes table overlap. */
  private static void checkNoOverlappingShortcodeRows(CsvTable<ShortcodeKey> csv) {
    Map<PhoneRegion, RangeTree> seenByRegion = new HashMap<>();
    for (ShortcodeKey rowKey : csv.getKeys()) {
      PhoneRegion region = rowKey.getRegion();
      RangeTree seen = seenByRegion.getOrDefault(region, RangeTree.empty());
      RangeTree rowRanges = rowKey.getRangeKey().asRangeTree();
      boolean disjoint = seen.intersect(rowRanges).isEmpty();
      checkMetadata(disjoint, "overlapping row in CSV: %s", rowKey);
      seenByRegion.put(region, seen.union(rowRanges));
    }
  }

  /**
   * Verifies that the regions found in the ranges table are exactly the main region plus any
   * extra regions given in the (single row) metadata table, and that shortcodes only exist for
   * regions found in the ranges table.
   */
  private static void checkRegions(
      CsvTable<DigitSequence> metadata,
      CsvTable<RangeKey> ranges,
      CsvTable<ShortcodeKey> shortcodes) {
    DigitSequence cc = Iterables.getOnlyElement(metadata.getKeys());
    PhoneRegion mainRegion = metadata.getOrDefault(cc, MetadataTableSchema.MAIN_REGION);
    Regions extraRegions = metadata.getOrDefault(cc, MetadataTableSchema.EXTRA_REGIONS);
    ImmutableSet<PhoneRegion> rangeRegions =
        ranges.getValues(RangesTableSchema.CSV_REGIONS).stream()
            .flatMap(regions -> regions.getValues().stream())
            .collect(toImmutableSet());

    if (!extraRegions.getValues().isEmpty()) {
      boolean mainListedAsExtra = extraRegions.getValues().contains(mainRegion);
      checkMetadata(!mainListedAsExtra,
          "invalid metadata: main region is duplicated in 'extra regions' column");
      // Same elements and same size means the two region sets are identical.
      boolean regionsMatch =
          rangeRegions.contains(mainRegion)
              && rangeRegions.containsAll(extraRegions.getValues())
              && rangeRegions.size() == extraRegions.getValues().size() + 1;
      checkMetadata(
          regionsMatch,
          "inconsistent regions:\nmetadata: %s + %s\nranges table: %s",
          mainRegion, extraRegions, rangeRegions);
    } else {
      boolean onlyMainRegion = rangeRegions.size() == 1 && rangeRegions.contains(mainRegion);
      checkMetadata(onlyMainRegion,
          "inconsistent regions:\nmetadata: %s\nranges table: %s", mainRegion, rangeRegions);
    }

    ImmutableSet<PhoneRegion> shortcodeRegions =
        shortcodes.getKeys().stream().map(key -> key.getRegion()).collect(toImmutableSet());
    boolean shortcodeRegionsKnown = rangeRegions.containsAll(shortcodeRegions);
    checkMetadata(shortcodeRegionsKnown,
        "unexpected regions for shortcodes:\nmetadata: %s\nshortcode regions: %s",
        rangeRegions, shortcodeRegions);
  }

  /** The difference between two CSV snapshots captured as a set of CSV tables. */
  @AutoValue
  public abstract static class Diff {
    /**
     * Diffs two tables, returning the diff table only if at least one of its rows has a status
     * other than {@code UNCHANGED} (otherwise the result is empty).
     */
    private static <K> Optional<CsvTable<DiffKey<K>>> diff(CsvTable<K> lhs, CsvTable<K> rhs) {
      CsvTable<DiffKey<K>> changes = CsvTable.diff(lhs, rhs, DiffMode.CHANGES);
      boolean nothingChanged =
          changes.getKeys().stream().allMatch(key -> key.getStatus() == Status.UNCHANGED);
      if (nothingChanged) {
        return Optional.empty();
      }
      return Optional.of(changes);
    }

    // Visible for AutoValue
    Diff() {}

    /** Returns the contextualized diff of the ranges table. */
    public abstract Optional<CsvTable<DiffKey<RangeKey>>> rangesDiff();

    /** Returns the contextualized diff of the shortcodes table. */
    public abstract Optional<CsvTable<DiffKey<ShortcodeKey>>> shortcodesDiff();

    /** Returns the contextualized diff of the examples table. */
    public abstract Optional<CsvTable<DiffKey<ExampleNumberKey>>> examplesDiff();

    /** Returns the contextualized diff of the formats table. */
    public abstract Optional<CsvTable<DiffKey<String>>> formatsDiff();

    /** Returns the contextualized diff of the operators table. */
    public abstract Optional<CsvTable<DiffKey<String>>> operatorsDiff();
  }

  /**
   * Creates the diff between two CSV data snapshots. Only the ranges, shortcodes, examples,
   * formats and operators tables are compared.
   */
  public static Diff diff(CsvData before, CsvData after) {
    // TODO: Add diffing for comments and/or alternate formats.
    Optional<CsvTable<DiffKey<RangeKey>>> ranges =
        Diff.diff(before.getRanges(), after.getRanges());
    Optional<CsvTable<DiffKey<ShortcodeKey>>> shortcodes =
        Diff.diff(before.getShortcodes(), after.getShortcodes());
    Optional<CsvTable<DiffKey<ExampleNumberKey>>> examples =
        Diff.diff(before.getExamples(), after.getExamples());
    Optional<CsvTable<DiffKey<String>>> formats =
        Diff.diff(before.getFormats(), after.getFormats());
    Optional<CsvTable<DiffKey<String>>> operators =
        Diff.diff(before.getOperators(), after.getOperators());
    return new AutoValue_CsvData_Diff(ranges, shortcodes, examples, formats, operators);
  }

  // Visible for AutoValue
  CsvData() {}

  /** Returns the calling code for this CSV data. */
  public abstract DigitSequence getCallingCode();

  /**
   * Returns the single row of the metadata table for the calling code (see
   * {@code MetadataTableSchema}).
   */
  public abstract CsvTable<DigitSequence> getMetadata();

  /** Returns the ranges table for the calling code (see {@code RangesTableSchema}). */
  public abstract CsvTable<RangeKey> getRanges();

  /** Returns the shortcode table for the calling code (see {@code ShortcodesTableSchema}). */
  public abstract CsvTable<ShortcodeKey> getShortcodes();

  /** Returns the examples table for the calling code (see {@code ExamplesTableSchema}). */
  public abstract CsvTable<ExampleNumberKey> getExamples();

  /** Returns the format table for the calling code (see {@code FormatsTableSchema}). */
  public abstract CsvTable<String> getFormats();

  /**
   * Returns the alternate format table for the calling code (see {@code AltFormatsTableSchema}).
   */
  public abstract ImmutableList<AltFormatSpec> getAltFormats();

  /** Returns the operator table for the calling code (see {@code OperatorsTableSchema}). */
  public abstract CsvTable<String> getOperators();

  /** Returns the set of comments for the calling code. */
  public abstract ImmutableList<Comment> getComments();

  /** Returns the ranges CSV table converted to a {@code RangeTable} (memoized). */
  @Memoized
  public RangeTable getRangesAsTable() {
    return RangesTableSchema.toRangeTable(getRanges());
  }

  /** Returns the shortcodes CSV table converted to per-region range tables (memoized). */
  @Memoized
  public ImmutableSortedMap<PhoneRegion, RangeTable> getShortcodesAsTables() {
    return ShortcodesTableSchema.toShortcodeTables(getShortcodes());
  }

  /** Returns the examples CSV table converted to a region/type table (memoized). */
  @Memoized
  public ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> getExamplesAsTable() {
    return ExamplesTableSchema.toExampleTable(getExamples());
  }

  /**
   * Canonicalizes range tables in the CSV data. This is potentially slow for large regions.
   *
   * <p>The ranges and shortcodes tables are round-tripped through their range table forms and a
   * new instance is created from the result; all other data is carried over unchanged.
   */
  // TODO: Is there any way to reliably detect canonical CSV for sub-regions?
  public final CsvData canonicalizeRangeTables() {
    CsvTable<RangeKey> canonicalRanges = RangesTableSchema.toCsv(getRangesAsTable());
    CsvTable<ShortcodeKey> canonicalShortcodes =
        ShortcodesTableSchema.toCsv(getShortcodesAsTables());
    return create(
        getCallingCode(),
        getMetadata(),
        canonicalRanges,
        canonicalShortcodes,
        getExamples(),
        getFormats(),
        getAltFormats(),
        getOperators(),
        getComments());
  }
}

+ 126
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ExamplesTableSchema.java View File

@ -0,0 +1,126 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey.ORDERING;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Table;
import com.google.common.collect.Table.Cell;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller;
import com.google.i18n.phonenumbers.metadata.table.CsvSchema;
import com.google.i18n.phonenumbers.metadata.table.CsvTable;
import com.google.i18n.phonenumbers.metadata.table.Schema;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
/**
 * The schema of the "Example Numbers" table with rows keyed by {@link ExampleNumberKey} and
 * columns:
 * <ol>
 *   <li>{@link #NUMBER}: The national number
 *   <li>{@link #COMMENT}: Evidence for why an example number was chosen.
 * </ol>
 *
 * <p>Row keys are serialized via the marshaller and produce leading columns:
 * <ol>
 *   <li>{@code Region}: The region code of the example number.
 *   <li>{@code Type}: The {@link ValidNumberType} of the example number.
 * </ol>
 */
public final class ExamplesTableSchema {
  /** A key for rows in the example numbers table. */
  @AutoValue
  public abstract static class ExampleNumberKey {
    /** Orders keys by region and then by number type. */
    public static final Comparator<ExampleNumberKey> ORDERING =
        Comparator.comparing(ExampleNumberKey::getRegion)
            .thenComparing(ExampleNumberKey::getType);

    /** Returns the key for the given region and number type. */
    public static ExampleNumberKey of(PhoneRegion region, ValidNumberType type) {
      return new AutoValue_ExamplesTableSchema_ExampleNumberKey(region, type);
    }

    public abstract PhoneRegion getRegion();

    public abstract ValidNumberType getType();
  }

  /** A number column containing the digit sequence of a national number. */
  public static final Column<DigitSequence> NUMBER = Column.create(
      DigitSequence.class, "Number", DigitSequence.empty(), DigitSequence::of);

  /** A general comment field, usually describing how an example number was determined. */
  public static final Column<String> COMMENT = Column.ofString("Comment");

  // Serializes row keys as the leading "Region" and "Type" columns.
  private static final CsvKeyMarshaller<ExampleNumberKey> MARSHALLER = new CsvKeyMarshaller<>(
      ExamplesTableSchema::write,
      ExamplesTableSchema::read,
      Optional.of(ORDERING),
      "Region",
      "Type");

  private static final Schema COLUMNS = Schema.builder()
      .add(NUMBER)
      .add(COMMENT)
      .build();

  /** Schema instance defining the example numbers CSV table. */
  public static final CsvSchema<ExampleNumberKey> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS);

  /**
   * Converts a {@link Table} of example numbers into a {@link CsvTable}, using
   * {@link ExampleNumberKey}s as row keys. Only the {@link #NUMBER} column is populated.
   */
  public static CsvTable<ExampleNumberKey> toCsv(
      Table<PhoneRegion, ValidNumberType, DigitSequence> table) {
    ImmutableTable.Builder<ExampleNumberKey, Column<?>, Object> rows = ImmutableTable.builder();
    rows.orderRowsBy(ORDERING);
    rows.orderColumnsBy(COLUMNS.ordering());
    for (Cell<PhoneRegion, ValidNumberType, DigitSequence> example : table.cellSet()) {
      ExampleNumberKey rowKey = ExampleNumberKey.of(example.getRowKey(), example.getColumnKey());
      rows.put(rowKey, NUMBER, example.getValue());
    }
    return CsvTable.from(SCHEMA, rows.build());
  }

  /**
   * Converts a {@link CsvTable} of example numbers into an {@link ImmutableTable} of numbers
   * indexed by region and number type. Only the {@link #NUMBER} column of each row is read.
   */
  public static ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence>
      toExampleTable(CsvTable<ExampleNumberKey> csv) {
    ImmutableTable.Builder<PhoneRegion, ValidNumberType, DigitSequence> examples =
        ImmutableTable.builder();
    csv.getKeys().forEach(
        key -> examples.put(key.getRegion(), key.getType(), csv.getOrDefault(key, NUMBER)));
    return examples.build();
  }

  // Serializes a row key as its region followed by its number type.
  private static Stream<String> write(ExampleNumberKey key) {
    String region = key.getRegion().toString();
    String type = key.getType().toString();
    return Stream.of(region, type);
  }

  // Deserializes a row key from its leading [region, type] columns.
  private static ExampleNumberKey read(List<String> parts) {
    PhoneRegion region = PhoneRegion.of(parts.get(0));
    ValidNumberType type = ValidNumberType.valueOf(parts.get(1));
    return ExampleNumberKey.of(region, type);
  }

  private ExamplesTableSchema() {}
}

+ 68
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FileBasedCsvLoader.java View File

@ -0,0 +1,68 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.model.CsvData.CsvDataProvider;
import com.google.i18n.phonenumbers.metadata.table.CsvTable;
import java.io.IOException;
import java.nio.file.Path;
/**
 * A CSV provider which reads files rooted in a given directory. The file layout should match that
 * in the CSV metadata directory ({@code googledata/third_party/i18n/phonenumbers/metadata}).
 *
 * <p>The top-level {@code metadata.csv} file is loaded once, when the loader is created. The
 * tables for a calling code are read from the sub-directory named after that calling code each
 * time {@link #loadData} is called.
 */
public final class FileBasedCsvLoader implements CsvDataProvider {
  private final Path root;
  private final CsvTable<DigitSequence> metadata;

  /** Returns a CSV loader which reads files from the given base directory. */
  public static FileBasedCsvLoader using(Path dir) throws IOException {
    return new FileBasedCsvLoader(dir);
  }

  private FileBasedCsvLoader(Path root) throws IOException {
    this.root = checkNotNull(root);
    Path metadataCsv = this.root.resolve("metadata.csv");
    this.metadata = MetadataTableSchema.SCHEMA.load(metadataCsv);
  }

  /** Returns the metadata table which was loaded when this loader was created. */
  @Override
  public CsvTable<DigitSequence> loadMetadata() {
    return metadata;
  }

  /** Loads every per-calling-code table from the {@code <root>/<cc>/} directory. */
  @Override
  public CsvData loadData(DigitSequence cc) throws IOException {
    Path ccDir = root.resolve(cc.toString());
    Path rangesCsv = csvFile(ccDir, "ranges");
    Path shortcodesCsv = csvFile(ccDir, "shortcodes");
    Path examplesCsv = csvFile(ccDir, "examples");
    Path formatsCsv = csvFile(ccDir, "formats");
    Path altFormatsCsv = csvFile(ccDir, "altformats");
    Path operatorsCsv = csvFile(ccDir, "operators");
    Path commentsCsv = csvFile(ccDir, "comments");
    return CsvData.create(
        cc,
        metadata,
        RangesTableSchema.SCHEMA.load(rangesCsv),
        ShortcodesTableSchema.SCHEMA.load(shortcodesCsv),
        ExamplesTableSchema.SCHEMA.load(examplesCsv),
        FormatsTableSchema.SCHEMA.load(formatsCsv),
        AltFormatsSchema.loadAltFormats(altFormatsCsv),
        OperatorsTableSchema.SCHEMA.load(operatorsCsv),
        CommentsSchema.loadComments(commentsCsv));
  }

  // Returns the path of the CSV file with the given base name in the given directory.
  private static Path csvFile(Path dir, String name) {
    String fileName = name + ".csv";
    return dir.resolve(fileName);
  }
}

+ 637
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatSpec.java View File

@ -0,0 +1,637 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import com.google.auto.value.AutoValue;
import com.google.common.base.CharMatcher;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.function.ToIntFunction;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
* A specifier for the three types of format available in a formatting rule, "national",
* "international" and "carrier specific". Each format is represented by a single string which acts
* as a format template, and from which the necessary XML regular expressions can be recovered.
*
* <p>The basic syntax of a specifier is something like {@code "XX XXX-XXXX"}, where '{@code X}'
* represents a digit from the phone number being formatted. When converted into the legacy XML
* syntax, a national specifier with this format would represent the "pattern" attribute
* {@code "(\d{2})(\d{3})(\d{4})"} and the "format" element {@code "$1 $2-$3"}.
*
* <p>By adding the '{@code *}' character, one group of variable length may be defined. Thus
* {@code "XX XXX-XX**"} represents the pattern {@code "(\d{2})(\d{3})(\d{2,4})"}.
*
* <p>If the national prefix should be present, for either national or carrier specific formatting,
* it is represented by the '{@code #}' symbol. Similarly, for carrier specific formatting, the
* '{@code @}' symbol represents the carrier code placeholder (and must be present exactly once in
* any carrier specific format specifier).
*
* <p>By analyzing the unique prefixes of both national and carrier specific specifiers, the XML
* syntax can be derived. In a fairly simple example, the format specifiers:
* <ul>
* <li>national: {@code "(#XX) XXX-XXXX"}
* <li>carrier: {@code "#@ XX XXX-XXXX"}
* <li>international: {@code "XX XXX XXXX"}
* </ul>
* would result in:
* <ul>
* <li>pattern: {@code "(\d{2})(\d{3})(\d{4})"}
* <li>national_prefix_formatting_rule: {@code "($NP$FG)"}
* <li>carrier_specific_formatting_rule: {@code "$NP$CC $FG"}
* <li>format: {@code "$1 $2-$3"}
* <li>international_format: {@code "$1 $2 $3"}
* </ul>
* The derived "pattern" groups must be the same between all specifiers, while the "national" and
* "carrier" specifiers must share a common suffix after the "first group". This is a limitation of
* the XML representation which must be preserved here.
*
* <p>If no carrier specific format specifier is present, the extraction of a format rule will
* still occur (since the formatting rule also affects "as you type" formatting). Thus:
* <ul>
* <li>national: {@code "(XX) XXX"}
* </ul>
* will result in:
* <ul>
* <li>format: {@code "$1 $2"}
* <li>national_prefix_formatting_rule: {@code "($FG)"}
* </ul>
* and not:
* <ul>
* <li>format: {@code "($1) $2"}
* </ul>
*
* <p>An international format specifier must exist if international formatting is possible (even if
* it is identical to the national format specifier). If no international specifier exists, then
* the range of phone numbers associated with this format must be a subset of the "no international
* dialling" range, and the derived XML element "intlFormat" will contain the value "NA".
*
* <p>If literal characters such as "*" are required to be present in the format string, they can
* be escaped via a '{@code \}' (backslash) character. The set of characters that might need
* escaping is '{@code X}', '{@code *}', '{@code #}' and '{@code @}'. Note that the dollar symbol
* '{@code $}' is special, and is prohibited from ever appearing in a format specifier (even though
* it's not strictly part of the syntax).
*
* <p>A {@code FormatSpec} also defines the ranges of numbers for which this format applies. This
* is a {@link RangeTree}, rather than a {@code PrefixTree}, since length matters (different
* formats are sometimes distinguished purely on the basis of number length). The possible lengths
* of the range tree must match the possible lengths of all defined specifier strings.
*/
@AutoValue
public abstract class FormatSpec {
/**
 * Returns a format specifier from the serialized fields. Note that the given non-local
 * specifiers must share certain properties (e.g. same number of format groups, same min/max
 * length, same trailing group format). Some of this is necessary due to limitations in how
 * formats are represented in the legacy XML schema (e.g. between national and carrier specific
 * formats). Exceptions are raised when any of these properties are violated.
 *
 * <p>The minimum and maximum lengths of the returned specifier are those of the national
 * template.
 *
 * @param nationalSpec the national format specifier string (can contain \-escaped characters).
 * @param carrierSpec the optional carrier format specifier string.
 * @param intlSpec the optional international format specifier string.
 * @param localSpec the optional additional local format specifier string.
 * @param nationalPrefixOptional allows the national prefix to be omitted during parsing even if
 *     present in the format, or given during parsing when not present in the format.
 * @param comment a free-form comment for this specifier.
 */
public static FormatSpec of(
    String nationalSpec,
    Optional<String> carrierSpec,
    Optional<String> intlSpec,
    Optional<String> localSpec,
    boolean nationalPrefixOptional,
    Optional<Comment> comment) {
  FormatTemplate nationalTemplate = FormatTemplate.parse(nationalSpec);
  boolean nationalHasCarrierCode = nationalTemplate.hasCarrierCode();
  checkArgument(!nationalHasCarrierCode,
      "national format specifier must not contain carrier code: %s", nationalSpec);
  // Every optional specifier is parsed and validated against the national template.
  Optional<FormatTemplate> carrierTemplate =
      carrierSpec.map(spec -> parseCarrierSpec(spec, nationalTemplate));
  Optional<FormatTemplate> intlTemplate =
      intlSpec.map(spec -> parseIntlSpec(spec, nationalTemplate));
  Optional<FormatTemplate> localTemplate =
      localSpec.map(spec -> parseLocalSpec(spec, nationalTemplate));
  return new AutoValue_FormatSpec(
      nationalTemplate,
      carrierTemplate,
      intlTemplate,
      localTemplate,
      nationalTemplate.minLength(),
      nationalTemplate.maxLength(),
      nationalPrefixOptional,
      comment);
}
/**
 * Returns a local format specifier for the given template. Local specifiers consist of only a
 * national template, in which the national prefix is prohibited; they have no carrier,
 * international or local templates and no comment.
 *
 * @throws IllegalArgumentException if the given template has a national prefix.
 */
public static FormatSpec localFormat(FormatTemplate local) {
  boolean hasNationalPrefix = local.hasNationalPrefix();
  checkArgument(!hasNationalPrefix,
      "a local template must not have national prefix: %s", local);
  Optional<FormatTemplate> noTemplate = Optional.empty();
  Optional<Comment> noComment = Optional.empty();
  int shortest = local.minLength();
  int longest = local.maxLength();
  return new AutoValue_FormatSpec(
      local, noTemplate, noTemplate, noTemplate, shortest, longest, false, noComment);
}
/** Returns the national format template (e.g. "#XX XXX XXXX"). */
public abstract FormatTemplate national();
/**
* Returns the carrier specific format template (e.g. "(@ #XX) XXX XXXX"), or empty if no carrier
* specifier was given.
*/
public abstract Optional<FormatTemplate> carrier();
/**
* Returns the international format template (e.g. "XX-XXX-XXXX"), or empty if no international
* specifier was given.
*/
public abstract Optional<FormatTemplate> international();
/**
* Returns the local format template (e.g. "XXX-XXXX"). Local formats must correspond to the
* "Area Code Length" values in at least some of the ranges to which they are assigned.
*/
public abstract Optional<FormatTemplate> local();
/** Returns the minimum number of digits which this format matches. */
public abstract int minLength();
/** Returns the maximum number of digits which this format matches. */
public abstract int maxLength();
/**
* Returns whether, for formats without a national prefix specified, it is still possible to
* trigger this format by adding a national prefix (even though it is not shown). Formats for
* which this method returns {@code true} are grouped alongside formats with an explicit national
* prefix (since they must be ordered carefully with respect to each other to account for their
* "leading digits").
*/
public abstract boolean nationalPrefixOptional();
/** Returns the free-form comment associated with this format specifier. */
public abstract Optional<Comment> comment();
/**
 * Returns the length based bounds for this format, i.e. a range tree built from one "any digits"
 * range specification for each length from {@link #minLength()} to {@link #maxLength()}
 * inclusive.
 */
public RangeTree getLengthBasedBounds() {
  int shortest = minLength();
  int longest = maxLength();
  return RangeTree.from(
      IntStream.rangeClosed(shortest, longest).mapToObj(RangeSpecification::any));
}
/**
 * Returns a debug representation listing the national template, any carrier, local and
 * international templates which are present, the length bounds and any comment.
 */
@Override
public final String toString() {
  StringBuilder text = new StringBuilder("FormatSpec{national=");
  text.append(national());
  // Optional templates only appear in the output when they are present.
  if (carrier().isPresent()) {
    text.append(", carrier=").append(carrier().get());
  }
  if (local().isPresent()) {
    text.append(", local=").append(local().get());
  }
  if (international().isPresent()) {
    text.append(", international=").append(international().get());
  }
  text.append(", minLength=").append(minLength());
  text.append(", maxLength=").append(maxLength());
  if (comment().isPresent()) {
    text.append(", comment='").append(comment().get()).append('\'');
  }
  text.append('}');
  return text.toString();
}
// ---- RULE PARSING AND CONVERSION METHODS ----
/**
 * Parses a carrier specific format specifier and validates it against the national template.
 *
 * @param spec the carrier format specifier string.
 * @param national the already parsed national template the carrier format must agree with.
 * @return the parsed carrier template.
 * @throws IllegalArgumentException if any of the checks below fail.
 */
private static FormatTemplate parseCarrierSpec(String spec, FormatTemplate national) {
  FormatTemplate carrierTemplate = FormatTemplate.parse(spec);
  boolean hasCarrierCode = carrierTemplate.hasCarrierCode();
  checkArgument(hasCarrierCode,
      "carrier format specifier must contain carrier code: %s", spec);

  // This verifies the groups have the same lengths, but does not check for same formatting.
  boolean sameGroupLengths = carrierTemplate.isCompatibleWith(national);
  checkArgument(sameGroupLengths,
      "carrier format specifier must have compatible groups: %s - %s",
      national.getSpecifier(), spec);

  // This is really ugly, since carrier formats must share the same format in the legacy XML, but
  // can have different formatting rules for the first group. The best way to test this is just
  // compare the XML output directly instead of trying to reason about groups, since group replace
  // also needs to be taken into account.
  boolean sameXmlFormat = carrierTemplate.getXmlFormat().equals(national.getXmlFormat());
  checkArgument(sameXmlFormat,
      "carrier format specifier must have equal trailing groups: %s - %s",
      national.getSpecifier(), spec);

  // Artificial check (currently true everywhere and likely to never be broken). If this is ever
  // relaxed, the nationalPrefixForParsing regeneration code will need changing to take account
  // of ordering (e.g. generate "(<CC>)?<NP>" instead of "<NP>(<CC>)?").
  boolean prefixNotBeforeCarrierCode =
      carrierTemplate.hasNationalPrefix() && spec.indexOf('#') >= spec.indexOf('@');
  checkArgument(!prefixNotBeforeCarrierCode,
      "national prefix must precede carrier code in carrier format: %s", spec);
  return carrierTemplate;
}
/**
* Parses an international format specifier and validates it against the national template.
*
* <p>The template must not contain a national prefix or a carrier code, and its groups must be
* compatible with the groups of the national template. The check that the template has no
* separate XML prefix is currently disabled (see the comment in the method body).
*
* @param spec the international format specifier string
* @param national the already parsed national template to validate against
* @return the parsed international template
*/
private static FormatTemplate parseIntlSpec(String spec, FormatTemplate national) {
FormatTemplate intl = FormatTemplate.parse(spec);
// In theory this could be relaxed, but then when the spec is written it cannot just call
// getFormat(). For now, it's always true the international formats don't have "fancy"
// formatting around the first group (i.e. never "(XXX) XXX XXX") which makes sense since
// international formats cannot be assumed to be read by people with local knowledge.
// TODO: To reactivate this check after we are sure that first digit of
// SN of MX is no more 1 and need not to be swallowed when formatting i.e after parsing change.
// Context: We have disabled the following check to fix a MX formatting issue i.e using this
// logic {X>} to remove the mobile token(1) in international format, which is the first digit of
// the mobile subscriber number. More details in b/111967450. In general, international
// format should not have such special formatting. Can be fixed as part of b/138727490.
// checkArgument(!intl.getXmlPrefix().isPresent(),
// "international format specifier must not have separate prefix: %s", spec);
checkArgument(
!intl.hasNationalPrefix(),
"international format specifier must not contain national prefix: %s",
spec);
checkArgument(!intl.hasCarrierCode(),
"international format specifier must not contain carrier code: %s", spec);
// Same structural check as for carrier formats: group count and min/max lengths must match.
checkArgument(intl.isCompatibleWith(national),
"international format specifier must have compatible groups: %s - %s",
national.getSpecifier(), spec);
return intl;
}
/**
 * Parses a local format specifier and validates it against the national template.
 *
 * <p>A local template must have no separate XML prefix, no national prefix and no carrier code,
 * and its minimum length must be strictly less than that of the national template.
 *
 * @param spec the local format specifier string
 * @param national the already parsed national template to validate against
 * @return the parsed local template
 */
private static FormatTemplate parseLocalSpec(String spec, FormatTemplate national) {
  FormatTemplate template = FormatTemplate.parse(spec);

  boolean hasXmlPrefix = template.getXmlPrefix().isPresent();
  checkArgument(!hasXmlPrefix,
      "local format specifier must not have separate prefix: %s", spec);

  boolean hasNationalPrefix = template.hasNationalPrefix();
  checkArgument(!hasNationalPrefix,
      "local format specifier must not contain national prefix: %s", spec);

  boolean hasCarrierCode = template.hasCarrierCode();
  checkArgument(!hasCarrierCode,
      "local format specifier must not contain carrier code: %s", spec);

  boolean isShorter = template.minLength() < national.minLength();
  checkArgument(isShorter,
      "local format specifier must be shorter than the national format: %s - %s",
      national.getSpecifier(), spec);

  return template;
}
// ---- TEMPLATE CLASSES ----
/**
* A single template corresponding to a format specifier such as {@code "(# XXX) XXX-XXXX"}.
* A template represents one of the types of format (national, international, carrier specific)
* and enforces as much structural correctness as possible.
*
* <p>Templates bridge between the specifier syntax and the XML syntax, with its split prefixes
* and confusing semantics. As such, there's a lot of slightly subtle business logic in the
* parsing of templates that, over time, might need to adapt to real world changes (e.g. suffix
* separators and precise expectations of format structure).
*/
@AutoValue
public abstract static class FormatTemplate {
// This could be extended, but must never overlap with the escape characters used in the
// "skeleton" string. It must also always be limited to the Basic Multilingual Plane (BMP).
// It's really important that '$' is never a meta-character in this syntax, since we escape
// strings like "$FG" which would otherwise be broken.
private static final CharMatcher VALID_TEMPLATE_CHARS =
CharMatcher.ascii().and(CharMatcher.javaIsoControl().negate()).and(CharMatcher.isNot('$'));
private static final CharMatcher VALID_METACHARS = CharMatcher.anyOf("#@X*{>}\\");
// Need to include '$' as a separator, since groups can abut.
private static final CharMatcher SUFFIX_SEPARATOR = CharMatcher.anyOf(". /-$");
private static final char NATIONAL_PREFIX = '#';
private static final char CARRIER_CODE = '@';
private static final char REQUIRED_DIGIT = 'X';
private static final char OPTIONAL_DIGIT = '*';
private static final char SUBSTITUTION_START = '{';
private static final char SUBSTITUTION_MAP = '>';
private static final char SUBSTITUTION_END = '}';
private static final String ESCAPED_NATIONAL_PREFIX = "$NP";
private static final String ESCAPED_CARRIER_CODE = "$CC";
/**
 * Parses a format specifier (e.g. {@code "(# XXX) XXX-XXXX"}) into a template.
 *
 * <p>The specifier syntax is:
 * <ul>
 *   <li>{@code '#'}: the national prefix (at most one).
 *   <li>{@code '@'}: the carrier code (at most one).
 *   <li>{@code 'X'} / {@code '*'}: a required / optional digit. A group is a run of required
 *       digits followed by zero or more optional digits (e.g. {@code "XXX**"}).
 *   <li><code>{GROUP&gt;REPLACEMENT}</code>: a group whose digits are replaced by the given
 *       (possibly empty) string.
 *   <li>{@code '\'}: escapes the following meta-character so it is emitted literally.
 * </ul>
 * Any other permitted character is copied to the skeleton as-is.
 *
 * <p>At most one group in a specifier may have variable length, at least one group must exist,
 * and at least one group must not have a replacement.
 *
 * @param spec the format specifier string
 * @return the parsed template
 */
static FormatTemplate parse(String spec) {
  checkArgument(VALID_TEMPLATE_CHARS.matchesAllOf(spec),
      "illegal characters in template: %s", spec);
  List<FormatGroup> groups = new ArrayList<>();
  StringBuilder skeleton = new StringBuilder();
  boolean hasNationalPrefix = false;
  boolean hasCarrierCode = false;
  boolean hasVariableLengthGroup = false;
  // Used to avoid abutting groups (i.e. "XXX**XX").
  boolean canStartGroup = true;
  for (int n = 0; n < spec.length(); n++) {
    char c = spec.charAt(n);
    if (c == REQUIRED_DIGIT) {
      checkArgument(canStartGroup, "illegal group start: %s", spec);
      FormatGroup group = extractGroup(spec, n);
      checkArgument(!(hasVariableLengthGroup && group.isVariableLength()),
          "multiple variable length groups not allowed: %s", spec);
      // Accumulate rather than overwrite: a later fixed length group must not clear the flag,
      // otherwise "XX* XX XX*" would be accepted with two variable length groups.
      hasVariableLengthGroup |= group.isVariableLength();
      groups.add(group);
      skeleton.append(escapeGroupNumber(groups.size()));
      // Move to the last character of the group (since we increment again as we loop).
      n += group.maxLength() - 1;
      canStartGroup = false;
      continue;
    }
    if (c == SUBSTITUTION_START) {
      // Expect {GROUP>REPLACEMENT} where group can have optional digits (but normally won't).
      checkArgument(canStartGroup, "illegal group start: %s", spec);
      // The bounds check makes a trailing '{' fail with a proper message rather than an
      // index-out-of-bounds error.
      checkArgument(n + 1 < spec.length() && spec.charAt(n + 1) == REQUIRED_DIGIT,
          "illegal group replacement start: %s", spec);
      FormatGroup group = extractGroup(spec, n + 1);
      checkArgument(!(hasVariableLengthGroup && group.isVariableLength()),
          "multiple variable length groups not allowed: %s", spec);
      // See above: the flag must only ever go from false to true.
      hasVariableLengthGroup |= group.isVariableLength();
      // Now expect mapping character and substitution string.
      n += group.maxLength() + 1;
      // The bounds check covers specifiers which end immediately after the group (e.g. "{XXX").
      checkArgument(n < spec.length() && spec.charAt(n) == SUBSTITUTION_MAP,
          "illegal group replacement (expected %s): '%s'", SUBSTITUTION_MAP, spec);
      int end = spec.indexOf(SUBSTITUTION_END, n + 1);
      checkArgument(end != -1, "missing group replacement end: %s", spec);
      groups.add(group.withReplacement(spec.substring(n + 1, end)));
      skeleton.append(escapeGroupNumber(groups.size()));
      // Unlike the "normal" case above, you can start another group immediately after this
      // (since the {,} make it unambiguous).
      n = end;
      continue;
    }
    // Any non-group character acts as a separator, so a new group may start after it.
    canStartGroup = true;
    if (c == NATIONAL_PREFIX) {
      checkArgument(!hasNationalPrefix, "multiple national prefixes not allowed: %s", spec);
      hasNationalPrefix = true;
      skeleton.append(ESCAPED_NATIONAL_PREFIX);
      continue;
    }
    if (c == CARRIER_CODE) {
      checkArgument(!hasCarrierCode, "multiple carrier codes not allowed: %s", spec);
      hasCarrierCode = true;
      skeleton.append(ESCAPED_CARRIER_CODE);
      continue;
    }
    if (c == '\\') {
      // Blows up if trailing '\', but that's fine.
      c = spec.charAt(++n);
      checkArgument(VALID_METACHARS.matches(c), "invalid escaped character '%s': %s", c, spec);
    } else {
      // A '*' is only meaningful directly after the required digits of a group.
      checkArgument(c != OPTIONAL_DIGIT, "unexpected optional marker: %s", spec);
    }
    skeleton.append(c);
  }
  checkArgument(!groups.isEmpty(), "format specifiers must have at least one group: %s", spec);
  // Find the first group which does NOT have a replacement (one must exist). This is important
  // for determining where the prefix and suffix should be split when considering hoisting the
  // prefix into a format rule (see getSuffixStart() / getXmlPrefix() / getXmlFormat()).
  int fgIndex = 0;
  while (fgIndex < groups.size() && groups.get(fgIndex).replacement().isPresent()) {
    fgIndex++;
  }
  checkArgument(fgIndex < groups.size(), "cannot replace all groups in a template: %s", spec);
  return new AutoValue_FormatSpec_FormatTemplate(
      spec,
      hasNationalPrefix,
      hasCarrierCode,
      ImmutableList.copyOf(groups),
      fgIndex,
      skeleton.toString());
}
/**
* Returns the specifier string (e.g. "# XXX-XXXX") which is the serialized form of the
* template.
*/
public abstract String getSpecifier();
/** Whether this template formats a national prefix. */
public abstract boolean hasNationalPrefix();
/** Whether this template formats a carrier selection code prefix. */
public abstract boolean hasCarrierCode();
/** Returns the information about the groups in this template. */
public abstract ImmutableList<FormatGroup> getGroups();
/**
* Returns the index of the first group which does not have a replacement (at least one must).
*/
public abstract int getFirstAvailableGroupIndex();
// This is an internal representation of the format string used by the XML. It differs in that
// it isn't split into prefix and suffix (as required in some situations for the XML). As such
// it only contains "$NP", "$CC", "$<N>", but never "$FG". All valid specifier skeletons must
// contain "$1"..."$<N>" rather than any replacement strings.
abstract String skeleton();
/** Returns the minimum number of digits which can be matched by this template. */
public int minLength() {
  return getLength(this, group -> group.minLength());
}
/** Returns the maximum number of digits which can be matched by this template. */
public int maxLength() {
  return getLength(this, group -> group.maxLength());
}
/**
 * Returns the maximum number of digits which this template will format as a single block. Input
 * of up to this many digits is formatted as one block by this template.
 *
 * <p>This is useful when calculating the leading digits of a format since it might be
 * acceptable to match shortcodes to some formats if they would still format the shortcode
 * within the first block. This simplifies the leading digits in some cases.
 */
public int getBlockFormatLength() {
  List<FormatGroup> groups = getGroups();
  if (groups.size() == 1) {
    // With a single group, everything the template can match is one block.
    return maxLength();
  }
  // Otherwise only the minimum length of the first group is guaranteed to be one block.
  return groups.get(0).minLength();
}
/**
 * Returns a regex with one capturing group per template group (e.g. "(\d{3})(\d{4,5})").
 */
public String getXmlCapturingPattern() {
  StringBuilder pattern = new StringBuilder("(");
  String separator = "";
  for (FormatGroup group : getGroups()) {
    pattern.append(separator).append(group.toRegex());
    // Every group after the first is preceded by the close/open of adjacent capturing groups.
    separator = ")(";
  }
  return pattern.append(")").toString();
}
/**
 * Returns the format string for use in the XML (e.g. "$1 $2-$3"). The prefix part of the
 * skeleton is collapsed into the number of the first group without a replacement, and any later
 * groups which have replacements are substituted in place.
 *
 * <p>Examples:
 * <ul>
 *   <li>{@code "XXX XXX-XXX"} ==> {@code "$1 $2-$3"}
 *   <li>{@code "(#XXX) XXX-XXX"} ==> {@code "$1 $2-$3"} (the prefix is hoisted)
 *   <li>{@code "#{XXX>123} XXX-XXX"} ==> {@code "$2-$3"} ($1 was replaced and hoisted)
 *   <li>{@code "{X>}XXX-XXX"} ==> {@code "$2-$3"} ($1 was removed)
 * </ul>
 */
public String getXmlFormat() {
  int firstAvailable = getFirstAvailableGroupIndex();
  List<FormatGroup> groups = getGroups();
  // The prefix always becomes $N (which is what $FG maps to); this may well be a no-op.
  String suffix = skeleton().substring(getSuffixStart());
  String xmlFormat = "$" + (firstAvailable + 1) + suffix;
  // Apply replacements for groups after the "first available group". This isn't exercised in
  // data at the moment (2018) but avoids artificially limiting where replacement can occur.
  for (int index = firstAvailable + 1; index < groups.size(); index++) {
    Optional<String> replacement = groups.get(index).replacement();
    if (!replacement.isPresent()) {
      continue;
    }
    xmlFormat = xmlFormat.replace("$" + (index + 1), replacement.get());
  }
  return xmlFormat;
}
/**
 * Returns the format prefix for use in the XML formatting rules (e.g. "($NP $FG)"), or empty if
 * the calculated prefix is just "$FG" (which would be a no-op).
 *
 * <p>Examples:
 * <ul>
 *   <li>{@code "XXX XXX-XXX"} ==> XML prefix is empty
 *   <li>{@code "(#XXX) XXX-XXX"} ==> {@code "($NP$FG)"}
 *   <li>{@code "#{XXX>123} XXX-XXX"} ==> {@code "$NP123 $FG"}
 *   <li>{@code "{X>}XXX-XXX"} ==> XML prefix is empty (but the format will not contain $1)
 * </ul>
 */
public Optional<String> getXmlPrefix() {
  int firstAvailable = getFirstAvailableGroupIndex();
  String xmlPrefix = skeleton().substring(0, getSuffixStart());
  // Every group before the "first available group" has a replacement (by definition), so
  // substitute each "$<N>" (1-indexed) with its replacement text.
  int groupNumber = 1;
  for (FormatGroup replaced : getGroups().subList(0, firstAvailable)) {
    xmlPrefix = xmlPrefix.replace("$" + groupNumber, replaced.replacement().get());
    groupNumber++;
  }
  // The first available group itself is what "$FG" stands for.
  xmlPrefix = xmlPrefix.replace("$" + (firstAvailable + 1), "$FG");
  checkState(xmlPrefix.contains("$FG"),
      "XML prefix must always contain '$FG' (this must be a code error): %s", xmlPrefix);
  // After all this work we could still end up with a no-op substitution!
  if (xmlPrefix.equals("$FG")) {
    return Optional.empty();
  }
  return Optional.of(xmlPrefix);
}
/**
 * Returns whether this template and the given one have the same number of groups and each pair
 * of corresponding groups has the same "structure" (i.e. min/max length). Replacements are not
 * compared.
 */
boolean isCompatibleWith(FormatTemplate other) {
  List<FormatGroup> mine = getGroups();
  List<FormatGroup> theirs = other.getGroups();
  return mine.size() == theirs.size()
      && IntStream.range(0, mine.size())
          .allMatch(index -> mine.get(index).isCompatibleWith(theirs.get(index)));
}
/**
* Returns the index in the skeleton at which the prefix ends and the suffix starts.
*
* <p>The initial candidate is the first separator character found after the '$' of "$1" (or
* the end of the skeleton if there is none). The result is then never less than the index just
* past the two character token "$<N>" of the first group without a replacement.
*/
private int getSuffixStart() {
// This is only safe because "\$1" cannot be present ('$' cannot be escaped).
// The search starts one past the '$' of "$1" so that this '$' is not itself matched as a
// separator.
int suffixStart = SUFFIX_SEPARATOR.indexIn(skeleton(), skeleton().indexOf("$1") + 1);
// If no suffix start found, the entire skeleton is the prefix.
if (suffixStart == -1) {
suffixStart = skeleton().length();
}
// Now account for the fact that the first group (and others) could have replacements, which
// pushes the suffix start to just after the "first available group" (which is what becomes
// $FG). If the first available group is "$1" then we just get suffixStart.
int fgNumber = getFirstAvailableGroupIndex() + 1;
// The "+ 2" below relies on the group token being exactly two characters, hence single digit.
checkState(fgNumber < 10, "invalid first group number: %s", fgNumber);
return Math.max(suffixStart, skeleton().indexOf("$" + fgNumber) + 2);
}
/** Returns the same value as {@link #getSpecifier()}. */
@Override
public final String toString() {
return getSpecifier();
}
/** Sums the given length function (e.g. minimum or maximum length) over a template's groups. */
private static int getLength(FormatTemplate template, ToIntFunction<FormatGroup> lengthFn) {
  int total = 0;
  for (FormatGroup group : template.getGroups()) {
    total += lengthFn.applyAsInt(group);
  }
  return total;
}
/**
 * Extracts the group starting at the given index: a run of required digits followed by a run of
 * optional digits. The run of required digits gives the minimum length, both runs together give
 * the maximum length.
 */
private static FormatGroup extractGroup(String template, int start) {
  // We know that 'start' references a group start (i.e. 'X') so length must be at least 1.
  int requiredEnd = findEndOf(REQUIRED_DIGIT, template, start);
  int optionalEnd = findEndOf(OPTIONAL_DIGIT, template, requiredEnd);
  int minDigits = requiredEnd - start;
  int maxDigits = optionalEnd - start;
  return FormatGroup.of(minDigits, maxDigits);
}
/**
 * Returns the index of the first character at or after {@code start} which is not {@code c}, or
 * the length of the template if every remaining character is {@code c}.
 */
private static int findEndOf(char c, String template, int start) {
  int firstMismatch = CharMatcher.isNot(c).indexIn(template, start);
  if (firstMismatch == -1) {
    return template.length();
  }
  return firstMismatch;
}
/** Returns the skeleton token (e.g. "$3") for a 1-indexed group number in the range 1 to 9. */
private static String escapeGroupNumber(int n) {
  boolean isSingleDigit = 1 <= n && n <= 9;
  checkArgument(isSingleDigit, "bad group number: %s", n);
  return "$" + n;
}
}
/**
 * Represents contiguous digit groups in a format (e.g. "XXX" or "XXX***"). A group has a minimum
 * length (the required digits), a maximum length (required plus optional digits) and optionally
 * a replacement string.
 */
@AutoValue
public abstract static class FormatGroup {
  /** Creates a group without a replacement; {@code max} must not be less than {@code min}. */
  private static FormatGroup of(int min, int max) {
    checkArgument(max >= min, "bad group lengths: %s, %s", min, max);
    return new AutoValue_FormatSpec_FormatGroup(min, max, Optional.empty());
  }

  /** Returns a copy of this group with the same lengths and the given replacement string. */
  private FormatGroup withReplacement(String s) {
    return new AutoValue_FormatSpec_FormatGroup(minLength(), maxLength(), Optional.of(s));
  }

  /** Returns the minimum number of digits in this group. */
  public abstract int minLength();

  /** Returns the maximum number of digits in this group. */
  public abstract int maxLength();

  /** Returns the optional, arbitrary (possibly empty) replacement string for this group. */
  abstract Optional<String> replacement();

  /**
   * Returns if this group can match a variable number of digits. Only one group in any format
   * specifier can have variable length.
   */
  private boolean isVariableLength() {
    return maxLength() > minLength();
  }

  /**
   * Returns whether two groups have the same "structure" (i.e. min/max lengths), but does not
   * compare replacement values. Used only for internal checks.
   */
  private boolean isCompatibleWith(FormatGroup other) {
    return minLength() == other.minLength() && maxLength() == other.maxLength();
  }

  /**
   * Returns the regular expression matching the digits of this group: {@code \d{min,max}} for a
   * variable length group, {@code \d{min}} for a fixed length above one, otherwise {@code \d}.
   */
  private String toRegex() {
    // Plain concatenation is used instead of String.format("%d", ...) because the formatter
    // localizes digits according to the default locale, which must never leak into a regex.
    if (maxLength() > minLength()) {
      return "\\d{" + minLength() + "," + maxLength() + "}";
    } else if (minLength() > 1) {
      return "\\d{" + minLength() + "}";
    } else {
      return "\\d";
    }
  }

  /** Returns the specifier form of this group (e.g. "XXX**" or "{XXX>123}"). */
  @Override
  public final String toString() {
    String group =
        Strings.repeat("X", minLength()) + Strings.repeat("*", maxLength() - minLength());
    return replacement().map(r -> String.format("{%s>%s}", group, r)).orElse(group);
  }
}
}

+ 96
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatsTableSchema.java View File

@ -0,0 +1,96 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import com.google.common.collect.ImmutableMap;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller;
import com.google.i18n.phonenumbers.metadata.table.CsvSchema;
import com.google.i18n.phonenumbers.metadata.table.CsvTable;
import com.google.i18n.phonenumbers.metadata.table.Schema;
import java.util.Optional;
/**
* The schema of the "Formats" table with rows keyed by ID, and columns:
* <ol>
* <li>{@link #NATIONAL}: Required national format (may contain '#' for national prefix).
* <li>{@link #CARRIER}: Optional carrier format (may contain '#' and '@' for carrier
* specifier). Must be compatible with the national format (same suffix).
* <li>{@link #INTERNATIONAL}: International format (must not contain '#' or '@').
* <li>{@link #LOCAL}: Local format (must not contain '#' or '@', and must correspond to assigned
* area code lengths if present).
* <li>{@link #NATIONAL_PREFIX_OPTIONAL}: Boolean flag passed through to the format specifier
* to indicate whether the national prefix is optional for the format.
* <li>{@link #COMMENT}: Freeform comment text.
* </ol>
*
* <p>Rows keys are serialized via the marshaller and produce the leading column:
* <ol>
* <li>{@code Id}: The format ID.
* </ol>
*/
public final class FormatsTableSchema {
public static final Column<String> NATIONAL = Column.ofString("National");
public static final Column<String> CARRIER = Column.ofString("Carrier");
public static final Column<String> INTERNATIONAL = Column.ofString("International");
public static final Column<String> LOCAL = Column.ofString("Local");
public static final Column<Boolean> NATIONAL_PREFIX_OPTIONAL =
Column.ofBoolean("National Prefix Optional");
/** An arbitrary optional text comment. */
public static final Column<String> COMMENT = Column.ofString("Comment");
// Rows are keyed by the format ID, which is written to the leading "Id" column.
private static final CsvKeyMarshaller<String> MARSHALLER = CsvKeyMarshaller.ofSortedString("Id");
private static final Schema COLUMNS =
Schema.builder()
.add(NATIONAL)
.add(CARRIER)
.add(INTERNATIONAL)
.add(LOCAL)
.add(NATIONAL_PREFIX_OPTIONAL)
.add(COMMENT)
.build();
/** Schema instance defining the formats CSV table. */
public static final CsvSchema<String> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS);
/**
* Converts a CSV table into a map of format specifiers, keyed by format ID. Empty values in the
* carrier, international, local and comment columns are treated as absent.
*/
public static ImmutableMap<String, FormatSpec> toFormatSpecs(CsvTable<String> formats) {
ImmutableMap.Builder<String, FormatSpec> specs = ImmutableMap.builder();
for (String id : formats.getKeys()) {
specs.put(
id,
FormatSpec.of(
formats.getOrDefault(id, NATIONAL),
toOptional(formats.getOrDefault(id, CARRIER)),
toOptional(formats.getOrDefault(id, INTERNATIONAL)),
toOptional(formats.getOrDefault(id, LOCAL)),
formats.getOrDefault(id, NATIONAL_PREFIX_OPTIONAL),
toComment(formats.getOrDefault(id, COMMENT))));
}
return specs.build();
}
// Maps the empty string to an absent value.
private static Optional<String> toOptional(String s) {
return s.isEmpty() ? Optional.empty() : Optional.of(s);
}
// Maps the empty string to an absent comment, otherwise creates the comment from its text.
private static Optional<Comment> toComment(String s) {
return s.isEmpty() ? Optional.empty() : Optional.of(Comment.fromText(s));
}
// Not instantiable: this class only holds constants and static methods.
private FormatsTableSchema() {}
}

+ 36
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/MetadataException.java View File

@ -0,0 +1,36 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import com.google.errorprone.annotations.FormatMethod;
/**
 * Signals a problem with CSV metadata: either a structural issue in the CSV itself or a semantic
 * error in the XML representation. A MetadataException should only ever describe a problem which
 * can be fixed by editing the CSV data.
 */
public final class MetadataException extends RuntimeException {
  /** Creates an exception with the given message. */
  public MetadataException(String message) {
    super(message);
  }

  /**
   * Throws a {@code MetadataException} if the given condition is false.
   *
   * @param cond the condition which is expected to hold
   * @param msg a {@link String#format} style message template
   * @param args the arguments for the message template
   */
  @FormatMethod
  public static void checkMetadata(boolean cond, String msg, Object... args) {
    if (cond) {
      return;
    }
    String formatted = String.format(msg, args);
    throw new MetadataException(formatted);
  }
}

+ 168
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/MetadataTableSchema.java View File

@ -0,0 +1,168 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static java.util.Comparator.naturalOrder;
import com.google.common.collect.ImmutableSet;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.Timezones;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller;
import com.google.i18n.phonenumbers.metadata.table.CsvSchema;
import com.google.i18n.phonenumbers.metadata.table.MultiValue;
import com.google.i18n.phonenumbers.metadata.table.Schema;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Optional;
import java.util.stream.Stream;
/**
* The schema of the "Metadata" table with rows keyed by {@link DigitSequence} and columns:
*
* <ol>
* <li>{@link #MAIN_REGION}: The primary region associated with a calling code.
* <li>{@link #EXTRA_REGIONS}: A list of additional regions shared by the calling code.
* <li>{@link #NATIONAL_PREFIX}: The (optional) prefix used when dialling national numbers.
* <li>{@link #IDD_PREFIX}: The default international dialling (IDD) prefix.
* <li>{@link #TIMEZONE}: The default timezone name(s) for a calling code. Multiple timezones
* can be specified if separated by {@code '&'}.
* <li>{@link #MOBILE_PORTABLE_REGIONS}: A list of regions in which mobile numbers are portable
* between operators.
* <li>{@link #NATIONAL_PREFIX_OPTIONAL}: True if the national prefix is optional throughout the
* numbering plan (e.g. a prefix is defined, but does not have to be present when numbers are
* used).
* <li>{@link #EXTENSION_PREFIX}: The preferred prefix for specifying extensions to numbers.
* </ol>
*
* <p>Rows keys are serialized via the marshaller and produce the leading column:
* <ol>
* <li>{@code Calling Code}: The country calling code.
* </ol>
*/
public final class MetadataTableSchema {
/** Values in the "REGIONS" column are a sorted list of region codes. */
public static final class Regions extends MultiValue<PhoneRegion, Regions> {
private static final Regions EMPTY = new Regions(ImmutableSet.of());
/** Returns a column with the given name whose default value is the empty region list. */
public static Column<Regions> column(String name) {
return Column.create(Regions.class, name, EMPTY, Regions::new);
}
public static Regions of(PhoneRegion... regions) {
return new Regions(Arrays.asList(regions));
}
public static Regions of(Iterable<PhoneRegion> regions) {
return new Regions(regions);
}
// Values are separated by ','. NOTE(review): the trailing boolean presumably selects sorted
// values (it is 'true' here and 'false' for the unsorted DigitSequences) - confirm against
// MultiValue.
private Regions(Iterable<PhoneRegion> regions) {
super(regions, ',', naturalOrder(), true);
}
private Regions(String s) {
super(s, PhoneRegion::of, ',', naturalOrder(), true);
}
}
/**
* Values in the "NATIONAL_PREFIX" column are an (unsorted) list of prefixes, with the preferred
* prefix first.
*/
public static final class DigitSequences extends MultiValue<DigitSequence, DigitSequences> {
private static final DigitSequences EMPTY = new DigitSequences(ImmutableSet.of());
/** Returns a column with the given name whose default value is the empty prefix list. */
public static Column<DigitSequences> column(String name) {
return Column.create(DigitSequences.class, name, EMPTY, DigitSequences::new);
}
public static DigitSequences of(DigitSequence... numbers) {
return new DigitSequences(Arrays.asList(numbers));
}
private DigitSequences(Iterable<DigitSequence> numbers) {
super(numbers, ',', naturalOrder(), false);
}
private DigitSequences(String s) {
super(s, DigitSequence::of, ',', naturalOrder(), false);
}
}
/** The primary region associated with a calling code (e.g. "US" for NANPA). */
public static final Column<PhoneRegion> MAIN_REGION =
Column.create(PhoneRegion.class, "Main Region", PhoneRegion.getUnknown(), PhoneRegion::of);
/** A comma separated list of expected regions for the calling code. */
public static final Column<Regions> EXTRA_REGIONS = Regions.column("Extra Regions");
/**
* A list of prefixes used when dialling national numbers (e.g. "0" for "US"). If more than one
* prefix is given, the first prefix is assumed to be "preferred" and the others are considered
* alternatives. Having multiple prefixes is useful if a country switches between prefixes and
* a period of "parallel running" is needed.
*/
public static final Column<DigitSequences> NATIONAL_PREFIX =
DigitSequences.column("National Prefix");
/**
* The default international dialling (IDD) prefix. This is a string, rather than a digit
* sequence, because it can optionally contain a single '~' character to indicate a pause while
* dialling (e.g. "8~10" in Russia). This is stripped everywhere except when used to populate
* the "preferredInternationalPrefix" attribute in the libphonenumber XML file.
*/
public static final Column<String> IDD_PREFIX = Column.ofString("IDD Prefix");
/**
* The default value for the "Timezone" column in the ranges table (in many regions, this is a
* single constant value).
*/
public static final Column<Timezones> TIMEZONE = RangesTableSchema.TIMEZONE;
/** A comma separated list of regions in which mobile numbers are portable between carriers. */
public static final Column<Regions> MOBILE_PORTABLE_REGIONS =
Regions.column("Mobile Portable Regions");
/** Describes whether the "national prefix" is optional when parsing a national number. */
public static final Column<Boolean> NATIONAL_PREFIX_OPTIONAL =
Column.ofBoolean("National Prefix Optional");
/** The preferred prefix for specifying extensions to numbers (e.g. "ext" for "1234 ext 56"). */
public static final Column<String> EXTENSION_PREFIX = Column.ofString("Extension Prefix");
// Keys are written as a single "Calling Code" column using the string form of the digit
// sequence and read back from the first part of the key. Rows are ordered by comparing those
// string forms (so the ordering is lexicographic rather than numeric).
private static final CsvKeyMarshaller<DigitSequence> MARSHALLER = new CsvKeyMarshaller<>(
k -> Stream.of(k.toString()),
p -> DigitSequence.of(p.get(0)),
Optional.of(Comparator.comparing(Object::toString)),
"Calling Code");
private static final Schema COLUMNS = Schema.builder()
.add(MAIN_REGION)
.add(EXTRA_REGIONS)
.add(NATIONAL_PREFIX)
.add(IDD_PREFIX)
.add(TIMEZONE)
.add(MOBILE_PORTABLE_REGIONS)
.add(NATIONAL_PREFIX_OPTIONAL)
.add(EXTENSION_PREFIX)
.build();
/** Schema instance defining the metadata CSV table. */
public static final CsvSchema<DigitSequence> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS);
// Not instantiable: this class only holds constants and nested types.
private MetadataTableSchema() {}
}

+ 750
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/NumberingScheme.java View File

@ -0,0 +1,750 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata;
import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.AREA_CODE_LENGTH;
import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.FORMAT;
import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.NATIONAL_ONLY;
import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.PER_REGION_COLUMNS;
import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.REGIONS;
import static java.lang.Boolean.TRUE;
import static java.util.Comparator.comparing;
import com.google.auto.value.AutoValue;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.PrefixTree;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType;
import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType;
import com.google.i18n.phonenumbers.metadata.table.RangeTable;
import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
import com.google.i18n.phonenumbers.metadata.table.Schema;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
/**
* An abstraction of all the phone number metadata known about for a single calling code.
* <p>
* Note that there is no builder for NumberingScheme. The expectation is that CSV tables and other
* primary sources will be used to build numbering schemes at a single point in the business logic.
* Handling incremental modification of a builder, or partially built schemes just isn't something
* that's expected to be needed (though there is {@code TestNumberingScheme} for use in unit tests).
*/
@AutoValue
public abstract class NumberingScheme {
// Bitmask for [1-9] (bits 1..9 set, bit 0 clear).
private static final int NOT_ZERO_MASK = 0x3FE;
/** Top level information about a numbering scheme. */
@AutoValue
public abstract static class Attributes {
/**
 * Returns a new attributes instance for the given data, after checking that the values are
 * consistent with each other. Every failed check is reported via {@code checkMetadata}.
 *
 * @param cc the calling code of the numbering scheme (also used to prefix error messages)
 * @param mainRegion the main region, which is listed first in {@link #getRegions()}
 * @param extraRegions the other regions sharing the calling code; must not contain
 *     {@code mainRegion}
 * @param nationalPrefix all national prefixes, none of which may be contained in
 *     {@code allIddRanges} or {@code carrierPrefixes}
 * @param carrierPrefixes the carrier prefixes for national dialling
 * @param defaultIddPrefix the default IDD prefix: either empty, or digits optionally split by a
 *     single '~' (e.g. "8~10")
 * @param allIddRanges all IDD prefixes; must be empty exactly when {@code mainRegion} is the
 *     "world" region, and must contain the digits of any non-empty {@code defaultIddPrefix}
 * @param extensionPrefix the extension label, or the empty string if there is none
 * @param mobilePortableRegions the regions with mobile number portability; must all be regions
 *     of this scheme
 */
public static Attributes create(
    DigitSequence cc,
    PhoneRegion mainRegion,
    Set<PhoneRegion> extraRegions,
    ImmutableSet<DigitSequence> nationalPrefix,
    RangeTree carrierPrefixes,
    String defaultIddPrefix,
    RangeTree allIddRanges,
    String extensionPrefix,
    Set<PhoneRegion> mobilePortableRegions) {
  // In theory there could be IDD prefix for a non-geographic region (and this check could be
  // removed) but it's not something we've ever seen and don't have any expectation of.
  checkMetadata(!mainRegion.equals(PhoneRegion.getWorld()) || allIddRanges.isEmpty(),
      "[%s] IDD prefixes must not be present for non-geographic regions", cc);
  checkMetadata(mainRegion.equals(PhoneRegion.getWorld()) || !allIddRanges.isEmpty(),
      "[%s] IDD prefixes must be present for all geographic regions", cc);
  checkMetadata(nationalPrefix.stream().noneMatch(allIddRanges::contains),
      "[%s] National prefix %s and IDD prefixes (%s) must be disjoint",
      cc, nationalPrefix, allIddRanges);
  checkMetadata(nationalPrefix.stream().noneMatch(carrierPrefixes::contains),
      "[%s] National prefix %s and carrier prefixes (%s) must be disjoint",
      cc, nationalPrefix, carrierPrefixes);
  // Allow exactly one '~' to separate the prefix digits to indicate a pause during dialling
  // (this check could be relaxed in future, but it's currently true for all data).
  checkMetadata(defaultIddPrefix.isEmpty() || defaultIddPrefix.matches("[0-9]+(?:~[0-9]+)?"),
      "[%s] Invalid IDD prefix: %s", cc, defaultIddPrefix);
  // The pause marker is not a digit, so strip it before looking the prefix up in the ranges.
  DigitSequence iddPrefix = DigitSequence.of(defaultIddPrefix.replace("~", ""));
  checkMetadata(iddPrefix.isEmpty() || allIddRanges.contains(iddPrefix),
      "[%s] IDD ranges must contain the default prefix: %s", cc, iddPrefix);
  checkMetadata(!extraRegions.contains(mainRegion),
      "[%s] duplicated main region '%s' in extra regions: %s",
      cc, mainRegion, extraRegions);
  // Main region comes first in iteration order, remaining regions are ordered naturally.
  ImmutableSet.Builder<PhoneRegion> set = ImmutableSet.builder();
  set.add(mainRegion);
  extraRegions.stream().sorted().forEach(set::add);
  ImmutableSet<PhoneRegion> allRegions = set.build();
  // Prefixed with the calling code like every other check here, so that a failure can be traced
  // back to the numbering scheme it came from.
  checkMetadata(allRegions.containsAll(mobilePortableRegions),
      "[%s] invalid mobile portable regions: %s", cc, mobilePortableRegions);
  return new AutoValue_NumberingScheme_Attributes(
      cc,
      allRegions,
      nationalPrefix,
      carrierPrefixes,
      defaultIddPrefix,
      allIddRanges,
      // An empty label means "no extension prefix".
      !extensionPrefix.isEmpty() ? Optional.of(extensionPrefix) : Optional.empty(),
      ImmutableSortedSet.copyOf(Ordering.natural(), mobilePortableRegions));
}
/** Returns the unique calling code of this numbering scheme. */
public abstract DigitSequence getCallingCode();
/**
* Returns the regions represented by this numbering scheme. The main region is always present
* and listed first, and remaining regions are listed in "natural" order.
*/
public abstract ImmutableSet<PhoneRegion> getRegions();
/**
 * Returns the "main" region for this numbering scheme, which is the first region listed by
 * {@link #getRegions()}. The notion of a main region for a country calling code is slightly
 * archaic and mostly comes from the way in which the legacy XML data is structured. However
 * there are a few places in the public API where the "main" region is returned in situations
 * of ambiguity, so it can be useful to know it.
 */
public final PhoneRegion getMainRegion() {
  ImmutableList<PhoneRegion> orderedRegions = getRegions().asList();
  return orderedRegions.get(0);
}
/**
* Returns all possible national prefixes which can be used when dialling national numbers. In
* most cases this set just contains the preferred prefix, but alternate values may be present
* when a region switches between prefixes or for other reasons. Any "non preferred" prefixes
* are recognized only during parsing, and otherwise ignored.
*
* <p>If there is a preferred prefix, it is listed first, otherwise the set is empty.
*/
public abstract ImmutableSet<DigitSequence> getNationalPrefixes();
/**
 * Returns the (possibly empty) preferred prefix used when dialling national numbers (e.g.
 * "0"). This is the first entry of {@link #getNationalPrefixes()}, or the empty sequence if
 * there are no national prefixes. Not all regions require a prefix for national dialling.
 */
public DigitSequence getPreferredNationalPrefix() {
  ImmutableSet<DigitSequence> allPrefixes = getNationalPrefixes();
  if (allPrefixes.isEmpty()) {
    return DigitSequence.empty();
  }
  return allPrefixes.iterator().next();
}
/**
* Returns all carrier prefixes for national dialling. This range must not contain the national
* prefix.
*/
public abstract RangeTree getCarrierPrefixes();
/**
* Returns the (possibly empty) default international dialling (IDD) prefix, possibly
* containing a '~' to indicate a pause during dialling (e.g. "8~10" for Russia).
*/
public abstract String getDefaultIddPrefix();
/**
* Returns all IDD prefixes which may be used for international dialling. If the default prefix
* is not empty it must be contained in this range.
*/
public abstract RangeTree getIddPrefixes();
/** Returns the preferred label to use for indicating extensions for numbers. */
public abstract Optional<String> getExtensionPrefix();
/** Returns the regions in which mobile numbers are portable between carriers. */
public abstract ImmutableSet<PhoneRegion> getMobilePortableRegions();
}
/**
 * Creates a numbering scheme from a range table and example numbers. No rules are applied to the
 * data in the tables, and they are assumed to be complete.
 *
 * <p>The inputs are validated against each other before the scheme is created; the checks run
 * in a fixed order, so the first inconsistency found is the one reported.
 */
public static NumberingScheme from(
    Attributes attributes,
    RangeTable xmlTable,
    Map<PhoneRegion, RangeTable> shortcodeMap,
    Map<String, FormatSpec> formats,
    ImmutableList<AltFormatSpec> altFormats,
    Table<PhoneRegion, ValidNumberType, DigitSequence> exampleNumbers,
    List<Comment> comments) {
  ImmutableSet<PhoneRegion> regions = attributes.getRegions();

  // Checks which only validate (they return nothing).
  checkPossibleRegions(regions, xmlTable);
  checkNationalOnly(attributes, xmlTable);
  checkUnambiguousIdd(attributes, xmlTable, formats);

  // Checks which also produce the immutable values stored in the scheme.
  ImmutableSortedMap<PhoneRegion, RangeTable> checkedShortcodes =
      checkShortCodeConsistency(shortcodeMap, xmlTable);
  ImmutableMap<String, FormatSpec> checkedFormats =
      checkFormatConsistency(attributes, formats, xmlTable, checkedShortcodes);
  ImmutableList<AltFormatSpec> checkedAltFormats =
      checkAltFormatConsistency(altFormats, formats, xmlTable);
  ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> checkedExamples =
      checkExampleNumbers(regions, xmlTable, exampleNumbers);
  ImmutableList<Comment> allComments = addSyntheticComments(comments, attributes);

  return new AutoValue_NumberingScheme(
      attributes,
      xmlTable,
      checkedShortcodes,
      checkedFormats,
      checkedAltFormats,
      checkedExamples,
      allComments);
}
// Prepends synthetic comments for the main and auxiliary regions to the given comments. The
// synthetic comments give the English region name and, when auxiliary regions exist, describe
// how the regions relate to each other. Nothing is added for the non-geographic "world" region.
private static ImmutableList<Comment> addSyntheticComments(
    List<Comment> comments, Attributes attributes) {
  PhoneRegion mainRegion = attributes.getMainRegion();
  if (mainRegion.equals(PhoneRegion.getWorld())) {
    return ImmutableList.copyOf(comments);
  }
  ImmutableList.Builder<Comment> combined = ImmutableList.builder();
  combined.addAll(getRegionNameComments(mainRegion));
  ImmutableList<PhoneRegion> allRegions = attributes.getRegions().asList();
  // The main region is always first, so everything after it is an auxiliary region.
  List<PhoneRegion> auxRegions = allRegions.subList(1, allRegions.size());
  if (!auxRegions.isEmpty()) {
    String mainComment = String.format("Main region for '%s'", Joiner.on(',').join(auxRegions));
    combined.add(Comment.create(Comment.anchor(mainRegion), ImmutableList.of(mainComment)));
    String sharedComment =
        String.format("Calling code and formatting shared with '%s'", mainRegion);
    for (PhoneRegion aux : auxRegions) {
      combined.addAll(getRegionNameComments(aux));
      combined.add(Comment.create(Comment.anchor(aux), ImmutableList.of(sharedComment)));
    }
  }
  // The caller's comments go last: order matters (anchors are not unique) and the synthetic
  // comments must come first.
  combined.addAll(comments);
  return combined.build();
}
// Returns two single-line comments holding the region's English name: one for the region's main
// anchor and one for its shortcode anchor (in that order).
private static List<Comment> getRegionNameComments(PhoneRegion region) {
  ImmutableList<String> englishName = ImmutableList.of(region.getEnglishNameForXmlComments());
  Comment regionComment = Comment.create(Comment.anchor(region), englishName);
  Comment shortcodeComment = Comment.create(Comment.shortcodeAnchor(region), englishName);
  return ImmutableList.of(regionComment, shortcodeComment);
}
// Checks that the region columns present in the range table are exactly the regions of the
// numbering scheme.
private static void checkPossibleRegions(Set<PhoneRegion> regions, RangeTable xmlTable) {
  ImmutableSet<PhoneRegion> tableRegions =
      REGIONS.extractGroupColumns(xmlTable.getColumns()).keySet();
  // Allow no region column in the table if there's only one region (since it's implicit).
  boolean regionIsImplicit = tableRegions.isEmpty() && regions.size() == 1;
  checkState(regionIsImplicit || tableRegions.equals(regions),
      "regions added to range table do not match the expected numbering scheme regions\n"
          + "expected: %s\n"
          + "actual: %s\n",
      regions, tableRegions);
}
// An assumption has generally been that if a range is "national only" then it either:
// a) belongs to only one region (the one it's national only for)
// b) belongs to at least the main region (since in some schemes ranges mostly just overlap all
//    possible regions).
// Thus we preclude the possibility of having a "national only" number that appears in multiple
// regions, but not the main region.
//
// If this check is ever removed (because there is real data where this is not the case), then
// the code which generates the "<noInternationalDialling>" patterns will have to be revisited.
private static void checkNationalOnly(Attributes attributes, RangeTable xmlTable) {
  RangeTree allNationalOnly = xmlTable.getRanges(NATIONAL_ONLY, true);
  if (allNationalOnly.isEmpty()) {
    return;
  }
  ImmutableList<PhoneRegion> regions = attributes.getRegions().asList();
  PhoneRegion main = regions.get(0);
  // Anything assigned to the main region can be ignored as we allow it to have multiple regions.
  // Now we have to ensure that the remaining ranges are assigned to exactly one auxiliary region.
  RangeTree mainRanges = xmlTable.getRanges(REGIONS.getColumn(main), true);
  RangeTree remaining = allNationalOnly.subtract(mainRanges);
  if (remaining.isEmpty()) {
    return;
  }
  DigitSequence cc = attributes.getCallingCode();
  for (PhoneRegion r : regions.subList(1, regions.size())) {
    // Ranges shared with the main region are permitted (case "b" above) and were never part of
    // "remaining", so they must be excluded here as well. Otherwise a region which has both a
    // range of its own and a range shared with the main region would be rejected.
    RangeTree auxNationalOnly =
        xmlTable.getRanges(REGIONS.getColumn(r), true)
            .intersect(allNationalOnly)
            .subtract(mainRanges);
    // Anything already removed from "remaining" was already accounted for by another auxiliary
    // region.
    checkMetadata(remaining.containsAll(auxNationalOnly),
        "[%s] %s has national-only ranges which overlap other regions: %s",
        cc, r, auxNationalOnly.subtract(remaining));
    remaining = remaining.subtract(auxNationalOnly);
  }
  // This is not a data issue (it should have been checked already); reaching it means there is
  // a bug.
  checkState(remaining.isEmpty(), "[%s] ranges not assigned to any region: %s", cc, remaining);
}
/**
 * Ensures no national range can start with an IDD (international dialling code of any kind).
 * This is slightly more complex than just looking for any IDD prefix at the start of a range
 * because of cases like India, where "00800..." is a valid range and does start with IDD.
 *
 * <p>We allow this because:
 * <ol>
 *   <li>The number is required to have the national prefix in front, so must be dialled as
 *       {@code 000800...} (according to the Indian numbering plan)
 *   <li>and {@code 000...} is not a valid sequence that would lead to dialing into another
 *       region, because all calling codes start with {@code [1-9]}.
 * </ol>
 */
private static void checkUnambiguousIdd(
    Attributes attributes, RangeTable xmlTable, Map<String, FormatSpec> formats) {
  RangeTree iddPrefixes = attributes.getIddPrefixes();
  // It can be empty for non-geographic (world) numbering schemes.
  if (iddPrefixes.isEmpty()) {
    return;
  }
  // All IDDs extended by one non-zero digit. These are the prefixes which if dialled may end
  // up in another region, so they cannot be allowed at the start of any national number.
  RangeTree iddPlusOneDigit = iddPrefixes.map(idd -> idd.extendByMask(NOT_ZERO_MASK));
  // We only care about ranges up to this length, which can speed things up.
  int maxPrefixLength = iddPlusOneDigit.getLengths().last();

  // Split the ranges by whether they can be dialled with and/or without a national prefix,
  // based on how they are formatted (no format is assumed to mean no national prefix).
  RangeTree dialledWithPrefix = RangeTree.empty();
  RangeTree dialledWithoutPrefix = xmlTable.getRanges(FORMAT, FORMAT.defaultValue());
  for (Map.Entry<String, FormatSpec> entry : formats.entrySet()) {
    FormatSpec spec = entry.getValue();
    // Only bother with ranges up to the maximum prefix length we care about.
    RangeTree ranges = xmlTable.getRanges(FORMAT, entry.getKey()).slice(0, maxPrefixLength);
    boolean withPrefix = spec.nationalPrefixOptional() || spec.national().hasNationalPrefix();
    boolean withoutPrefix = spec.nationalPrefixOptional() || !withPrefix;
    if (withPrefix) {
      dialledWithPrefix = dialledWithPrefix.union(ranges);
    }
    if (withoutPrefix) {
      dialledWithoutPrefix = dialledWithoutPrefix.union(ranges);
    }
  }

  // Everything diallable as-is, plus everything diallable behind each possible national prefix.
  RangeTree allDiallablePrefixes = dialledWithoutPrefix;
  for (DigitSequence nationalPrefix : attributes.getNationalPrefixes()) {
    RangeSpecification prefixSpec = RangeSpecification.from(nationalPrefix);
    allDiallablePrefixes = allDiallablePrefixes.union(dialledWithPrefix.prefixWith(prefixSpec));
  }

  // These are prefixes which are claimed to be nationally diallable but overlap with the IDD.
  RangeTree iddOverlap = PrefixTree.from(iddPlusOneDigit).retainFrom(allDiallablePrefixes);
  checkMetadata(iddOverlap.isEmpty(),
      "[%s] ranges cannot start with IDD: %s", attributes.getCallingCode(), iddOverlap);
}
/**
 * Ensures the shortcode ranges of every region are disjoint from the main (national number)
 * ranges, and returns the shortcode tables as an immutable map sorted by region.
 *
 * <p>Consistency of format assignments between shortcode tables is not checked here; see
 * {@code checkFormatConsistency()}.
 */
private static ImmutableSortedMap<PhoneRegion, RangeTable> checkShortCodeConsistency(
    Map<PhoneRegion, RangeTable> shortcodeMap, RangeTable table) {
  ImmutableSortedMap<PhoneRegion, RangeTable> sortedShortcodes =
      ImmutableSortedMap.copyOf(shortcodeMap);
  for (Map.Entry<PhoneRegion, RangeTable> entry : sortedShortcodes.entrySet()) {
    RangeTree shortcodeRanges = entry.getValue().getAllRanges();
    RangeTree overlap = table.getAllRanges().intersect(shortcodeRanges);
    checkMetadata(overlap.isEmpty(),
        "Shortcode and national numbers overlap for %s: %s", entry.getKey(), overlap);
  }
  return sortedShortcodes;
}
private static final Schema FORMAT_SCHEMA =
Schema.builder().add(AREA_CODE_LENGTH).add(FORMAT).build();
// Checks that format specifiers and the ranges they are assigned to are consistent with each
// other and with the scheme's attributes, returning an immutable copy of the format map.
//
// We actually explicitly permit duplicate formats (for now) since the XML has them. Later, once
// everything is settled, it might be possible to add a check here.
private static ImmutableMap<String, FormatSpec> checkFormatConsistency(
    Attributes attributes,
    Map<String, FormatSpec> formatMap,
    RangeTable table,
    Map<PhoneRegion, RangeTable> shortcodes) {
  DigitSequence cc = attributes.getCallingCode();

  // Combine the format assignments of the main table and all shortcode tables.
  RangeTable.Builder allFormats = RangeTable.builder(FORMAT_SCHEMA);
  allFormats.copyNonDefaultValues(AREA_CODE_LENGTH, table, OverwriteMode.ALWAYS);
  allFormats.copyNonDefaultValues(FORMAT, table, OverwriteMode.ALWAYS);
  for (RangeTable shortcodeTable : shortcodes.values()) {
    // Throws a RangeException (IllegalArgumentException) if inconsistent write occurs.
    allFormats.copyNonDefaultValues(FORMAT, shortcodeTable, OverwriteMode.SAME);
  }
  RangeTable formatTable = allFormats.build();

  ImmutableMap<String, FormatSpec> formats = ImmutableMap.copyOf(formatMap);
  // TODO: Make this "equals" eventually (since it currently sees "synthetic" IDs).
  checkMetadata(
      formats.keySet().containsAll(formatTable.getAssignedValues(FORMAT)),
      "[%s] mismatched format IDs: %s",
      cc, Sets.symmetricDifference(formatTable.getAssignedValues(FORMAT), formats.keySet()));

  // If any of the checks relating to carrier formats are relaxed here, it might be necessary to
  // re-evaluate the logic around regeneration of nationalPrefixForParsing (so be careful!).
  boolean anyCarrierTemplate = false;
  boolean anyNationalPrefixInTemplate = false;
  boolean anyOptionalNationalPrefix = false;
  for (Map.Entry<String, FormatSpec> entry : formats.entrySet()) {
    String id = entry.getKey();
    FormatSpec spec = entry.getValue();
    RangeTree assigned = allFormats.getRanges(FORMAT, id);
    checkMetadata(!assigned.isEmpty(),
        "[%s] format specifier '%s' not assigned to any range: %s", cc, id, spec);
    checkFormatLengths(cc, spec, assigned);
    checkLocalFormatLengths(cc, formatTable, spec, assigned);

    boolean templateHasNationalPrefix =
        spec.national().hasNationalPrefix()
            || spec.carrier().map(FormatTemplate::hasNationalPrefix).orElse(false);
    anyCarrierTemplate = anyCarrierTemplate | spec.carrier().isPresent();
    anyNationalPrefixInTemplate = anyNationalPrefixInTemplate | templateHasNationalPrefix;
    anyOptionalNationalPrefix = anyOptionalNationalPrefix | spec.nationalPrefixOptional();
  }

  checkMetadata(attributes.getCarrierPrefixes().isEmpty() || anyCarrierTemplate,
      "[%s] carrier prefixes exist but no formats have carrier templates: %s",
      cc, formats.values());
  boolean schemeHasNationalPrefix = !attributes.getNationalPrefixes().isEmpty();
  checkMetadata(schemeHasNationalPrefix || !anyNationalPrefixInTemplate,
      "[%s] if no national prefix exists, it cannot be specified in any format template: %s",
      cc, formats.values());
  checkMetadata(schemeHasNationalPrefix || !anyOptionalNationalPrefix,
      "[%s] if no national prefix exists, it cannot be optional for formatting: %s",
      cc, formats.values());
  return formats;
}
// Checks that the ranges to which a format is assigned all have lengths the format can handle
// (e.g. we don't have "12xx" assigned to the format "XXX-XXX"). On failure, the exception
// message lists the assigned ranges which have an invalid length.
private static void checkFormatLengths(DigitSequence cc, FormatSpec spec, RangeTree assigned) {
  TreeSet<Integer> unexpectedLengths = new TreeSet<>(assigned.getLengths());
  Set<Integer> formatLengths = ContiguousSet.closed(spec.minLength(), spec.maxLength());
  unexpectedLengths.removeAll(formatLengths);
  if (unexpectedLengths.isEmpty()) {
    return;
  }
  // Collect every assigned range whose length the format does not support.
  RangeTree badRanges = RangeTree.empty();
  for (int length : unexpectedLengths) {
    RangeTree anyOfLength = RangeTree.from(RangeSpecification.any(length));
    badRanges = badRanges.union(assigned.intersect(anyOfLength));
  }
  throw new IllegalArgumentException(String.format(
      "[%s] format %s assigned to ranges of invalid length: %s", cc, spec, badRanges));
}
// Checks that the local lengths for ranges (as determined by area code length) is compatible
// with the assigned local format specifier. Note that it is allowed to have an area code length
// of zero and still be assigned a format with a local specifier (the specifier may be shared
// with other ranges which do have an area code length).
//
// Does nothing if the format has no local template.
private static void checkLocalFormatLengths(
    DigitSequence cc, RangeTable formatTable, FormatSpec spec, RangeTree assigned) {
  if (!spec.local().isPresent()) {
    return;
  }
  // The area code lengths actually assigned to the ranges which use this format.
  ImmutableSet<Integer> lengths =
      formatTable.subTable(assigned, AREA_CODE_LENGTH).getAssignedValues(AREA_CODE_LENGTH);
  FormatTemplate local = spec.local().get();
  // Format specifiers either vary length in the area code or the local number, but not both.
  int localLength = local.minLength();
  int localVariance = local.maxLength() - local.minLength();
  if (localVariance == 0) {
    // If there's no length variation in the "local" part, it means the area code length can
    // be variable.
    ContiguousSet<Integer> acls =
        ContiguousSet.closed(spec.minLength() - localLength, spec.maxLength() - localLength);
    // Report the assigned (offending) lengths rather than the set the format supports, so the
    // message actually names what is "not supported".
    checkMetadata(acls.containsAll(lengths),
        "[%s] area code lengths '%s' not supported by format: %s", cc, lengths, spec);
  } else {
    // If the length variation of the format is in the trailing "local" part, we expect a
    // unique area code length (only one "group" in the format can be variable).
    checkMetadata((spec.maxLength() - spec.minLength()) == localVariance,
        "[%s] invalid local format (bad length) in format specifier %s", cc, spec);
    int acl = spec.minLength() - localLength;
    checkMetadata(lengths.size() == 1 && lengths.contains(acl),
        "[%s] implied area code length(s) %s does not match expected length (%s) of format: %s",
        cc, lengths, acl, spec);
  }
}
// Checks every alternate format against its parent format: the parent must exist, the alternate
// lengths must be within the parent's national format lengths, the alternate prefix must capture
// some of the parent's ranges, and a non-empty prefix must be as narrow as possible. Returns the
// given list unchanged.
private static ImmutableList<AltFormatSpec> checkAltFormatConsistency(
    ImmutableList<AltFormatSpec> altFormats,
    Map<String, FormatSpec> formats,
    RangeTable xmlTable) {
  for (AltFormatSpec altFormat : altFormats) {
    String parentId = altFormat.parentFormatId();
    FormatSpec parent = formats.get(parentId);
    checkMetadata(parent != null, "unknown parent format ID in alternate format: %s", altFormat);
    Set<Integer> altLengths = getLengths(altFormat.template());
    checkMetadata(getLengths(parent.national()).containsAll(altLengths),
        "alternate format lengths must be bounded by parent format lengths: %s", altFormat);
    // Only care about the parent ranges which have the same length(s) as the alt format.
    RangeTree lengthMask = RangeTree.from(altLengths.stream().map(RangeSpecification::any));
    RangeTree parentRanges = xmlTable.getRanges(FORMAT, parentId).intersect(lengthMask);
    RangeTree captured = PrefixTree.from(altFormat.prefix()).retainFrom(parentRanges);
    checkMetadata(!captured.isEmpty(),
        "alternate format must capture some of the parent format ranges: %s", altFormat);
    int prefixLength = altFormat.prefix().length();
    if (prefixLength <= 0) {
      continue;
    }
    // A really ugly, but useful check to find if there's a better prefix. Specifically, it
    // determines if the given prefix is "over-capturing" ranges (e.g. prefix is "1[2-8]" but
    // only "1[3-6]" exists in the parent format's assigned ranges).
    RangeSpecification minSpec = squashToPrefixLength(captured, prefixLength);
    checkMetadata(minSpec.equals(altFormat.prefix()),
        "alternate format prefix '%s' is too broad, it should be '%s' for: %s",
        altFormat.prefix(), minSpec, altFormat);
  }
  return altFormats;
}

// Returns the smallest single range specification of the given length which covers the start of
// every captured range. Since this is an odd, non set-like operation, it's just done "manually"
// using bit masks. It's not a union of the paths, it's a "squashing".
private static RangeSpecification squashToPrefixLength(RangeTree captured, int prefixLength) {
  // Start with all the paths trimmed to the prefix length (e.g. "123", "145", "247"). All
  // range specifications in the slice are the same length as the prefix we started with.
  RangeTree slice = captured.slice(prefixLength);
  // Now union the digit masks at each depth for all paths in the slice (in theory there
  // could be a "squash" operation on RangeSpecification to do all this).
  int[] masks = new int[prefixLength];
  slice.asRangeSpecifications().forEach(path -> {
    for (int depth = 0; depth < prefixLength; depth++) {
      masks[depth] |= path.getBitmask(depth);
    }
  });
  // Now reconstruct the single "squashed" range specification (e.g. "[12][24][357]").
  RangeSpecification squashed = RangeSpecification.empty();
  for (int mask : masks) {
    squashed = squashed.extendByMask(mask);
  }
  return squashed;
}
// Returns every length from the template's minimum length to its maximum length (inclusive).
private static Set<Integer> getLengths(FormatTemplate t) {
  int shortest = t.minLength();
  int longest = t.maxLength();
  return ContiguousSet.closed(shortest, longest);
}
// Checks that, for every region, there is exactly one example number per number type assigned in
// that region, and that each example number is a valid number in the ranges for its type.
// Returns an immutable copy of the example numbers.
private static ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> checkExampleNumbers(
    Set<PhoneRegion> regions,
    RangeTable table,
    Table<PhoneRegion, ValidNumberType, DigitSequence> exampleNumbers) {
  for (PhoneRegion region : regions) {
    RangeTree regionRanges = table.getRanges(REGIONS.getColumn(region), TRUE);
    RangeTable regionTable = table.subTable(regionRanges, XmlRangesSchema.TYPE);
    Map<ValidNumberType, DigitSequence> regionExamples = exampleNumbers.row(region);
    ImmutableSet<ValidNumberType> types = regionTable.getAssignedValues(XmlRangesSchema.TYPE);
    checkMetadata(types.equals(regionExamples.keySet()),
        "mismatched types for example numbers in region %s\nExpected: %s\nActual: %s",
        region, types, regionExamples);
    for (ValidNumberType type : types) {
      DigitSequence example = regionExamples.get(type);
      RangeTree validRanges = regionTable.getRanges(XmlRangesSchema.TYPE, type);
      // Special case, since we permit example numbers for fixed line/mobile to be valid for the
      // combined range as well.
      //
      // This logic smells, since it reveals information about the XML structure (in which fixed
      // line and mobile ranges can overlap). However if we insist that fixed line examples are
      // in the "fixed line only" range, we end up with problems if (mobile == fixed line), since
      // there is no "fixed line only" range (but there is an example number in the XML).
      boolean fixedLineOrMobile =
          type == ValidNumberType.MOBILE || type == ValidNumberType.FIXED_LINE;
      if (fixedLineOrMobile) {
        RangeTree combined =
            regionTable.getRanges(XmlRangesSchema.TYPE, ValidNumberType.FIXED_LINE_OR_MOBILE);
        validRanges = validRanges.union(combined);
      }
      checkMetadata(validRanges.contains(example),
          "invalid example number '%s' of type %s in region %s", example, type, region);
    }
  }
  return ImmutableTable.copyOf(exampleNumbers);
}
/**
 * Returns the top-level attributes of this numbering scheme (calling code, regions, national,
 * carrier and IDD prefixes, extension prefix and mobile portable regions).
 */
public abstract Attributes getAttributes();
// TODO: Inline the wrapper methods below.
/**
 * Returns the unique calling code of this numbering scheme (a convenience wrapper around
 * {@link Attributes#getCallingCode()}).
 */
public DigitSequence getCallingCode() {
  Attributes attributes = getAttributes();
  return attributes.getCallingCode();
}
/**
 * Returns the regions represented by this numbering scheme (a convenience wrapper around
 * {@link Attributes#getRegions()}). The main region is always present and listed first, and
 * remaining regions are listed in "natural" order.
 */
public ImmutableSet<PhoneRegion> getRegions() {
  Attributes attributes = getAttributes();
  return attributes.getRegions();
}
/**
* Returns a range table containing per-range attributes according to
* {@link XmlRangesSchema#COLUMNS}.
*/
public abstract RangeTable getTable();
/**
 * Returns a RangeTable restricted to the given region, which conforms to the
 * {@link XmlRangesSchema} schema, with the exception that no region columns exist.
 *
 * <p>The given region must be one of the regions of this numbering scheme, otherwise the
 * argument check fails.
 */
public final RangeTable getTableFor(PhoneRegion region) {
  checkArgument(getRegions().contains(region),
      "invalid region '%s' for calling code '%s'", region, getCallingCode());
  RangeTable fullTable = getTable();
  // Only the ranges flagged as belonging to the region are kept.
  RangeTree regionRanges = fullTable.getRanges(REGIONS.getColumn(region), TRUE);
  return fullTable.subTable(regionRanges, PER_REGION_COLUMNS);
}
/**
 * Returns the shortcode range tables of this numbering scheme, keyed (and sorted) by region.
 * Use {@link #getShortcodesFor(PhoneRegion)} to look up the table of a single region.
 */
public abstract ImmutableSortedMap<PhoneRegion, RangeTable> getShortcodes();
/**
 * Returns the RangeTable for the shortcodes of the given region, or empty if there is no
 * shortcode table for that region. The given region must be one of the regions of this
 * numbering scheme, otherwise the argument check fails.
 */
public final Optional<RangeTable> getShortcodesFor(PhoneRegion region) {
  checkArgument(getRegions().contains(region),
      "invalid region '%s' for calling code '%s'", region, getCallingCode());
  RangeTable shortcodeTable = getShortcodes().get(region);
  return Optional.ofNullable(shortcodeTable);
}
/** Returns the map of format ID to format specifier. */
public abstract ImmutableMap<String, FormatSpec> getFormats();
/** Returns a list of alternate formats which are also expected for this numbering scheme. */
public abstract ImmutableList<AltFormatSpec> getAlternateFormats();
/** Returns a table of example numbers for each region code and number type. */
public abstract ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> getExampleNumbers();
/**
* Returns all comments known about by this numbering scheme. Internal method, callers should
* always use {@link #getComments(Anchor)} instead.
*/
abstract ImmutableList<Comment> getAllComments();
/**
 * Returns the comments with the specified anchor for this numbering scheme, in the order they
 * are held by the scheme. The anchor's region must be one of the regions of this numbering
 * scheme, otherwise the argument check fails.
 */
public ImmutableList<Comment> getComments(Anchor anchor) {
  PhoneRegion anchorRegion = anchor.region();
  checkArgument(getAttributes().getRegions().contains(anchorRegion),
      "invalid region: %s", anchorRegion);
  ImmutableList<Comment> allComments = getAllComments();
  return allComments.stream()
      .filter(comment -> anchor.equals(comment.getAnchor()))
      .collect(toImmutableList());
}
/**
* An encapsulation of a comment to be associated with an element in the XML. Rather than have
* many APIs for setting/getting comments on a {@link NumberingScheme}, the approach taken here
* is to let comments describe for themselves where they go but keep them in one big bucket.
* <p>
* This simplifies a lot of the intermediate APIs in the builders, but is less efficient (since
* finding comments is now a linear search). If this is ever an issue, they should be mapped by
* key, using a {@code ListMultimap<String, Comment>} (since comments are also ordered by their
* number).
*/
@AutoValue
public abstract static class Comment {
private static final Joiner JOIN_LINES = Joiner.on('\n');
private static final Splitter SPLIT_LINES = Splitter.on('\n');
/**
 * An anchor defining which element, in which territory, a comment should be attached to.
 * Anchors are ordered by region and then by label.
 */
@AutoValue
public abstract static class Anchor implements Comparable<Anchor> {
  // Special anchor for comments that are not stored in the comment table, but are attached to
  // data directly (e.g. formats).
  private static final Anchor ANONYMOUS = of(PhoneRegion.getUnknown(), "");

  private static final Comparator<Anchor> ORDERING =
      Comparator.comparing(Anchor::region).thenComparing(Anchor::label);

  /** Creates a comment anchor from a region and a label naming the element within it. */
  static Anchor of(PhoneRegion region, String label) {
    // TODO: Add check for valid label.
    return new AutoValue_NumberingScheme_Comment_Anchor(region, label);
  }

  /** The region of the territory this comment should be attached to. */
  public abstract PhoneRegion region();

  /**
   * The label of the element in the territory this comment should be attached to. The factory
   * methods on {@link Comment} use "XML" for a region's main comment block, "SC" for its
   * shortcode block and the name of the XML (shortcode) type otherwise.
   */
  public abstract String label();

  @Override
  public int compareTo(Anchor that) {
    return ORDERING.compare(this, that);
  }
}
// Creates an anchor with an arbitrary label. Private since we want to funnel people through the
// type safe factory methods.
private static Anchor anchor(PhoneRegion region, String label) {
  Anchor created = new AutoValue_NumberingScheme_Comment_Anchor(region, label);
  return created;
}
/** Returns a key identifying a comment for a region as a whole (its label is "XML"). */
public static Anchor anchor(PhoneRegion region) {
  String regionLabel = "XML";
  return anchor(region, regionLabel);
}
/**
 * Returns a key identifying a comment for the validation range of a given type in a region.
 * The label of the key is the string form of the given type.
 */
public static Anchor anchor(PhoneRegion region, XmlNumberType xmlType) {
  String typeLabel = xmlType.toString();
  return anchor(region, typeLabel);
}
/**
 * Returns a key identifying a comment for the shortcodes of a region as a whole (its label is
 * "SC"), as opposed to the range of one specific shortcode type.
 */
public static Anchor shortcodeAnchor(PhoneRegion region) {
  String shortcodeLabel = "SC";
  return anchor(region, shortcodeLabel);
}
/**
 * Returns a key identifying a comment for the validation range of a given shortcode type in
 * a region. The label of the key is the string form of the given type.
 */
public static Anchor shortcodeAnchor(PhoneRegion region, XmlShortcodeType xmlType) {
  String typeLabel = xmlType.toString();
  return anchor(region, typeLabel);
}
/**
 * Creates a comment that applies to the data identified by the specified anchor. The given
 * lines are copied.
 */
public static Comment create(Anchor anchor, List<String> lines) {
  ImmutableList<String> copiedLines = ImmutableList.copyOf(lines);
  return new AutoValue_NumberingScheme_Comment(anchor, copiedLines);
}
/**
 * Creates a comment with the special anonymous anchor, for comments which are attached to
 * data directly rather than being looked up by anchor. The given lines are copied.
 */
public static Comment createAnonymous(List<String> lines) {
  ImmutableList<String> copiedLines = ImmutableList.copyOf(lines);
  Anchor anonymous = Anchor.ANONYMOUS;
  return new AutoValue_NumberingScheme_Comment(anonymous, copiedLines);
}
/**
 * Creates a comment for the given anchor from a single string, with one comment line per
 * newline-separated line of the text.
 */
public static Comment fromText(Anchor anchor, String text) {
  List<String> lines = SPLIT_LINES.splitToList(text);
  return create(anchor, lines);
}
/**
 * Creates an anonymous comment from a single string, with one comment line per
 * newline-separated line of the text.
 */
public static Comment fromText(String text) {
  List<String> lines = SPLIT_LINES.splitToList(text);
  return createAnonymous(lines);
}
/**
* Returns the key which defines what this comment relates to (and thus where it should appear
* in the XML file).
*/
public abstract Anchor getAnchor();
/** The lines of a single multi-line comment. */
// TODO: Switch to a single string (with newlines) which is what's done elsewhere.
public abstract ImmutableList<String> getLines();
/** Returns the lines of this comment joined into a single newline-separated string. */
public String toText() {
  ImmutableList<String> lines = getLines();
  return JOIN_LINES.join(lines);
}
// Visible for AutoValue.
Comment() {}
}
// Visible for AutoValue.
NumberingScheme() {}
}

+ 63
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/NumberingSchemes.java View File

@ -0,0 +1,63 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.util.function.Function.identity;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.MetadataKey;
import java.util.List;
/**
* Collection of numbering schemes, mapped primarily by calling code, but available via other
* mappings (e.g. metadata key) for convenience.
*/
// TODO: Delete this (it's hardly used and very little more than a simple collection).
@AutoValue
public abstract class NumberingSchemes {
  /**
   * Builds a collection from the given schemes. The schemes are indexed by their calling code, and
   * one metadata key is derived for every (region, calling code) pair reported by each scheme.
   *
   * @param schemes the numbering schemes to aggregate
   * @return a collection exposing the schemes by calling code together with all derived keys
   */
  public static NumberingSchemes from(List<NumberingScheme> schemes) {
    ImmutableMap<DigitSequence, NumberingScheme> byCallingCode =
        schemes.stream().collect(toImmutableMap(NumberingScheme::getCallingCode, identity()));
    // Each map key is the calling code of its scheme, so it can be reused when creating keys.
    ImmutableSet<MetadataKey> regionalKeys =
        byCallingCode.entrySet().stream()
            .flatMap(
                entry ->
                    entry.getValue().getRegions().stream()
                        .map(region -> MetadataKey.create(region, entry.getKey())))
            .collect(toImmutableSet());
    return new AutoValue_NumberingSchemes(byCallingCode, regionalKeys);
  }

  /** Returns the top-level numbering schemes, keyed by calling code. */
  // TODO: Rename to getSchemeMap() since it's confusing, or add a direct getter.
  public abstract ImmutableMap<DigitSequence, NumberingScheme> getSchemes();

  /** Returns every calling code which has a top-level scheme in this collection. */
  public ImmutableSet<DigitSequence> getCallingCodes() {
    ImmutableMap<DigitSequence, NumberingScheme> schemes = getSchemes();
    return schemes.keySet();
  }

  /** Returns the metadata keys of all regional schemes in this collection. */
  public abstract ImmutableSet<MetadataKey> getKeys();

  // Visible for AutoValue.
  NumberingSchemes() {}
}

+ 88
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/OperatorsTableSchema.java View File

@ -0,0 +1,88 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.ColumnGroup;
import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller;
import com.google.i18n.phonenumbers.metadata.table.CsvSchema;
import com.google.i18n.phonenumbers.metadata.table.Schema;
/**
* The schema of the "Operators" table with rows keyed by operator ID and columns:
* <ol>
* <li>{@link #SELECTION_CODES}: Operator selection codes for national dialling.
* <li>{@link #IDD_PREFIXES}: International direct dialling codes.
* <li>{@link #NAMES}: A group of columns containing the name of the operator, potentially in
* multiple languages. Note that English translations for all operators need not be present.
* </ol>
*
* <p>Row keys are serialized via the marshaller and produce the leading column:
* <ol>
* <li>{@code Id}: The operator ID.
* </ol>
*
* <p>The default IDD prefix should not be in this table, but is instead stored in the top-level
* {@link MetadataTableSchema#IDD_PREFIX} column.
*
* <p>Note that there is a special case in which we need to store a selection code or IDD code, but
* it does not belong to an operator with an assigned range (e.g. it's a universally available code).
* In these situations, you should ensure that the operator ID starts with "__" (double underscore)
* to prevent consistency checks from complaining about unassigned operators. You can also omit a
* name for the row, but should probably add a comment.
*/
public final class OperatorsTableSchema {
/**
* A comma separated list of "selection codes" (as range specifications) which are added to
* national numbers (not always as a prefix) to select an operator for national dialling.
* This will often contain many of the same values as {@link #IDD_PREFIXES} but need not be
* identical.
*
* <p>Note that while a single operator may have more than one code associated with it, the same
* code cannot appear in more than one row in this table.
*/
public static final Column<String> SELECTION_CODES = Column.ofString("Domestic Selection Codes");
/**
* A comma separated list of "International Direct Dialing" codes (as range specifications) which
* are prefixes for international dialling. This will often contain many of the same prefixes as
* {@link #SELECTION_CODES} but need not be identical.
*
* <p>Note that while a single operator may have more than one code associated with it, the same
* code cannot appear in more than one row in this table.
*/
public static final Column<String> IDD_PREFIXES = Column.ofString("International Dialling Codes");
/** The "Name:XXX" column group in the operator table. */
public static final ColumnGroup<SimpleLanguageTag, String> NAMES =
ColumnGroup.byLanguage(Column.ofString("Name"));
/** The free text comment column; this is the same column instance as used by the ranges table. */
public static final Column<String> COMMENT = RangesTableSchema.COMMENT;
// Marshaller for the operator ID row key, which is written to the leading "Id" column.
private static final CsvKeyMarshaller<String> MARSHALLER = CsvKeyMarshaller.ofSortedString("Id");
// The non-key columns of the operators table, in the order they are added here.
private static final Schema COLUMNS = Schema.builder()
.add(SELECTION_CODES)
.add(IDD_PREFIXES)
.add(NAMES)
.add(COMMENT)
.build();
/** Schema instance defining the operators CSV table. */
public static final CsvSchema<String> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS);
// Holder for static schema constants only; not instantiable.
private OperatorsTableSchema() {}
}

+ 396
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/RangesTableSchema.java View File

@ -0,0 +1,396 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.DiscreteDomain.integers;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.util.Comparator.comparing;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.joining;
import com.google.common.base.Splitter;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Range;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag;
import com.google.i18n.phonenumbers.metadata.model.MetadataTableSchema.Regions;
import com.google.i18n.phonenumbers.metadata.proto.Enums.Provenance;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.table.Change;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.ColumnGroup;
import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller;
import com.google.i18n.phonenumbers.metadata.table.CsvSchema;
import com.google.i18n.phonenumbers.metadata.table.CsvTable;
import com.google.i18n.phonenumbers.metadata.table.MultiValue;
import com.google.i18n.phonenumbers.metadata.table.RangeKey;
import com.google.i18n.phonenumbers.metadata.table.RangeTable;
import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
import com.google.i18n.phonenumbers.metadata.table.Schema;
import java.time.ZoneId;
import java.util.List;
import java.util.NavigableSet;
import java.util.Optional;
import java.util.TreeSet;
import java.util.stream.Stream;
/**
* The schema of the standard "Ranges" table with rows keyed by {@link RangeKey} and columns:
* <ol>
* <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not
* the same as XmlNumberType or ValidNumberType). All ranges should be assigned a type.
* <li>{@link #TARIFF}: The expected cost of numbers in a range (combining TYPE and TARIFF
* can yield the internal ValidNumberType). All ranges should be assigned a tariff.
* <li>{@link #AREA_CODE_LENGTH}: The length of an optional prefix which may be removed from
* numbers in a range for local dialling. Local only lengths are derived using this column.
* <li>{@link #NATIONAL_ONLY}: True if numbers in a range cannot be dialled from outside its
* region. The "noInternationalDialling" ranges are derived from this column.
* <li>{@link #SMS}: True if numbers in a range are expected to support SMS.
* <li>{@link #OPERATOR}: The expected operator (carrier) ID for a range (or empty if no carrier
* is known).
* <li>{@link #FORMAT}: The expected format ID for a range (or empty if no formatting should be
* applied).
* <li>{@link #TIMEZONE}: The timezone names for a range (or empty to imply the default
* timezones). Multiple timezones can be specified if separated by {@code '&'}.
* <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where a range is
* set to {@code true} if that range is valid within the region {@code XX}.
* <li>{@link #GEOCODES}: A group of String columns in the form "Geocode:XXX" containing the
* geocode string for a range, where {@code XXX} is the language code of the string.
* <li>{@link #PROVENANCE}: Indicates the most important reason for a range to be valid.
* <li>{@link #COMMENT}: Free text field usually containing evidence related to the provenance.
* </ol>
*
* <p>Row keys are serialized via the marshaller and produce leading columns:
* <ol>
* <li>{@code Prefix}: The prefix (RangeSpecification) for the ranges in a row (e.g. "12[3-6]").
* <li>{@code Length}: A set of lengths for the ranges in a row (e.g. "9", "8,9" or "5,7-9").
* </ol>
*/
public final class RangesTableSchema {
/**
* External number type enum. This is technically much better than ValidNumberType since it
* splits type and cost properly. Unfortunately the internal logic of the phonenumber library
* doesn't really cope with this, which is why we convert to {@code XmlRangesSchema} before
* creating legacy data structures.
*
* <p>This enum can be modified as new types are requested from data providers, providing the
* type mapping to ValidNumberType is updated appropriately. Note that until it's clear that
* mapping types such as {@link #M2M} to {@link ValidNumberType#UNKNOWN} will work okay, we
* should be very careful about using the additional types. Additional types need to be removed
* before the generated table can be turned into a {@link NumberingScheme}.
*/
public enum ExtType {
  /** Default value not permitted in real data. */
  UNKNOWN,
  /** Maps to {@link ValidNumberType#FIXED_LINE}. */
  FIXED_LINE,
  /** Maps to {@link ValidNumberType#MOBILE}. */
  MOBILE,
  /** Maps to {@link ValidNumberType#FIXED_LINE_OR_MOBILE}. */
  FIXED_LINE_OR_MOBILE,
  /** Maps to {@link ValidNumberType#VOIP}. */
  VOIP,
  /** Maps to {@link ValidNumberType#PAGER}. */
  PAGER,
  /** Maps to {@link ValidNumberType#PERSONAL_NUMBER}. */
  PERSONAL_NUMBER,
  /** Maps to {@link ValidNumberType#UAN}. */
  UAN,
  /** Maps to {@link ValidNumberType#VOICEMAIL}. */
  VOICEMAIL,
  /** Machine-to-machine numbers (additional type for future support). */
  M2M,
  /** ISP dial-up numbers (additional type for future support). */
  ISP;

  // Only the types listed here have an identically named ValidNumberType; UNKNOWN, M2M and ISP
  // are deliberately absent and therefore have no mapping.
  private static final ImmutableMap<ExtType, ValidNumberType> TYPE_MAP =
      Stream.of(
              FIXED_LINE,
              MOBILE,
              FIXED_LINE_OR_MOBILE,
              PAGER,
              PERSONAL_NUMBER,
              UAN,
              VOICEMAIL,
              VOIP)
          .collect(toImmutableMap(identity(), type -> ValidNumberType.valueOf(type.name())));

  /** Returns the equivalent {@link ValidNumberType}, or empty if this type has no mapping. */
  public Optional<ValidNumberType> toValidNumberType() {
    ValidNumberType mapped = TYPE_MAP.get(this);
    return mapped != null ? Optional.of(mapped) : Optional.empty();
  }
}
/**
* External tariff enum. By splitting tariff information out from the "line type", we can
* represent a much wider (and more realistic) set of combinations for number ranges. When
* combined with {@link ExtType}, this maps back to {@code ValidNumberType}.
*/
public enum ExtTariff {
  /** Does not affect ValidNumberType mapping. */
  STANDARD_RATE,
  /** Maps to {@link ValidNumberType#TOLL_FREE}. */
  TOLL_FREE,
  /** Maps to {@link ValidNumberType#SHARED_COST}. */
  SHARED_COST,
  /** Maps to {@link ValidNumberType#PREMIUM_RATE}. */
  PREMIUM_RATE;

  // STANDARD_RATE is deliberately absent: it has no ValidNumberType of its own.
  private static final ImmutableMap<ExtTariff, ValidNumberType> TARIFF_MAP =
      Stream.of(TOLL_FREE, SHARED_COST, PREMIUM_RATE)
          .collect(toImmutableMap(identity(), tariff -> ValidNumberType.valueOf(tariff.name())));

  /** Returns the equivalent {@link ValidNumberType}, or empty if this tariff has no mapping. */
  public Optional<ValidNumberType> toValidNumberType() {
    ValidNumberType mapped = TARIFF_MAP.get(this);
    return mapped != null ? Optional.of(mapped) : Optional.empty();
  }
}
/**
* The value in the "TIMEZONE" column, which is effectively a list of timezone strings.
*
* <p>Both constructors hand {@code '&'} and a comparator on {@link ZoneId#getId()} to
* {@link MultiValue}; the {@code '&'} matches the separator documented for the timezone column.
*/
public static final class Timezones extends MultiValue<ZoneId, Timezones> {
/**
* Returns a column with the given name holding {@code Timezones} values.
*
* <p>NOTE(review): the {@code new Timezones("")} and {@code Timezones::new} arguments are
* presumably the column's default value and its string parser respectively; confirm against
* {@code Column.create}.
*/
public static Column<Timezones> column(String name) {
return Column.create(Timezones.class, name, new Timezones(""), Timezones::new);
}
/** Creates a value from the given zone IDs. */
public Timezones(Iterable<ZoneId> ids) {
// NOTE(review): the meaning of the trailing 'true' flag is defined by MultiValue; confirm there.
super(ids, '&', comparing(ZoneId::getId), true);
}
/** Creates a value from a string, using {@link ZoneId#of(String)} to convert elements. */
public Timezones(String s) {
super(s, ZoneId::of, '&', comparing(ZoneId::getId), true);
}
}
/** The "Type" column holding the {@link ExtType} of a range ({@link ExtType#UNKNOWN} by default). */
public static final Column<ExtType> TYPE = Column.of(ExtType.class, "Type", ExtType.UNKNOWN);
/**
* The "Tariff" column holding the {@link ExtTariff} of a range; {@link ExtTariff#STANDARD_RATE}
* is passed where {@link #TYPE} passes its default value.
*/
public static final Column<ExtTariff> TARIFF =
Column.of(ExtTariff.class, "Tariff", ExtTariff.STANDARD_RATE);
/**
* The "Area Code Length" column in the range table, denoting the length of a prefix which can
* be removed from all numbers in a range to obtain locally diallable numbers. If an
* "area code" is not optional for dialling, then no value should be set here.
*/
public static final Column<Integer> AREA_CODE_LENGTH =
Column.ofUnsignedInteger("Area Code Length");
/** Denotes ranges which cannot be dialled internationally. */
public static final Column<Boolean> NATIONAL_ONLY = Column.ofBoolean("National Only");
/** Denotes ranges which can reasonably be expected to receive SMS. */
public static final Column<Boolean> SMS = Column.ofBoolean("Sms");
/** The ID of the primary/original operator assigned to a range. */
public static final Column<String> OPERATOR = Column.ofString("Operator");
/** The ID of the format assigned to a range. */
public static final Column<String> FORMAT = Column.ofString("Format");
/** An '&'-separated list of timezone IDs associated with this range. */
public static final Column<Timezones> TIMEZONE = Timezones.column("Timezone");
/** The "Region:XX" column group in the range table. */
public static final ColumnGroup<PhoneRegion, Boolean> REGIONS =
ColumnGroup.byRegion(Column.ofBoolean("Region"));
/** The "Regions" column in the CSV table (a single multi-value replacing {@link #REGIONS}). */
public static final Column<Regions> CSV_REGIONS = Regions.column("Regions");
/** The "Geocode:XXX" column group in the range table. */
public static final ColumnGroup<SimpleLanguageTag, String> GEOCODES =
ColumnGroup.byLanguage(Column.ofString("Geocode"));
/** The provenance column indicating why a range is considered valid. */
public static final Column<Provenance> PROVENANCE =
Column.of(Provenance.class, "Provenance", Provenance.UNKNOWN);
/** An arbitrary text comment, usually (at least) supplying information about the provenance. */
public static final Column<String> COMMENT = Column.ofString("Comment");
/**
* Marshaller for constructing CsvTable from RangeTable. Row keys are written by {@link #write}
* and parsed by {@link #read} as the two leading columns "Prefix" and "Length".
*/
private static final CsvKeyMarshaller<RangeKey> MARSHALLER = new CsvKeyMarshaller<>(
RangesTableSchema::write,
RangesTableSchema::read,
Optional.of(RangeKey.ORDERING),
"Prefix",
"Length");
/** The non-key columns of a range table. */
public static final Schema TABLE_COLUMNS =
Schema.builder()
.add(TYPE)
.add(TARIFF)
.add(AREA_CODE_LENGTH)
.add(NATIONAL_ONLY)
.add(SMS)
.add(OPERATOR)
.add(FORMAT)
.add(TIMEZONE)
.add(REGIONS)
.add(GEOCODES)
.add(PROVENANCE)
.add(COMMENT)
.build();
/**
* The columns for the serialized CSV table. Note that the "REGIONS" column group is replaced
* by the CSV regions multi-value. This allows region codes to be serialized in a single column
* (which is far nicer when looking at data in a spreadsheet). In the range table, this is
* normalized into the boolean column group (because that's far nicer to work with).
*/
private static final Schema CSV_COLUMNS =
Schema.builder()
.add(TYPE)
.add(TARIFF)
.add(AREA_CODE_LENGTH)
.add(NATIONAL_ONLY)
.add(SMS)
.add(OPERATOR)
.add(FORMAT)
.add(TIMEZONE)
.add(CSV_REGIONS)
.add(GEOCODES)
.add(PROVENANCE)
.add(COMMENT)
.build();
/** Schema instance defining the ranges CSV table. */
public static final CsvSchema<RangeKey> SCHEMA = CsvSchema.of(MARSHALLER, CSV_COLUMNS);
/**
 * Converts a {@link RangeTable} into a {@link CsvTable} keyed by {@link RangeKey}. Every change
 * of the table is decomposed into range keys, and each key receives the change's assignments.
 * Assignments to the boolean {@link #REGIONS} group are not copied directly; instead the regions
 * set to {@code true} are collected and written as a single {@link #CSV_REGIONS} value.
 *
 * <p>NOTE(review): the builder is created from {@link #SCHEMA}; whether a table with additional
 * columns can yield a different schema depends on {@code CsvTable.Builder} and should be
 * confirmed there.
 */
@SuppressWarnings("unchecked")
public static CsvTable<RangeKey> toCsv(RangeTable table) {
  CsvTable.Builder<RangeKey> out = CsvTable.builder(SCHEMA);
  ImmutableSet<Column<Boolean>> regionColumns =
      REGIONS.extractGroupColumns(table.getColumns()).values();
  // Scratch set, reset for every row, holding the regions for which the row is valid.
  TreeSet<PhoneRegion> rowRegions = new TreeSet<>();
  for (Change change : table.toChanges()) {
    for (RangeKey key : RangeKey.decompose(change.getRanges())) {
      rowRegions.clear();
      change.getAssignments().forEach(assignment -> {
        if (!regionColumns.contains(assignment.column())) {
          // Ordinary column: copy the assignment through unchanged.
          out.put(key, assignment);
          return;
        }
        // Region group column: it must hold Booleans, so the cast is safe. An absent value is
        // treated the same as false.
        boolean validInRegion =
            assignment.value()
                .map(((Column<Boolean>) assignment.column())::cast)
                .orElse(Boolean.FALSE);
        if (validInRegion) {
          rowRegions.add(REGIONS.getKey(assignment.column()));
        }
      });
      // We can do this out-of-sequence because the table will order its columns.
      if (!rowRegions.isEmpty()) {
        out.put(key, CSV_REGIONS, Regions.of(rowRegions));
      }
    }
  }
  return out.build();
}
/**
 * Converts a {@link RangeKey} based {@link CsvTable} into a {@link RangeTable} built over
 * {@link #TABLE_COLUMNS}. Each CSV row becomes one change covering the row key's ranges, applied
 * with {@link OverwriteMode#NEVER}. The {@link #CSV_REGIONS} multi-value is expanded into one
 * {@code true} assignment per region in the {@link #REGIONS} column group; all other values are
 * assigned as-is.
 */
public static RangeTable toRangeTable(CsvTable<RangeKey> csv) {
  RangeTable.Builder table = RangeTable.builder(TABLE_COLUMNS);
  for (RangeKey key : csv.getKeys()) {
    Change.Builder rowChange = Change.builder(key.asRangeTree());
    csv.getRow(key).forEach((column, value) -> {
      if (!column.equals(CSV_REGIONS)) {
        rowChange.assign(column, value);
        return;
      }
      // Special case: turn the list of region codes into a series of boolean assignments.
      CSV_REGIONS.cast(value).getValues()
          .forEach(region -> rowChange.assign(REGIONS.getColumn(region), true));
    });
    table.apply(rowChange.build(), OverwriteMode.NEVER);
  }
  return table.build();
}
// Shared by ShortcodeTableSchema
/** Serializes a range key as two parts: its prefix followed by its formatted set of lengths. */
public static Stream<String> write(RangeKey key) {
  String prefix = key.getPrefix().toString();
  String lengths = formatLength(key.getLengths());
  return Stream.of(prefix, lengths);
}
// Shared by ShortcodeTableSchema
/** Rebuilds a range key from its serialized parts: the prefix at index 0, the lengths at 1. */
public static RangeKey read(List<String> parts) {
  RangeSpecification prefix = RangeSpecification.parse(parts.get(0));
  NavigableSet<Integer> lengths = parseLengths(parts.get(1));
  return RangeKey.create(prefix, lengths);
}
/**
 * Formats a non-empty set of lengths as a comma separated list in which runs of consecutive
 * values are collapsed by {@link #formatRange} (e.g. "9", "8,9" or "5,7-9").
 */
private static String formatLength(ImmutableSortedSet<Integer> lengthSet) {
  checkArgument(!lengthSet.isEmpty());
  // Canonical single-value ranges have the form [n, n+1), so the ranges of consecutive lengths
  // are connected and get merged into one range by the union.
  List<Range<Integer>> unitRanges =
      lengthSet.stream()
          .map(length -> Range.singleton(length).canonical(integers()))
          .collect(toImmutableList());
  ImmutableRangeSet<Integer> merged = ImmutableRangeSet.unionOf(unitRanges);
  return merged.asRanges().stream().map(RangesTableSchema::formatRange).collect(joining(","));
}
/**
 * Formats one contiguous run of lengths: a single value as "n", two values as "lo,hi" and three
 * or more as "lo-hi".
 */
private static String formatRange(Range<Integer> r) {
  ContiguousSet<Integer> values = ContiguousSet.create(r, integers());
  Integer lowest = values.first();
  if (values.size() == 1) {
    return String.valueOf(lowest);
  }
  // A run of exactly two values is written as a plain list rather than as a range.
  String separator = values.size() == 2 ? "," : "-";
  return lowest + separator + values.last();
}
private static final Splitter COMMA_SPLITTER = Splitter.on(',').trimResults();
private static final Splitter RANGE_SPLITTER = Splitter.on('-').trimResults().limit(2);

/**
 * Parses a comma separated list of lengths and "lo-hi" length ranges (e.g. "5,7-9") into a set.
 * Elements must be strictly increasing and non-overlapping, and every range must satisfy
 * {@code lo < hi}.
 *
 * @throws IllegalArgumentException if an element is malformed, a range is not increasing, or
 *     elements overlap or are out of order
 */
private static NavigableSet<Integer> parseLengths(String s) {
  NavigableSet<Integer> lengths = new TreeSet<>();
  for (String element : COMMA_SPLITTER.split(s)) {
    if (!element.contains("-")) {
      int single = parseInt(element);
      checkArgument(lengths.isEmpty() || single > lengths.last(), "Overlapping ranges: %s", s);
      lengths.add(single);
      continue;
    }
    // The splitter yields at most two parts, so any further '-' ends up in the upper bound and
    // is rejected when that bound is parsed.
    List<String> bounds = RANGE_SPLITTER.splitToList(element);
    int lo = parseInt(bounds.get(0));
    int hi = parseInt(bounds.get(1));
    checkArgument(lo < hi, "Invalid range: %s-%s", lo, hi);
    checkArgument(lengths.isEmpty() || lo > lengths.last(), "Overlapping ranges: %s", s);
    lengths.addAll(ContiguousSet.closed(lo, hi));
  }
  return lengths;
}
/**
 * Parses a non-negative decimal integer.
 *
 * @param s the text to parse; a leading '-' is not accepted
 * @return the parsed value, always in the range {@code 0..Integer.MAX_VALUE}
 * @throws NumberFormatException if {@code s} is not an unsigned decimal number or is larger than
 *     {@link Integer#MAX_VALUE}
 */
private static int parseInt(String s) {
  int value = Integer.parseUnsignedInt(s, 10);
  // parseUnsignedInt accepts values up to 2^32-1 and returns anything above Integer.MAX_VALUE as
  // a negative int. Such a value must not be allowed through as if it were a valid length.
  if (value < 0) {
    throw new NumberFormatException("Value out of range: " + s);
  }
  return value;
}
// Holder for static schema constants and helpers only; not instantiable.
private RangesTableSchema() {}
}

+ 228
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ShortcodesTableSchema.java View File

@ -0,0 +1,228 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap;
import static com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType.EMERGENCY;
import static com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType.EXPANDED_EMERGENCY;
import static java.util.function.Function.identity;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Maps;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff;
import com.google.i18n.phonenumbers.metadata.proto.Enums.Provenance;
import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType;
import com.google.i18n.phonenumbers.metadata.table.Change;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller;
import com.google.i18n.phonenumbers.metadata.table.CsvSchema;
import com.google.i18n.phonenumbers.metadata.table.CsvTable;
import com.google.i18n.phonenumbers.metadata.table.RangeKey;
import com.google.i18n.phonenumbers.metadata.table.RangeTable;
import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
import com.google.i18n.phonenumbers.metadata.table.Schema;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Stream;
/**
* The schema of the standard "Shortcodes" table with rows keyed by {@link RangeKey} and columns:
* <ol>
* <li>{@link #TYPE}: The semantic type of numbers in a range. All ranges should be assigned a
* type.
* <li>{@link #TARIFF}: The expected cost of numbers in a range. All ranges should be assigned a
* tariff.
* <li>{@link #SMS}: True if numbers in a range are expected to support SMS.
* <li>{@link #SUBREGION}: True if numbers in a range are expected to be only diallable from a
* geographic subregion (rather than the whole region).
* <li>{@link #PROVENANCE}: Indicates the most important reason for a range to be valid.
* <li>{@link #COMMENT}: Free text field usually containing evidence related to the provenance.
* </ol>
*
* <p>Row keys are serialized via the marshaller and produce leading columns:
* <ol>
* <li>{@code Region}: The region code for which this range applies.
* <li>{@code Prefix}: The prefix (RangeSpecification) for the ranges in a row (e.g. "12[3-6]").
* <li>{@code Length}: A set of lengths for the ranges in a row (e.g. "9", "8,9" or "5,7-9").
* </ol>
*
* <p>Note that the region must be part of the key, since some shortcodes have different types
* between different regions.
*/
public final class ShortcodesTableSchema {
/**
* The row key of the shortcode table, specifying region and range key. This permits all
* shortcodes to be stored in a single table (which is very helpful in NANPA, where there are
* many regions, most with only a tiny amount of shortcode information).
*/
@AutoValue
public abstract static class ShortcodeKey {
  // Orders keys by region first, then by range key.
  private static final Comparator<ShortcodeKey> ORDERING =
      Comparator.comparing(ShortcodeKey::getRegion)
          .thenComparing(ShortcodeKey::getRangeKey, RangeKey.ORDERING);

  // Serializes keys as the leading "Region", "Prefix" and "Length" columns.
  private static final CsvKeyMarshaller<ShortcodeKey> MARSHALLER =
      new CsvKeyMarshaller<>(
          ShortcodeKey::write,
          ShortcodeKey::read,
          Optional.of(ORDERING),
          "Region",
          "Prefix",
          "Length");

  /** Writes the region followed by the parts of the range key. */
  private static Stream<String> write(ShortcodeKey key) {
    Stream<String> regionPart = Stream.of(key.getRegion().toString());
    Stream<String> rangeKeyParts = RangesTableSchema.write(key.getRangeKey());
    return Stream.concat(regionPart, rangeKeyParts);
  }

  /** Reads the region from the first part and the range key from the remaining parts. */
  private static ShortcodeKey read(List<String> parts) {
    PhoneRegion region = PhoneRegion.of(parts.get(0));
    List<String> rangeKeyParts = parts.subList(1, parts.size());
    return create(region, RangesTableSchema.read(rangeKeyParts));
  }

  /**
   * Creates a key for the given region and range key.
   *
   * @throws IllegalArgumentException if the region is the "unknown" region
   */
  public static ShortcodeKey create(PhoneRegion region, RangeKey rangeKey) {
    boolean isKnownRegion = !region.equals(PhoneRegion.getUnknown());
    checkArgument(isKnownRegion, "region must be valid");
    return new AutoValue_ShortcodesTableSchema_ShortcodeKey(region, rangeKey);
  }

  /** Returns the region to which the shortcode range applies. */
  public abstract PhoneRegion getRegion();

  /** Returns the range key (prefix and lengths) of the shortcode range. */
  public abstract RangeKey getRangeKey();
}
/**
* Shortcode type enum. Only {@link #EXPANDED_EMERGENCY} and {@link #EMERGENCY} are given a
* corresponding {@code XmlShortcodeType} by the type mapping in the enclosing class.
*/
public enum ShortcodeType {
/** Default value not permitted in real data. */
UNKNOWN,
/**
* General purpose non-governmental services including commercial or charity services. This is
* the default type for shortcodes if no other category is more applicable.
*/
COMMERCIAL,
/**
* Non-emergency, government run public services (e.g. directory enquiries).
*/
PUBLIC_SERVICE,
/**
* Public services which provide important non-emergency information for health or safety
* (e.g. https://www.police.uk/contact/101/).
*/
EXPANDED_EMERGENCY,
/**
* Primary public emergency numbers (i.e. police, fire or ambulance) which are available to
* everyone. Numbers in this category must be toll-free and not carrier specific. Mobile phone
* manufacturers will often allow these numbers to be dialled from a locked device, so it's
* important that they work for everyone.
*/
EMERGENCY;
}
// Tariffs with an XML shortcode type named "SC_<tariff name>". SHARED_COST is not supported.
private static final ImmutableBiMap<ExtTariff, XmlShortcodeType> XML_TARIFF_MAP =
    Stream.of(ExtTariff.TOLL_FREE, ExtTariff.STANDARD_RATE, ExtTariff.PREMIUM_RATE)
        .collect(
            toImmutableBiMap(
                identity(), tariff -> XmlShortcodeType.valueOf("SC_" + tariff.name())));

// Shortcode types with an XML shortcode type named "SC_<type name>".
private static final ImmutableBiMap<ShortcodeType, XmlShortcodeType> XML_TYPE_MAP =
    Stream.of(EXPANDED_EMERGENCY, EMERGENCY)
        .collect(
            toImmutableBiMap(identity(), type -> XmlShortcodeType.valueOf("SC_" + type.name())));

/** Returns the XML type for the given schema shortcode type, or empty if it has none. */
public static Optional<XmlShortcodeType> getXmlType(ShortcodeType type) {
  XmlShortcodeType xmlType = XML_TYPE_MAP.get(type);
  return Optional.ofNullable(xmlType);
}

/**
 * Returns the XML type for the given schema tariff.
 *
 * @throws IllegalArgumentException if shortcodes do not support the given tariff
 */
public static XmlShortcodeType getXmlType(ExtTariff tariff) {
  checkArgument(
      XML_TARIFF_MAP.containsKey(tariff), "shortcodes do not support tariff: %s", tariff);
  return XML_TARIFF_MAP.get(tariff);
}
/** The "Type" column holding the {@link ShortcodeType} of a range. */
public static final Column<ShortcodeType> TYPE =
Column.of(ShortcodeType.class, "Type", ShortcodeType.UNKNOWN);
/** The tariff column; the same column instance as {@link RangesTableSchema#TARIFF}. */
public static final Column<ExtTariff> TARIFF = RangesTableSchema.TARIFF;
/** The SMS column; the same column instance as {@link RangesTableSchema#SMS}. */
public static final Column<Boolean> SMS = RangesTableSchema.SMS;
/**
* The "Carrier Specific" boolean column.
*
* <p>NOTE(review): presumably marks shortcodes which only work for particular carriers; the
* exact semantics are not defined in this file and should be confirmed.
*/
public static final Column<Boolean> CARRIER_SPECIFIC = Column.ofBoolean("Carrier Specific");
/**
* True if numbers in a range are expected to be only diallable from a geographic subregion
* (rather than the whole region).
*/
public static final Column<Boolean> SUBREGION = Column.ofBoolean("Subregion");
/** The format column; the same column instance as {@link RangesTableSchema#FORMAT}. */
public static final Column<String> FORMAT = RangesTableSchema.FORMAT;
/** The provenance column; the same column instance as {@link RangesTableSchema#PROVENANCE}. */
public static final Column<Provenance> PROVENANCE = RangesTableSchema.PROVENANCE;
/** The comment column; the same column instance as {@link RangesTableSchema#COMMENT}. */
public static final Column<String> COMMENT = RangesTableSchema.COMMENT;
// The non-key columns of the shortcodes table, also used for the per-region range tables.
private static final Schema COLUMNS =
Schema.builder()
.add(TYPE)
.add(TARIFF)
.add(SMS)
.add(CARRIER_SPECIFIC)
.add(SUBREGION)
.add(FORMAT)
.add(PROVENANCE)
.add(COMMENT)
.build();
/** Schema instance defining the "Shortcodes" CSV table. */
public static final CsvSchema<ShortcodeKey> SCHEMA =
CsvSchema.of(ShortcodeKey.MARSHALLER, COLUMNS);
/**
 * Combines per-region shortcode range tables into a single CSV table. Every change of every
 * table is decomposed into range keys, and each resulting row is keyed by the table's region
 * together with the range key and receives the change's assignments.
 *
 * @param tables the shortcode range tables, keyed by the region they apply to
 * @return a CSV table using {@link #SCHEMA} which contains the rows of all given tables
 */
public static CsvTable<ShortcodeKey> toCsv(Map<PhoneRegion, RangeTable> tables) {
  CsvTable.Builder<ShortcodeKey> out = CsvTable.builder(SCHEMA);
  for (Map.Entry<PhoneRegion, RangeTable> entry : tables.entrySet()) {
    PhoneRegion region = entry.getKey();
    for (Change change : entry.getValue().toChanges()) {
      for (RangeKey rangeKey : RangeKey.decompose(change.getRanges())) {
        out.put(ShortcodeKey.create(region, rangeKey), change.getAssignments());
      }
    }
  }
  return out.build();
}
/**
 * Splits a single shortcode CSV table into one range table per region. Note that the ranges in
 * these tables do not need to be consistent across regions (e.g. "toll free" in one might be
 * "premium rate" in the other).
 *
 * @param csv the shortcode table whose row keys carry the region of each range
 * @return the region specific range tables, as a map sorted by region
 */
public static ImmutableSortedMap<PhoneRegion, RangeTable> toShortcodeTables(
    CsvTable<ShortcodeKey> csv) {
  // Builders are created in the order regions are first seen in the CSV table. Note that the
  // returned sorted map orders its entries by region regardless of this.
  Map<PhoneRegion, RangeTable.Builder> buildersByRegion = new LinkedHashMap<>();
  for (ShortcodeKey key : csv.getKeys()) {
    // Basically the same as for RangesTableSchema, except that the region comes from the key.
    Change.Builder rowChange = Change.builder(key.getRangeKey().asRangeTree());
    csv.getRow(key).forEach(rowChange::assign);
    buildersByRegion
        .computeIfAbsent(key.getRegion(), unused -> RangeTable.builder(COLUMNS))
        .apply(rowChange.build(), OverwriteMode.NEVER);
  }
  return ImmutableSortedMap.copyOf(
      Maps.transformValues(buildersByRegion, RangeTable.Builder::build));
}
// Holder for static schema constants and helpers only; not instantiable.
private ShortcodesTableSchema() {}
}

+ 154
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/XmlRangesSchema.java View File

@ -0,0 +1,154 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN;
import static com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode.NEVER;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff;
import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.ColumnGroup;
import com.google.i18n.phonenumbers.metadata.table.RangeTable;
import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
import com.google.i18n.phonenumbers.metadata.table.Schema;
import java.util.Optional;
/**
 * A schema describing the columns which are required for creating a {@link NumberingScheme}.
 * <ol>
 *   <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not the same as
 *       an {@code XmlNumberType}). All ranges should be assigned a validation type.
 *   <li>{@link #AREA_CODE_LENGTH}: The length of an optional prefix which may be removed from
 *       numbers in a range for local dialling. Local only lengths are derived using this column.
 *   <li>{@link #NATIONAL_ONLY}: True if numbers in a range cannot be dialled from outside its
 *       region. The "noInternationalDialling" ranges are derived from this column.
 *   <li>{@link #FORMAT}: Format specifier IDs.
 *   <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where ranges are
 *       set {@code true} if that range is valid within the region {@code XX}.
 * </ol>
 *
 * <p>This schema is sufficient for generating {@link NumberingScheme} instances, but isn't what we
 * expect to import data from (which is why it doesn't have a {@code CsvKeyMarshaller} associated
 * with it). That's covered by the {@code RangesTableSchema}.
 */
public final class XmlRangesSchema {
  /**
   * The internal "Type" column in the range table. This is present in the schema and used in a
   * lot of places, but it is not what the type/tariff data is imported as (it's derived from other
   * columns).
   */
  public static final Column<ValidNumberType> TYPE =
      Column.of(ValidNumberType.class, "Type", UNKNOWN);

  /**
   * The "Area Code Length" column in the range table, denoting the length of a prefix which can
   * be removed from all numbers in a range to obtain locally diallable numbers. If an
   * "area code" is not optional for dialling, then no value should be set here.
   */
  public static final Column<Integer> AREA_CODE_LENGTH = RangesTableSchema.AREA_CODE_LENGTH;

  /** Denotes ranges which cannot be dialled internationally. */
  public static final Column<Boolean> NATIONAL_ONLY = RangesTableSchema.NATIONAL_ONLY;

  /** Format specifier IDs. */
  public static final Column<String> FORMAT = RangesTableSchema.FORMAT;

  /** The "Region:XX" column group in the range table. */
  public static final ColumnGroup<PhoneRegion, Boolean> REGIONS = RangesTableSchema.REGIONS;

  /** The standard columns required for generating a {@link NumberingScheme}. */
  public static final Schema COLUMNS =
      Schema.builder()
          .add(TYPE)
          .add(AREA_CODE_LENGTH)
          .add(NATIONAL_ONLY)
          .add(FORMAT)
          .add(REGIONS)
          .build();

  /** Columns for per-region tables (just {@link #COLUMNS} without {@link #REGIONS}). */
  public static final Schema PER_REGION_COLUMNS =
      Schema.builder()
          .add(TYPE)
          .add(AREA_CODE_LENGTH)
          .add(NATIONAL_ONLY)
          .add(FORMAT)
          .build();

  /**
   * Converts a table using the external {@code RangesTableSchema} columns into a table using
   * {@link #COLUMNS}. The area code length, national-only, format and region columns are copied
   * verbatim, while {@link #TYPE} is derived from the external type and tariff columns (with the
   * tariff taking precedence). Ranges whose external type has no corresponding
   * {@link ValidNumberType} are not assigned a type, even if their tariff would map to one.
   *
   * <p>The source table must have every range assigned in both its type and tariff columns and
   * must not contain ranges of unknown external type; violations are reported via
   * {@code checkMetadata}.
   *
   * @param src the source table, whose schema must be a sub-schema of
   *     {@code RangesTableSchema.TABLE_COLUMNS}
   * @return a new table with the {@link #COLUMNS} schema
   * @throws IllegalArgumentException if the source schema is not a sub-schema of
   *     {@code RangesTableSchema.TABLE_COLUMNS}
   */
  public static RangeTable fromExternalTable(RangeTable src) {
    // The source schema is the one that must be contained in TABLE_COLUMNS (as the error message
    // states), so the sub-schema test is made on the source schema.
    checkArgument(src.getSchema().isSubSchemaOf(RangesTableSchema.TABLE_COLUMNS),
        "unexpected schema for source table, should be subschema of %s",
        RangesTableSchema.TABLE_COLUMNS);
    RangeTree unknown = src.getRanges(RangesTableSchema.TYPE, ExtType.UNKNOWN);
    checkMetadata(unknown.isEmpty(), "source table contains unknown type for ranges\n%s", unknown);
    checkSourceColumn(src, RangesTableSchema.TYPE);
    checkSourceColumn(src, RangesTableSchema.TARIFF);

    // We can copy most columns verbatim.
    RangeTable.Builder dst = RangeTable.builder(COLUMNS);
    copyColumn(src, dst, AREA_CODE_LENGTH);
    copyColumn(src, dst, NATIONAL_ONLY);
    copyColumn(src, dst, FORMAT);
    REGIONS.extractGroupColumns(src.getColumns()).values().forEach(c -> copyColumn(src, dst, c));

    // But the type column must be inferred from a combination of the external type and tariff.
    // Tariff takes precedence, so we do type first and then overwrite ranges for tariff.
    // We also capture unsupported ranges as they must be ignored in this conversion.
    RangeTree unsupportedRanges = RangeTree.empty();
    for (ExtType extType : src.getAssignedValues(RangesTableSchema.TYPE)) {
      RangeTree ranges = src.getRanges(RangesTableSchema.TYPE, extType);
      Optional<ValidNumberType> t = extType.toValidNumberType();
      if (t.isPresent()) {
        dst.assign(TYPE, t.get(), ranges, OverwriteMode.NEVER);
      } else {
        unsupportedRanges = unsupportedRanges.union(ranges);
      }
    }

    // Because we know that both the type and tariff columns have assignments for every range (and
    // there's no "unknown" values for these) we can just ignore "standard rate" tariff ranges
    // since they must have had a type assigned above already.
    for (ExtTariff extTariff : src.getAssignedValues(RangesTableSchema.TARIFF)) {
      // Ignore unsupported ranges here (since otherwise they could add ranges based only on the
      // tariff, which would be wrong). For example, a toll free ISP number range should NOT be
      // in the table as TOLL_FREE, since ISP numbers should not be in the table at all (until
      // such time as they are a fully supported type).
      RangeTree ranges =
          src.getRanges(RangesTableSchema.TARIFF, extTariff).subtract(unsupportedRanges);
      extTariff.toValidNumberType()
          .ifPresent(t -> dst.assign(TYPE, t, ranges, OverwriteMode.ALWAYS));
    }
    return dst.build();
  }

  // Verifies that every range in the table has an assignment in the given column.
  private static void checkSourceColumn(RangeTable table, Column<?> col) {
    checkMetadata(table.getAssignedRanges(col).equals(table.getAllRanges()),
        "table is missing assignments in column %s for ranges\n%s",
        col, table.getAllRanges().subtract(table.getAssignedRanges(col)));
  }

  // Copies all assignments of a column (if the source table has it) without overwriting anything
  // already present in the destination.
  private static void copyColumn(RangeTable src, RangeTable.Builder dst, Column<?> col) {
    if (src.getColumns().contains(col)) {
      src.getAssignedValues(col).forEach(v -> dst.assign(col, v, src.getRanges(col, v), NEVER));
    }
  }

  private XmlRangesSchema() {}
}

+ 92
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Assignment.java View File

@ -0,0 +1,92 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.auto.value.AutoValue;
import com.google.common.base.Splitter;
import java.util.List;
import java.util.Optional;
import javax.annotation.Nullable;
/**
 * A single assignment of a column to a value. This can be used to change values in a
 * {@code RangeTable} as well as query for ranges with its value.
 */
@AutoValue
public abstract class Assignment<T extends Comparable<T>> {
  // Splits on the first '=' only, so values may themselves contain '='.
  private static final Splitter SPLITTER = Splitter.on("=").limit(2).trimResults();

  /**
   * Parses a string of the form {@code "<column>=<value>"} to create an assignment using the given
   * schema. The named column must exist in the schema, and the associated value must be a valid
   * value within that column.
   * <p>
   * Whitespace before and after the column or value is ignored. If the value is omitted, then an
   * unassignment is returned.
   *
   * @throws IllegalArgumentException if the string does not contain an {@code '='} separator
   */
  public static Assignment<?> parse(String s, Schema schema) {
    List<String> parts = SPLITTER.splitToList(s);
    checkArgument(parts.size() == 2, "invalid assignment string: %s", s);
    Column<?> column = schema.getColumn(parts.get(0));
    return create(column, column.parse(parts.get(1)));
  }

  // Type capture around AutoValue is a little painful, so this static helper ... helps.
  // A null value produces an unassignment (empty optional).
  private static <T extends Comparable<T>> Assignment<T> create(Column<T> c, @Nullable Object v) {
    T value = c.cast(v);
    return new AutoValue_Assignment<>(c, Optional.ofNullable(value));
  }

  /**
   * Returns an assignment in the given column for the specified, non null, value.
   * <p>
   * Note that an assignment for the default value of a column will return an explicit assignment
   * for that value, rather than an "unassignment" in that column; so
   * {@code Assignment.of(c, c.defaultValue())} is not equal to {@code unassign(c)}, even though
   * they may have the same effect when applied to a range table, and may even have the same
   * {@link #toString()} representation (in the case of String columns).
   */
  public static <T extends Comparable<T>> Assignment<T> of(Column<T> c, Object v) {
    return new AutoValue_Assignment<>(c, Optional.of(c.cast(v)));
  }

  /**
   * Returns an assignment in the given column for the optional value, where an empty optional
   * results in an unassignment. A present value is cast to the column's runtime type first.
   */
  @SuppressWarnings("unchecked")
  public static <T extends Comparable<T>> Assignment<T> ofOptional(Column<T> c, Optional<?> v) {
    // Casting the value makes the optional cast below safe.
    v.ifPresent(c::cast);
    return new AutoValue_Assignment<>(c, (Optional<T>) v);
  }

  /**
   * Returns an unassignment in the given column. The {@link #value()} of this assignment is empty.
   */
  public static <T extends Comparable<T>> Assignment<T> unassign(Column<T> c) {
    return new AutoValue_Assignment<>(c, Optional.empty());
  }

  /** The column in which the assignment applies. */
  public abstract Column<T> column();

  /** The value in the column, or empty to signify unassignment. */
  public abstract Optional<T> value();

  /** Returns {@code "<column name>=<value>"}, with an empty value for unassignments. */
  @Override
  public final String toString() {
    return String.format("%s=%s", column().getName(), value().map(Object::toString).orElse(""));
  }
}

+ 131
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Change.java View File

@ -0,0 +1,131 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
/**
 * A change which can be applied to a range table. Changes are applied sequentially to build a
 * range table and new changes overwrite existing mappings. Changes are additive, and cannot be
 * used to remove ranges from a table (but they can unassign previous assignments).
 */
@AutoValue
public abstract class Change {
  private static final Change EMPTY = of(RangeTree.empty(), ImmutableList.of());

  /** A builder for changes that supports assigning and unassigning column values for a range. */
  public static final class Builder {
    private final RangeTree ranges;
    // Insertion ordered so that built changes list assignments in the order they were added.
    private final Map<Column<?>, Assignment<?>> assignments = new LinkedHashMap<>();

    private Builder(RangeTree ranges) {
      this.ranges = checkNotNull(ranges);
    }

    /**
     * Assigns the optional value in the given column for the ranges of this builder (an empty
     * value has the effect of unassigning the value in the table that this change is applied to).
     *
     * @throws IllegalArgumentException if the column was already assigned in this builder (in
     *     which case the existing assignment is left untouched)
     */
    public Builder assign(Assignment<?> assignment) {
      Column<?> column = assignment.column();
      // Check before mutating so that a rejected duplicate cannot replace the earlier assignment.
      checkArgument(!assignments.containsKey(column), "Column already assigned: %s", column);
      assignments.put(column, assignment);
      return this;
    }

    /** Assigns the non-null value in the given column for the ranges of this builder. */
    public Builder assign(Column<?> column, Object value) {
      return assign(Assignment.of(column, value));
    }

    /** Unassigns any values in the given column for the ranges of this builder. */
    public Builder unassign(Column<?> column) {
      return assign(Assignment.unassign(column));
    }

    /** Builds an immutable change from the current state of this builder. */
    public Change build() {
      return Change.of(ranges, assignments.values());
    }
  }

  /** Returns a new builder for a change affecting the given ranges. */
  public static Builder builder(RangeTree ranges) {
    return new Builder(ranges);
  }

  /** Returns the empty change which has no effect when applied to any table. */
  public static Change empty() {
    return EMPTY;
  }

  /**
   * Builds a change from a set of assignments (columns must be unique).
   *
   * @throws IllegalArgumentException if two assignments share the same column
   */
  public static Change of(RangeTree ranges, Iterable<Assignment<?>> assignments) {
    ImmutableList<Assignment<?>> a = ImmutableList.copyOf(assignments);
    checkArgument(a.size() == a.stream().map(Assignment::column).distinct().count(),
        "cannot supply different assignments for the same column: %s", a);
    return new AutoValue_Change(ranges, a);
  }

  /**
   * Returns the ranges affected by this change. These ranges are added to the table and
   * optionally assigned category values according to {@link #getAssignments()}. No other ranges
   * will be affected by this change.
   */
  public abstract RangeTree getRanges();

  /**
   * Returns a list of assignments to be applied for this change. Note that the set of columns for
   * these assignments is itself also a set (i.e. no two assignments in a change ever share the
   * same column).
   */
  public abstract ImmutableList<Assignment<?>> getAssignments();

  /** Returns whether this change contains any of the specified values in a given column. */
  @SafeVarargs
  public final <T extends Comparable<T>> boolean hasAssignment(Column<T> column, T... values) {
    for (Assignment<?> a : getAssignments()) {
      if (column.equals(a.column())) {
        // Columns are unique within a change, so the first match decides the result. An
        // unassignment (empty value) never matches.
        return a.value().map(v -> Arrays.asList(values).contains(column.cast(v))).orElse(false);
      }
    }
    return false;
  }

  /**
   * Returns the value of the column in this change (or empty if there was no assignment for the
   * column or the assigned value was empty). Because this conflates "no value" and "explicitly
   * empty value", this method might not be suitable for changes that unassign values.
   */
  public final <T extends Comparable<T>> Optional<T> getAssignment(Column<T> column) {
    for (Assignment<?> a : getAssignments()) {
      if (column.equals(a.column())) {
        return a.value().map(column::cast);
      }
    }
    return Optional.empty();
  }

  // Visible for AutoValue.
  Change() {}
}

+ 217
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Column.java View File

@ -0,0 +1,217 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.CharMatcher.inRange;
import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
import com.google.auto.value.AutoValue;
import com.google.common.base.CaseFormat;
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableMap;
import java.util.function.Function;
import javax.annotation.Nullable;
/**
 * A column specifier which holds a set of values that are allowed with a column.
 *
 * <p>Two columns are considered equal if they have the same name and value type.
 */
@AutoValue
public abstract class Column<T extends Comparable<T>> {
  // The only accepted serialized forms of boolean values.
  private static final ImmutableMap<String, Boolean> BOOLEAN_MAP =
      ImmutableMap.of("true", TRUE, "TRUE", TRUE, "false", FALSE, "FALSE", FALSE);

  private static final CharMatcher ASCII_LETTER_OR_DIGIT =
      inRange('a', 'z').or(inRange('A', 'Z')).or(inRange('0', '9'));
  private static final CharMatcher LOWER_ASCII_LETTER_OR_DIGIT =
      inRange('a', 'z').or(inRange('0', '9'));
  private static final CharMatcher LOWER_UNDERSCORE =
      CharMatcher.is('_').or(LOWER_ASCII_LETTER_OR_DIGIT);

  /**
   * Returns a column for the specified type with a given parsing function. Values are serialized
   * via {@code String.valueOf()}. Use alternate helper methods for creating columns of common
   * types.
   */
  public static <T extends Comparable<T>> Column<T> create(
      Class<T> clazz, String name, T defaultValue, Function<String, T> parseFn) {
    return new AutoValue_Column<>(
        checkName(name), clazz, parseFn, String::valueOf, defaultValue, null);
  }

  /**
   * Returns a column for the specified enum type. The string representation of a value in this
   * column is just the {@code toString()} value of the enum. When parsing, lower_underscore and
   * lowerCamel spellings of the enum name are also accepted.
   */
  public static <T extends Enum<T>> Column<T> of(Class<T> clazz, String name, T defaultValue) {
    return create(clazz, name, defaultValue, text -> Enum.valueOf(clazz, toEnumName(text)));
  }

  /**
   * Returns a column for strings. In their serialized form, strings do not preserve leading or
   * trailing whitespace, unless surrounded by double-quotes (e.g. {@code " foo "}). The quotes are
   * stripped on parsing and added back for any String value with leading/trailing whitespace. The
   * default value is the empty string.
   */
  public static Column<String> ofString(String name) {
    return new AutoValue_Column<>(
        checkName(name), String.class, Column::trimOrUnquote, Column::maybeQuote, "", null);
  }

  /**
   * Returns a column for unsigned integers. The string representation of a value in this column
   * matches the {@link Integer#toString(int)} value. The default value is {@code 0}.
   */
  public static Column<Integer> ofUnsignedInteger(String name) {
    return create(Integer.class, name, 0, Integer::parseUnsignedInt);
  }

  /**
   * Returns a column for booleans. The string representation of a value in this column can be any
   * of "true", "false", "TRUE", "FALSE" (but not things like "True", "T" or "YES"). The default
   * value is {@code false}.
   */
  public static Column<Boolean> ofBoolean(String name) {
    return create(Boolean.class, name, false, BOOLEAN_MAP::get);
  }

  // Column names must not contain ':' since it separates a group prefix from its suffix.
  private static String checkName(String name) {
    checkArgument(name.indexOf(':') == -1, "invalid column name: %s", name);
    return name;
  }

  // Converts to UPPER_UNDERSCORE naming for enums.
  private static String toEnumName(String name) {
    // Allow conversion for lower_underscore and lowerCamel, since UPPER_UNDERSCORE is so "LOUD".
    // We can be sloppy with respect to errors here since all runtime exceptions are handled.
    if (!LOWER_ASCII_LETTER_OR_DIGIT.matches(name.charAt(0))) {
      // Does not start like a lower-case name, so assume it is already an enum name.
      return name;
    }
    if (LOWER_UNDERSCORE.matchesAllOf(name)) {
      return CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.UPPER_UNDERSCORE, name);
    }
    if (ASCII_LETTER_OR_DIGIT.matchesAllOf(name)) {
      return CaseFormat.LOWER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, name);
    }
    // Message/type not important here since all exceptions are replaced anyway.
    throw new IllegalArgumentException();
  }

  // Trims whitespace from a serialized string, unless the value is surrounded by double-quotes (in
  // which case the quotes are removed). This is done to permit the rare use of leading/trailing
  // whitespace in data in a visually distinct and deliberate way.
  private static String trimOrUnquote(String s) {
    boolean quoted = s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"");
    return quoted ? s.substring(1, s.length() - 1) : whitespace().trimFrom(s);
  }

  // Surrounds any string with whitespace at either end with double quotes.
  private static String maybeQuote(String s) {
    if (s.length() == 0) {
      return s;
    }
    boolean leading = whitespace().matches(s.charAt(0));
    boolean trailing = whitespace().matches(s.charAt(s.length() - 1));
    return (leading || trailing) ? '"' + s + '"' : s;
  }

  /** Returns the column name (which can be used as a human readable title if needed). */
  public abstract String getName();

  // The runtime type of values in this column.
  abstract Class<T> type();

  // The parsing function from a string to a value.
  abstract Function<String, T> parseFn();

  // The serialization function from a value to a String. This must be the inverse of the parseFn.
  abstract Function<T, String> serializeFn();

  /** Default value for this column (inferred for unassigned ranges when a snapshot is built). */
  public abstract T defaultValue();

  // This is very private and should only be used in this class.
  @Nullable abstract Column<T> owningGroup();

  /** Attempts to cast the given instance to the runtime type of this column. */
  @Nullable public final T cast(@Nullable Object value) {
    return type().cast(value);
  }

  /**
   * Returns the value of this column based on its serialized representation (which is not
   * necessarily its {@code toString()} representation). The empty string is parsed as
   * {@code null}.
   *
   * @throws IllegalArgumentException if the non-empty string is not a valid value of this column
   */
  @Nullable public final T parse(String id) {
    if (id.isEmpty()) {
      return null;
    }
    T parsed;
    try {
      // TODO: Technically wrong, since for String columns this will unquote strings.
      // Hopefully this won't be an issue, since quoting is really only likely to be used for
      // preserving whitespace.
      parsed = parseFn().apply(id);
    } catch (RuntimeException e) {
      // Any parse failure is reported uniformly below.
      parsed = null;
    }
    if (parsed == null) {
      throw new IllegalArgumentException(
          String.format("unknown value '%s' in column '%s'", id, getName()));
    }
    return parsed;
  }

  /**
   * Returns the serialized representation of a value in this column. This is the stored
   * representation of the value, not the value itself. A {@code null} value is serialized as the
   * empty string.
   */
  public final String serialize(@Nullable Object value) {
    if (value == null) {
      return "";
    }
    return serializeFn().apply(cast(value));
  }

  // Only to be called by ColumnGroup.
  final Column<T> fromPrototype(String suffix) {
    String name = getName() + ":" + checkName(suffix);
    return new AutoValue_Column<T>(name, type(), parseFn(), serializeFn(), defaultValue(), this);
  }

  // Returns whether this column was derived from the prototype of the given group.
  final boolean isIn(ColumnGroup<?, ?> group) {
    return group.prototype().equals(owningGroup());
  }

  @Override
  public final String toString() {
    return "Column{'" + getName() + "'}";
  }

  @Override
  public final boolean equals(Object obj) {
    if (obj instanceof Column<?>) {
      Column<?> other = (Column<?>) obj;
      return other.getName().equals(getName()) && other.type().equals(type());
    }
    return false;
  }

  @Override
  public final int hashCode() {
    return getName().hashCode() ^ type().hashCode();
  }

  // Visible only for AutoValue
  Column() {}
}

+ 100
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/ColumnGroup.java View File

@ -0,0 +1,100 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap;
import static java.util.function.Function.identity;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableBiMap;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag;
import java.util.Set;
import java.util.function.Function;
/**
 * A group of {@link RangeTable} columns. Columns in a group share the type of a "prototype"
 * column and have names of the form {@code "<prototype name>:<key>"}.
 */
@AutoValue
public abstract class ColumnGroup<K, T extends Comparable<T>> {
  /**
   * Returns a group for columns with the same type as the given "prototype" column and which has
   * a prefix that's the name of the prototype. Suffix values are parsed using the given function.
   */
  public static <K, T extends Comparable<T>> ColumnGroup<K, T> of(
      Column<T> prototype, Function<String, K> parseFn) {
    return new AutoValue_ColumnGroup<>(prototype, parseFn);
  }

  /** Returns a group for the specified prototype column keyed by {@link PhoneRegion}. */
  public static <T extends Comparable<T>> ColumnGroup<PhoneRegion, T> byRegion(
      Column<T> prototype) {
    return of(prototype, PhoneRegion::of);
  }

  /** Returns a group for the specified prototype column keyed by {@link SimpleLanguageTag}. */
  public static <T extends Comparable<T>> ColumnGroup<SimpleLanguageTag, T> byLanguage(
      Column<T> prototype) {
    return of(prototype, SimpleLanguageTag::of);
  }

  // Internal use only.
  abstract Column<T> prototype();

  // Parses the suffix of a column name into the group key.
  abstract Function<String, K> parseFn();

  /** Returns the column for a specified key. */
  public Column<T> getColumn(K key) {
    // The reason this does not just call "prototype().fromPrototype(...)" is that the key may not
    // be parsable by the function just because it's the "right" type. This allows people to pass
    // in a function that limits columns to some subset of the domain (e.g. a subset of region
    // codes).
    return getColumnFromId(key.toString());
  }

  /**
   * Returns the column for a specified ID string.
   *
   * @throws IllegalArgumentException if the ID cannot be parsed by this group's parsing function
   */
  public Column<T> getColumnFromId(String id) {
    try {
      // Parsing is done purely to validate the ID; the parsed key itself is not needed.
      Object unused = parseFn().apply(id);
    } catch (RuntimeException e) {
      throw new IllegalArgumentException(
          String.format("invalid column %s, not in group: %s", id, this), e);
    }
    return prototype().fromPrototype(id);
  }

  /**
   * Returns the key of a column in this group.
   *
   * @throws IllegalArgumentException if the column is not in this group
   */
  @SuppressWarnings("unchecked")
  public K getKey(Column<?> c) {
    checkArgument(c.isIn(this), "column %s not in group %s", c, this);
    // Cast is safe since any column in this group is a Column<T>.
    return extractKey((Column<T>) c);
  }

  /** Returns a bidirectional mapping from group key to column, for columns in this group. */
  @SuppressWarnings("unchecked")
  public ImmutableBiMap<K, Column<T>> extractGroupColumns(Set<Column<?>> columns) {
    return columns.stream()
        .filter(c -> c.isIn(this))
        // Cast is safe since any column in this group is a Column<T>.
        .map(c -> (Column<T>) c)
        .collect(toImmutableBiMap(this::extractKey, identity()));
  }

  // Assumes we've already verified that the column is in this group.
  private K extractKey(Column<T> column) {
    String name = column.getName();
    // The key is whatever follows the last ':' in the column name.
    return parseFn().apply(name.substring(name.lastIndexOf(':') + 1));
  }
}

+ 74
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvKeyMarshaller.java View File

@ -0,0 +1,74 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.util.Comparator.naturalOrder;
import com.google.common.collect.ImmutableList;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Stream;
/**
 * Marshaller to handle key serialization and ordering in {@code CsvTable}.
 *
 * <p>A marshaller bundles a function turning a key into its serialized parts, the inverse
 * function rebuilding a key from those parts, an optional key ordering and the names of the key
 * columns.
 */
public final class CsvKeyMarshaller<K> {
  private final Function<K, Stream<String>> serializer;
  private final Function<List<String>, K> deserializer;
  private final Optional<Comparator<K>> keyOrdering;
  private final ImmutableList<String> columnNames;

  /**
   * Creates a marshaller from the given functions, optional ordering and key column names.
   *
   * @throws NullPointerException if any of the functions or the ordering is null
   */
  public CsvKeyMarshaller(
      Function<K, Stream<String>> serialize,
      Function<List<String>, K> deserialize,
      Optional<Comparator<K>> ordering,
      List<String> columns) {
    this.serializer = checkNotNull(serialize);
    this.deserializer = checkNotNull(deserialize);
    this.keyOrdering = checkNotNull(ordering);
    this.columnNames = ImmutableList.copyOf(columns);
  }

  /** Convenience overload taking the key column names as varargs. */
  public CsvKeyMarshaller(
      Function<K, Stream<String>> serialize,
      Function<List<String>, K> deserialize,
      Optional<Comparator<K>> ordering,
      String... columns) {
    this(serialize, deserialize, ordering, ImmutableList.copyOf(columns));
  }

  /**
   * Returns a marshaller for keys which are single strings, held in the named column and sorted
   * by their natural order.
   */
  public static CsvKeyMarshaller<String> ofSortedString(String columnName) {
    return new CsvKeyMarshaller<String>(
        Stream::of, parts -> parts.get(0), Optional.of(naturalOrder()), columnName);
  }

  /** Returns the names of the key columns. */
  public ImmutableList<String> getColumns() {
    return columnNames;
  }

  // Converts a key into the sequence of its serialized parts.
  Stream<String> serialize(K key) {
    return serializer.apply(key);
  }

  // Rebuilds a key from its serialized parts.
  K deserialize(List<String> keyParts) {
    return deserializer.apply(keyParts);
  }

  // The ordering to apply to keys, if any.
  Optional<Comparator<K>> ordering() {
    return keyOrdering;
  }
}

+ 241
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvParser.java View File

@ -0,0 +1,241 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.CharMatcher.isNot;
import static com.google.common.base.CharMatcher.javaIsoControl;
import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Streams;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;
import javax.annotation.Nullable;
/**
* An efficient, fluent CSV parser which operates on a {@link Stream} of lines. It handles quoting
* of values, whitespace trimming and mapping values via a "schema" row.
*
* <p>This class is sadly necessary since the one in {@code com.google.common.text} doesn't support
* ignoring whitespace (and making it do so would take longer than writing this).
*
* <p>This class is immutable and thread-safe.
*/
// TODO: Investigate other "standard" CSV parsers such as org.apache.commons.csv.
public final class CsvParser {
/**
 * A consumer factory for CSV rows which can automatically map values according to a header row.
 *
 * <p>This class is immutable and thread-safe, but the consumers returned by {@link #mapTo} are
 * stateful (they remember the header row) and should be used for a single parse.
 */
public static final class RowMapper {
  @Nullable private final Consumer<ImmutableList<String>> headerHandler;

  private RowMapper(@Nullable Consumer<ImmutableList<String>> headerHandler) {
    this.headerHandler = headerHandler;
  }

  /**
   * Returns a row consumer which treats the first row it is given as the header, and passes each
   * subsequent row to the given handler as a map from header name to value. Empty values are
   * omitted from the map.
   *
   * <p>The returned consumer throws {@link IllegalArgumentException} if the header contains
   * duplicate names, or if a row has more values than the header has columns.
   */
  public Consumer<Stream<String>> mapTo(Consumer<ImmutableMap<String, String>> handler) {
    return new Consumer<Stream<String>>() {
      private ImmutableList<String> header = null;

      @Override
      public void accept(Stream<String> row) {
        if (header == null) {
          // Can contain duplicates (but that's bad for mapping).
          header = row.collect(toImmutableList());
          checkArgument(
              header.size() == header.stream().distinct().count(),
              "duplicate values in CSV header: %s",
              header);
          if (headerHandler != null) {
            headerHandler.accept(header);
          }
        } else {
          ImmutableMap.Builder<String, String> map = ImmutableMap.builder();
          int i = 0;
          for (String v : Streams.iterating(row)) {
            checkArgument(i < header.size(),
                "too many columns (expected %s): %s", header.size(), map);
            if (!v.isEmpty()) {
              map.put(header.get(i), v);
            }
            // Always advance to the next column, even for empty values, so that later values
            // stay aligned with their header.
            i++;
          }
          handler.accept(map.build());
        }
      }
    };
  }
}
private static final CharMatcher NON_WHITESPACE = CharMatcher.whitespace().negate();
private static final char QUOTE = '"';
private static final CharMatcher VALID_DELIMITER_CHAR =
NON_WHITESPACE.and(javaIsoControl().negate()).and(isNot(QUOTE)).or(CharMatcher.anyOf(" \t"));
public static CsvParser withSeparator(char delimiter) {
return new CsvParser(delimiter, false, false);
}
public static CsvParser commaSeparated() {
return withSeparator(',');
}
public static CsvParser tabSeparated() {
return withSeparator('\t');
}
public static RowMapper rowMapper() {
return new RowMapper(null);
}
public static RowMapper rowMapper(Consumer<ImmutableList<String>> headerHandler) {
return new RowMapper(headerHandler);
}
// The character which separates values within a row.
private final char delimiter;
// Whether whitespace surrounding values is skipped.
private final boolean trimWhitespace;
// Whether a quoted value is allowed to continue onto following lines.
private final boolean allowMultiline;
// Private since instances are obtained via the static factory methods, with modified copies
// being made by trimWhitespace() and allowMultiline(). Rejects any delimiter which is not
// matched by VALID_DELIMITER_CHAR.
private CsvParser(char delimiter, boolean trimWhitespace, boolean allowMultiline) {
checkArgument(VALID_DELIMITER_CHAR.matches(delimiter),
"invalid delimiter: %s", delimiter);
this.delimiter = delimiter;
this.trimWhitespace = trimWhitespace;
this.allowMultiline = allowMultiline;
}
/**
* Returns a copy of this parser which skips whitespace surrounding values.
*
* @throws IllegalArgumentException if the delimiter of this parser is itself whitespace.
*/
public CsvParser trimWhitespace() {
checkArgument(NON_WHITESPACE.matches(delimiter),
"cannot trim whitespace if delimiter is whitespace");
return new CsvParser(delimiter, true, allowMultiline);
}
/** Returns a copy of this parser which allows quoted values to span multiple lines. */
public CsvParser allowMultiline() {
return new CsvParser(delimiter, trimWhitespace, true);
}
/**
 * Parses the given lines, passing the values of each row in turn to the given callback.
 *
 * <p>The stream given to the callback is backed by a list which is cleared and reused once the
 * callback returns, so it must be consumed before the callback returns.
 *
 * @param lines the lines of the CSV data (without line terminators).
 * @param rowCallback receives the values of each parsed row, in order.
 */
public void parse(Stream<String> lines, Consumer<Stream<String>> rowCallback) {
  Iterator<String> lineIterator = lines.iterator();
  // Both of these are reused for every row to avoid needless reallocation.
  List<String> values = new ArrayList<>();
  StringBuilder scratch = new StringBuilder();
  for (;;) {
    if (!parseRow(lineIterator, values, scratch)) {
      return;
    }
    rowCallback.accept(values.stream());
    values.clear();
  }
}
/**
 * Parses the next row from the given lines, adding its values to {@code row}. A row normally
 * occupies one line, but if multiline values are allowed then a quoted value may continue onto
 * following lines (the lines are joined by {@code '\n'} in the resulting value).
 *
 * @param lines the remaining lines to be parsed.
 * @param row the list to which parsed values are added.
 * @param buffer an empty scratch buffer used for quoted values (left empty on normal return).
 * @return false if there were no lines left (in which case nothing is added to {@code row}),
 *     otherwise true.
 * @throws IllegalArgumentException if a quoted value is unterminated, if a closing quote is
 *     followed by anything other than a delimiter, or if an unquoted value contains a quote.
 */
private boolean parseRow(Iterator<String> lines, List<String> row, StringBuilder buffer) {
  if (!lines.hasNext()) {
    return false;
  }
  // First line of potentially several which make up this row.
  String line = lines.next();
  int start = maybeTrimWhitespace(line, 0);
  while (start < line.length()) {
    // "start" is the start of the next part and must be a valid index into current "line".
    // Could be high or low surrogate if badly formed string, or just point at the delimiter.
    char c = line.charAt(start);
    int pos;
    if (c == QUOTE) {
      // Quoted value, maybe parse and unescape multiple lines here.
      pos = ++start;
      while (true) {
        // This must be a loop (not a single test) because a continuation line can be empty, in
        // which case "pos" is immediately at end-of-line again and indexing into the line would
        // fail. Each pass appends the rest of the current line and moves onto the next one.
        while (pos == line.length()) {
          buffer.append(line, start, pos);
          checkArgument(allowMultiline && lines.hasNext(),
              "unterminated quoted value: %s", buffer);
          buffer.append('\n');
          line = lines.next();
          start = 0;
          pos = 0;
        }
        c = line.charAt(pos);
        if (c == QUOTE) {
          buffer.append(line, start, pos++);
          if (pos == line.length()) {
            // Closing quote at end-of-line.
            break;
          }
          if (line.charAt(pos) != QUOTE) {
            // Closing quote, which must be followed by a delimiter or end-of-line.
            pos = maybeTrimWhitespace(line, pos);
            checkArgument(pos == line.length() || line.codePointAt(pos) == delimiter,
                "unexpected character (expected delimiter) in: %s", line);
            break;
          }
          // "Double double quotes, what does it mean?" (oh yeah, a single double quote).
          buffer.append(QUOTE);
          start = pos + 1;
        }
        pos++;
      }
      row.add(buffer.toString());
      buffer.setLength(0);
    } else if (c == delimiter) {
      // Empty unquoted empty value (e.g. "foo,,bar").
      row.add("");
      pos = start;
    } else {
      // Non-empty unquoted value.
      pos = line.indexOf(delimiter, start + 1);
      if (pos == -1) {
        pos = line.length();
      }
      String value = line.substring(start, maybeTrimTrailingWhitespace(line, pos));
      checkArgument(value.indexOf(QUOTE) == -1,
          "quotes cannot appear in unquoted values: %s", value);
      row.add(value);
    }
    if (pos == line.length()) {
      // We hit end-of-line at the end of a value, so just return (no trailing empty value).
      return true;
    }
    // If not end-of-line, "pos" points at the last delimiter, so we can find the next start.
    start = maybeTrimWhitespace(line, pos + 1);
  }
  // We hit end-of-line either immediately, or after a delimiter. Either way we always need to
  // add a trailing empty value for consistency.
  row.add("");
  return true;
}
/**
 * Returns the index of the first non-whitespace character in {@code s} at or after {@code i}
 * (or the length of {@code s} if there is none). If this parser does not trim whitespace then
 * {@code i} is returned unchanged.
 */
private int maybeTrimWhitespace(String s, int i) {
  if (!trimWhitespace) {
    return i;
  }
  int next = NON_WHITESPACE.indexIn(s, i);
  return next == -1 ? s.length() : next;
}
/**
 * Returns the given (exclusive) end index moved backwards over any whitespace which immediately
 * precedes it in {@code s}. If this parser does not trim whitespace then {@code i} is returned
 * unchanged.
 */
private int maybeTrimTrailingWhitespace(String s, int i) {
  if (!trimWhitespace) {
    return i;
  }
  // There is no "lastIndexIn(String, int)" sadly.
  int end = i;
  for (; end > 0; end--) {
    if (!whitespace().matches(s.charAt(end - 1))) {
      break;
    }
  }
  return end;
}
}

+ 108
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvSchema.java View File

@ -0,0 +1,108 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.function.BiConsumer;
/**
 * A CSV schema is a combination of a key marshaller and table columns. A CSV schema defines a
 * CSV table with key columns, followed by non-key columns.
 */
@AutoValue
public abstract class CsvSchema<K> {
  /**
   * Returns a schema for a CSV file using the given marshaller to define key columns, and a table
   * schema to define any additional columns in a row.
   */
  public static <K> CsvSchema<K> of(CsvKeyMarshaller<K> marshaller, Schema columns) {
    return new AutoValue_CsvSchema<>(marshaller, columns);
  }

  /** The marshaller defining table keys and how they are serialized in CSV. */
  public abstract CsvKeyMarshaller<K> keyMarshaller();

  /** The table schema defining non-key columns in the table. */
  public abstract Schema columns();

  /** Returns the ordering for keys in the CSV table, as defined by the key marshaller. */
  public Optional<Comparator<K>> rowOrdering() {
    return keyMarshaller().ordering();
  }

  /**
   * Returns the ordering for additional non-key columns in the CSV table as defined by the table
   * schema.
   */
  public Comparator<Column<?>> columnOrdering() {
    return columns().ordering();
  }

  /**
   * Extracts the non-key columns of a table from the header row. The header row is expected to
   * contain the names of all columns (including key columns) in the CSV table and this method
   * verifies that the key columns are present as expected before resolving the non-key columns
   * in order.
   *
   * @param header the column names of the table, key columns first.
   * @return the non-key columns, in the order in which they appear in the header.
   * @throws IllegalArgumentException if the header does not start with the key column names.
   */
  public ImmutableList<Column<?>> parseHeader(List<String> header) {
    int hsize = keyMarshaller().getColumns().size();
    checkArgument(header.size() >= hsize, "CSV header too short: %s", header);
    checkArgument(header.subList(0, hsize).equals(keyMarshaller().getColumns()),
        "Invalid CSV header: %s", header);
    ImmutableList.Builder<Column<?>> columns = ImmutableList.builder();
    header.subList(hsize, header.size()).forEach(s -> columns.add(columns().getColumn(s)));
    return columns.build();
  }

  /**
   * Parses a row from a CSV table containing unescaped values. The leading values of the row are
   * deserialized as the key, and each remaining value is parsed by the column in the matching
   * position. A row may have fewer non-key values than there are columns.
   *
   * @param columns the non-key columns of the table (as returned by {@link #parseHeader}).
   * @param row the unescaped values of the row, key values first.
   * @param fn receives the key and the column assignments of the row.
   * @throws IllegalArgumentException if the row has too few values to contain the key, or has
   *     more non-key values than there are columns.
   */
  public void parseRow(
      ImmutableList<Column<?>> columns, List<String> row, BiConsumer<K, List<Assignment<?>>> fn) {
    int hsize = keyMarshaller().getColumns().size();
    checkArgument(row.size() >= hsize, "CSV row too short: %s", row);
    // Without this check an over-long row fails below with an index error which gives no hint
    // as to which row was at fault.
    checkArgument(row.size() - hsize <= columns.size(),
        "CSV row too long (expected at most %s values): %s", hsize + columns.size(), row);
    K key = keyMarshaller().deserialize(row.subList(0, hsize));
    List<Assignment<?>> rowAssignments = new ArrayList<>();
    for (int n = 0; n < row.size() - hsize; n++) {
      Column<?> c = columns.get(n);
      rowAssignments.add(
          Assignment.ofOptional(c, Optional.ofNullable(c.parse(row.get(n + hsize)))));
    }
    fn.accept(key, rowAssignments);
  }

  /**
   * Loads a CSV table with this schema from the given file. If the file does not exist then an
   * empty table is returned.
   */
  public CsvTable<K> load(Path file) throws IOException {
    if (!Files.exists(file)) {
      return CsvTable.builder(this).build();
    }
    try (Reader csv = Files.newBufferedReader(file)) {
      return CsvTable.importCsv(this, csv);
    }
  }

  /** Loads a CSV table with this schema from the given reader. */
  public CsvTable<K> load(Reader reader) throws IOException {
    return CsvTable.importCsv(this, reader);
  }
}

+ 589
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTable.java View File

@ -0,0 +1,589 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.ImmutableSortedSet.toImmutableSortedSet;
import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.LHS_CHANGED;
import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.LHS_ONLY;
import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.RHS_CHANGED;
import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.RHS_ONLY;
import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.UNCHANGED;
import com.google.auto.value.AutoValue;
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.google.common.collect.Tables;
import com.google.common.collect.TreeBasedTable;
import com.google.common.escape.CharEscaperBuilder;
import com.google.common.escape.Escaper;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Stream;
import javax.annotation.Nullable;
/**
* A general tabular representation of {@link Column} based data, which can include range data
* (via {@link RangeTable}) or other tabular data using a specified row key implementation.
*
* @param <K> the row key type.
*/
@AutoValue
public abstract class CsvTable<K> {
// The default delimiter for CSV tables. Note that the name of this constant is misspelled, but
// since it is public it cannot be renamed without breaking existing callers.
public static final String DEFAULT_DELIMETER = ";";
// Trim whitespace (since CSV files may be textually aligned) but don't allow multiline values
// (we handle that by JSON style escaping to keep the "one row per line" assumption true).
private static final CsvParser CSV_PARSER =
CsvParser.withSeparator(DEFAULT_DELIMETER.charAt(0)).trimWhitespace();
/**
* Mode to control how diffs are generated. In a diff table, rows have an additional
* {@code Status} applied to describe whether they are unchanged, modified or exclusive (i.e.
* exist only in one of the source tables).
*/
public enum DiffMode {
/** Include all rows in the "diff table" (unchanged, modified or exclusive). */
ALL,
/** Include only changed rows in the "diff table" (modified or exclusive). */
CHANGES,
/** Include only left-hand-side rows in the "diff table" (unchanged, modified or exclusive). */
LHS,
/** Include only right-hand-side rows in the "diff table" (unchanged, modified or exclusive). */
RHS,
}
/** A simple builder for programmatic generation of CSV tables. */
public static final class Builder<T> {
  private final CsvSchema<T> schema;
  private final Table<T, Column<?>, Object> table;

  private Builder(CsvSchema<T> schema) {
    this.schema = checkNotNull(schema);
    // Rows are held in insertion order unless the schema defines a row ordering, whereas the
    // columns of a row are always sorted according to the schema.
    if (!schema.rowOrdering().isPresent()) {
      this.table = Tables.newCustomTable(
          new LinkedHashMap<>(),
          () -> new TreeMap<>(schema.columnOrdering()));
    } else {
      this.table = TreeBasedTable.create(schema.rowOrdering().get(), schema.columnOrdering());
    }
  }

  /**
   * Replaces (or adds) the row with the given key, so that it contains exactly the given
   * mappings.
   */
  public Builder<T> putRow(T key, Map<Column<?>, ?> row) {
    table.rowMap().remove(key);
    return addRow(key, row);
  }

  /**
   * Adds a row which is not yet in the table, using the given column-to-value mappings.
   *
   * @throws IllegalArgumentException if the table already contains a row with the given key.
   */
  public Builder<T> addRow(T key, Map<Column<?>, ?> row) {
    checkArgument(!table.containsRow(key), "row '%s' already added\n%s", key, this);
    for (Map.Entry<Column<?>, ?> entry : row.entrySet()) {
      table.put(key, entry.getKey(), entry.getValue());
    }
    return this;
  }

  /**
   * Adds a row which is not yet in the table, using the given column assignments.
   *
   * @throws IllegalArgumentException if the table already contains a row with the given key.
   */
  public Builder<T> addRow(T key, List<Assignment<?>> row) {
    checkArgument(!table.containsRow(key), "row '%s' already added\n%s", key, this);
    put(key, row);
    return this;
  }

  /** Sets a single value in the table, where a null value removes any existing value. */
  public <V extends Comparable<V>> Builder<T> put(T key, Column<V> c, @Nullable V v) {
    schema.columns().checkColumn(c);
    if (v == null) {
      table.remove(key, c);
    } else {
      table.put(key, c, c.cast(v));
    }
    return this;
  }

  /**
   * Applies a sequence of assignments to a row, where an assignment without a value removes any
   * existing value for its column.
   */
  public Builder<T> put(T key, Iterable<Assignment<?>> assign) {
    for (Assignment<?> assignment : assign) {
      if (!assignment.value().isPresent()) {
        table.remove(key, assignment.column());
      } else {
        table.put(key, assignment.column(), assignment.value().get());
      }
    }
    return this;
  }

  /** Applies a sequence of assignments to a row (see {@link #put(Object, Iterable)}). */
  public Builder<T> put(T key, Assignment<?>... assign) {
    return put(key, Arrays.asList(assign));
  }

  /** Returns an unmodifiable view of the row keys currently in the table. */
  public Set<T> getKeys() {
    return Collections.unmodifiableSet(table.rowKeySet());
  }

  /** Returns the value for the given row and column, or null if there is none. */
  public <V extends Comparable<V>> V get(T key, Column<V> c) {
    return c.cast(table.get(key, c));
  }

  /** Removes a whole row from the table (which has no effect if the row does not exist). */
  public Builder<T> removeRow(T key) {
    table.rowKeySet().remove(key);
    return this;
  }

  /** Retains only the rows whose key matches the given predicate. */
  public Builder<T> filterRows(Predicate<T> predicate) {
    Set<T> liveKeys = table.rowKeySet();
    // Iterate over a snapshot since removing from the set while iterating it would fail.
    for (T key : ImmutableSet.copyOf(table.rowKeySet())) {
      if (predicate.test(key)) {
        continue;
      }
      liveKeys.remove(key);
    }
    return this;
  }

  /** Retains only the columns which match the given predicate. */
  public Builder<T> filterColumns(Predicate<Column<?>> predicate) {
    Set<Column<?>> rejected =
        table.columnKeySet().stream().filter(c -> !predicate.test(c)).collect(toImmutableSet());
    table.columnKeySet().removeAll(rejected);
    return this;
  }

  /** Returns an immutable CSV table with the current contents of this builder. */
  public CsvTable<T> build() {
    return from(schema, table);
  }

  @Override
  public String toString() {
    return build().toString();
  }
}
/** Returns a new, empty builder for a CSV table with the given schema. */
public static <K> Builder<K> builder(CsvSchema<K> schema) {
  return new Builder<>(schema);
}

/**
 * Returns an immutable CSV table holding a copy of the given table. The columns of the result
 * are sorted by the column ordering of the schema, and every one of them is checked against the
 * schema.
 */
public static <K> CsvTable<K> from(CsvSchema<K> schema, Table<K, Column<?>, Object> table) {
  ImmutableSet<Column<?>> sortedColumns =
      table.columnKeySet().stream().sorted(schema.columnOrdering()).collect(toImmutableSet());
  for (Column<?> column : sortedColumns) {
    schema.columns().checkColumn(column);
  }
  ImmutableMap<K, ImmutableMap<Column<?>, Object>> rows =
      ImmutableMap.copyOf(Maps.transformValues(table.rowMap(), ImmutableMap::copyOf));
  return new AutoValue_CsvTable<>(schema, rows, sortedColumns);
}
/**
 * Imports a semicolon separated CSV file, in which whitespace around values is ignored. The CSV
 * file needs to have the following layout:
 * <pre>
 * Key1 ; Key2 ; Column1 ; Column2 ; Column3
 * k1   ; k2   ; OTHER   ; "Text"  ; true
 * ...
 * </pre>
 * Where the first {@code N} columns represent the row key (as encapsulated by the key
 * {@link CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema}
 * via the column names.
 * <p>
 * Column values are represented in a semi-typed fashion according to the associated column (some
 * columns require values to be escaped, others do not). Note that it's the column that defines
 * whether the value needs escaping, not the content of the value itself (all values in a String
 * column are required to be quoted).
 */
public static <K> CsvTable<K> importCsv(CsvSchema<K> schema, Reader csv) throws IOException {
  return importCsv(schema, csv, CSV_PARSER);
}

/**
 * Imports a CSV file using a specified parser. Every value produced by the parser has its
 * backslash escape sequences unescaped before being added to the table, and the given reader is
 * closed once it has been read.
 */
public static <K> CsvTable<K> importCsv(CsvSchema<K> schema, Reader csv, CsvParser csvParser)
    throws IOException {
  TableParser<K> tableParser = new TableParser<>(schema);
  try (BufferedReader reader = new BufferedReader(csv)) {
    Consumer<Stream<String>> rowHandler = values -> {
      ImmutableList<String> unescaped =
          values.map(CsvTable::unescapeSingleLineCsvText).collect(toImmutableList());
      tableParser.accept(unescaped);
    };
    csvParser.parse(reader.lines(), rowHandler);
  }
  return tableParser.done();
}
/**
 * Imports a sequence of rows to create a CSV table, where the first row is the header. Rows are
 * requested from the supplier until it returns null. The values in the rows are unescaped and
 * require no explicit parsing.
 */
public static <K> CsvTable<K> importRows(CsvSchema<K> schema, Supplier<List<String>> rows) {
  TableParser<K> tableParser = new TableParser<>(schema);
  for (List<String> next = rows.get(); next != null; next = rows.get()) {
    tableParser.accept(next);
  }
  return tableParser.done();
}
/**
 * Creates a "diff table" based on the given left and right table inputs. The resulting table
 * has a new key column which indicates (via the {@code Status} enum) how rows differ between
 * the left and right tables. A row which exists in both tables with different values appears
 * once for each side (subject to the given mode).
 *
 * @throws IllegalArgumentException if the schemas of the two tables are not equal.
 * @throws NullPointerException if the mode is null.
 */
public static <K> CsvTable<DiffKey<K>> diff(CsvTable<K> lhs, CsvTable<K> rhs, DiffMode mode) {
  checkArgument(lhs.getSchema().equals(rhs.getSchema()), "Cannot diff with different schemas");
  checkNotNull(mode, "Must specify a diff mode");
  boolean includeLhs = mode != DiffMode.RHS;
  boolean includeRhs = mode != DiffMode.LHS;
  boolean includeUnchanged = mode != DiffMode.CHANGES;

  CsvKeyMarshaller<DiffKey<K>> diffMarshaller = DiffKey.wrap(lhs.getSchema().keyMarshaller());
  CsvSchema<DiffKey<K>> diffSchema = CsvSchema.of(diffMarshaller, lhs.getSchema().columns());
  Builder<DiffKey<K>> diff = CsvTable.builder(diffSchema);

  // Rows which exist in only one of the tables.
  if (includeLhs) {
    for (K key : Sets.difference(lhs.getKeys(), rhs.getKeys())) {
      diff.addRow(DiffKey.of(LHS_ONLY, key), lhs.getRow(key));
    }
  }
  if (includeRhs) {
    for (K key : Sets.difference(rhs.getKeys(), lhs.getKeys())) {
      diff.addRow(DiffKey.of(RHS_ONLY, key), rhs.getRow(key));
    }
  }
  // Rows which exist in both tables.
  for (K key : Sets.intersection(lhs.getKeys(), rhs.getKeys())) {
    Map<Column<?>, Object> before = lhs.getRow(key);
    Map<Column<?>, Object> after = rhs.getRow(key);
    if (!before.equals(after)) {
      if (includeLhs) {
        diff.addRow(DiffKey.of(LHS_CHANGED, key), before);
      }
      if (includeRhs) {
        diff.addRow(DiffKey.of(RHS_CHANGED, key), after);
      }
    } else if (includeUnchanged) {
      diff.addRow(DiffKey.of(UNCHANGED, key), before);
    }
  }
  return diff.build();
}
/** Returns the schema for this table. */
public abstract CsvSchema<K> getSchema();
/** Returns the rows of the table (not public, to avoid exposing untyped access to values). */
// Note that this cannot easily be replaced by ImmutableTable (as of Jan 2019) because
// ImmutableTable has severe limitations on how row/column ordering is handled that make the
// row/column ordering required in CsvTable currently impossible.
abstract ImmutableMap<K, ImmutableMap<Column<?>, Object>> getRows();
/**
* Returns the set of columns for the table (excluding the synthetic key columns, which are
* handled by the marshaller).
*/
public abstract ImmutableSet<Column<?>> getColumns();
/** Returns whether the table has no rows. */
public boolean isEmpty() {
return getRows().isEmpty();
}
/** Returns the set of keys for the table. */
public ImmutableSet<K> getKeys() {
return getRows().keySet();
}
/**
* Returns a single row as a map of column assignments (the map is empty if the row is not in
* the table).
*/
public ImmutableMap<Column<?>, Object> getRow(K rowKey) {
ImmutableMap<Column<?>, Object> row = getRows().get(rowKey);
return row != null ? row : ImmutableMap.of();
}
/** Returns whether a row is in the table. */
public boolean containsRow(K rowKey) {
return getKeys().contains(rowKey);
}
/** Returns a new builder with the schema of this table, containing a copy of all its rows. */
public Builder<K> toBuilder() {
Builder<K> builder = builder(getSchema());
getRows().forEach(builder::putRow);
return builder;
}
/** Returns the table column names, including the key columns, in schema order. */
public Stream<String> getCsvHeader() {
return Stream.concat(
getSchema().keyMarshaller().getColumns().stream(),
getColumns().stream().map(Column::getName));
}
/**
* Returns the unescaped CSV values for the specified row, in order (key values first).
*
* @throws IllegalArgumentException if the table does not contain a row with the given key.
*/
public Stream<String> getCsvRow(K key) {
checkArgument(getKeys().contains(key), "no such row: %s", key);
// Note that we pass the raw value (possibly null) to serialize so that we don't conflate
// missing and default values.
return Stream.concat(
getSchema().keyMarshaller().serialize(key),
getColumns().stream().map(c -> c.serialize(getOrNull(key, c))));
}
/**
* Exports the given table by writing its values as semicolon separated "CSV", with or without
* alignment. For example (with alignment):
*
* <pre>
* Key1 ; Key2 ; Column1 ; Column2 ; Column3
* k1   ; k2   ; OTHER   ; "Text"  ; true
* ...
* </pre>
*
* Where the first {@code N} columns represent the row key (as encapsulated by the key {@link
* CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema} via the
* column names.
*
* @return false if the table is empty (in which case nothing is written), otherwise true.
*/
public boolean exportCsv(Writer writer, boolean align) {
return exportCsvHelper(writer, align, getColumns());
}
/**
* Exports the given table by writing its values as semicolon separated "CSV", with or without
* alignment. For example (with alignment):
*
* <pre>
* Key1 ; Key2 ; Column1 ; Column2 ; Column3
* k1   ; k2   ; OTHER   ; "Text"  ; true
* ...
* </pre>
*
* Where the first {@code N} columns represent the row key (as encapsulated by the key {@link
* CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema} via the
* column names. This will add columns that are part of the schema for the given table but have no
* assigned values.
*
* @return false if the table is empty (in which case nothing is written), otherwise true.
*/
public boolean exportCsvWithEmptyColumnsPresent(Writer writer, boolean align) {
return exportCsvHelper(
writer,
align,
Stream.concat(getSchema().columns().getColumns().stream(), getColumns().stream())
.collect(ImmutableSet.toImmutableSet()));
}
/**
 * Writes the header row, followed by one row per key, with each row holding the serialized key
 * followed by the formatted values of the given columns.
 *
 * @return false if the table is empty (in which case nothing is written), otherwise true.
 */
private boolean exportCsvHelper(
    Writer writer, boolean align, ImmutableSet<Column<?>> columnsToExport) {
  if (isEmpty()) {
    // Exit for empty tables (CSV file is truncated). The caller may then delete the empty file.
    return false;
  }
  CsvTableCollector collector = new CsvTableCollector(align);
  Stream<String> header =
      Stream.concat(
          getSchema().keyMarshaller().getColumns().stream(),
          columnsToExport.stream().map(Column::getName));
  collector.accept(header.distinct());
  // Format raw values (possibly null) to avoid default values everywhere.
  getKeys().forEach(
      key ->
          collector.accept(
              Stream.concat(
                  getSchema().keyMarshaller().serialize(key),
                  columnsToExport.stream().map(c -> formatValue(c, getOrNull(key, c))))));
  collector.writeCsv(writer);
  return true;
}
/** Returns the raw value for the given row and column, or null if there is no such value. */
@Nullable private <T extends Comparable<T>> T getOrNull(K rowKey, Column<T> column) {
return column.cast(getRow(rowKey).get(column));
}
/**
* Returns the value from the underlying table for the given row and column if present.
*/
public <T extends Comparable<T>> Optional<T> get(K rowKey, Column<T> column) {
return Optional.ofNullable(getOrNull(rowKey, column));
}
/**
* Returns the value from the underlying table for the given row and column, or the (non-null)
* default value.
*/
public <T extends Comparable<T>> T getOrDefault(K rowKey, Column<T> column) {
T value = getOrNull(rowKey, column);
return value != null ? value : column.defaultValue();
}
/**
* Returns the set of unique values in the given column. Note that if some rows do not have a
* value, then this will NOT result in the column default value being in the returned set. An
* empty column will result in an empty set being returned here.
*/
public <T extends Comparable<T>> ImmutableSortedSet<T> getValues(Column<T> column) {
return getKeys().stream()
.map(k -> getOrNull(k, column))
.filter(Objects::nonNull)
.collect(toImmutableSortedSet(Ordering.natural()));
}
/** Returns the aligned CSV representation of this table (empty if the table has no rows). */
@Override
public final String toString() {
StringWriter w = new StringWriter();
exportCsv(w, true);
return w.toString();
}
/**
 * Parses CSV data on a per-row basis, deserializing keys and adding values to a table. The first
 * row accepted is treated as the header, and every later row is added to the table.
 */
static class TableParser<K> implements Consumer<List<String>> {
  private final Builder<K> table;
  // Null until the header row has been processed.
  private ImmutableList<Column<?>> columns = null;

  TableParser(CsvSchema<K> schema) {
    this.table = builder(schema);
  }

  @Override
  public void accept(List<String> row) {
    if (columns != null) {
      table.schema.parseRow(columns, row, table::addRow);
      return;
    }
    columns = table.schema.parseHeader(row);
  }

  /** Returns the table built from the rows accepted so far. */
  public CsvTable<K> done() {
    return table.build();
  }
}
// Newlines can, in theory, be emitted "raw" in the CSV output inside a quoted string, but
// this breaks all sorts of nice properties of CSV files, since there's no longer one row per
// line. This export process escapes literal newlines and other control characters into Json
// like escape sequences ('\n', '\t', '\\' etc...). Unlike Json however, any double-quotes are
// _not_ escaped via '\' since the CSV way to escape those is via doubling. We leave other
// non-ASCII characters as-is, since this is meant to be as human readable as possible.
private static final Escaper ESCAPER = new CharEscaperBuilder()
.addEscape('\n', "\\n")
.addEscape('\r', "\\r")
.addEscape('\t', "\\t")
.addEscape('\\', "\\\\")
// This is a special case only required when writing CSV file (since the parser handles
// unescaping quotes when they are read back in). In theory it should be part of a separate
// step during CSV writing, but it's not worth splitting it out. This is not considered an
// unsafe char (since it definitely does appear).
.addEscape('"', "\"\"")
.toEscaper();
// The characters which are written as backslash escape sequences (newline, carriage return,
// tab and backslash itself).
private static final CharMatcher ESCAPED_CHARS = CharMatcher.anyOf("\n\r\t\\");
// Any ISO control character which has no escape sequence, and so cannot appear in a value.
private static final CharMatcher UNSAFE_CHARS =
CharMatcher.javaIsoControl().and(ESCAPED_CHARS.negate());
/**
 * Returns the text to write into a CSV file for the given (possibly null) value of a column.
 * Empty serialized values are written as-is, values of number, boolean and enum columns are
 * written unquoted, and all other values are quoted and escaped.
 *
 * @throws IllegalArgumentException if an unquoted value contains a character which would need
 *     escaping.
 */
private static String formatValue(Column<?> column, @Nullable Object value) {
  String unescaped = column.serialize(value);
  if (unescaped.isEmpty()) {
    return unescaped;
  }
  // Slightly risky with enums, since an enum could have ';' in its toString() representation.
  // However since columns and their semantics are tightly controlled, this should never happen.
  Class<?> type = column.type();
  boolean writtenUnquoted =
      Number.class.isAssignableFrom(type) || type == Boolean.class || type.isEnum();
  if (!writtenUnquoted) {
    return escapeForSingleLineCsv(unescaped);
  }
  checkArgument(ESCAPED_CHARS.matchesNoneOf(unescaped), "Bad 'safe' value: %s", unescaped);
  return unescaped;
}
/**
* Escapes and quotes an arbitrary text string, ensuring it is safe for use as a single-line CSV
* value. Newlines, carriage returns and tabs are backslash escaped (as is backslash itself),
* double quotes are doubled, and other ISO control characters are not permitted. The result is
* always surrounded by double quotes.
*
* <p>The purpose of this method is to make arbitrary Unicode text readable in a single line of
* a CSV file so that we can rely on per-line processing tools, such as "grep" or "sed" if needed
* without requiring expensive conversion to/from a spreadsheet.
*
* @throws IllegalArgumentException if the text contains an ISO control character other than
*     newline, carriage return or tab.
*/
public static String escapeForSingleLineCsv(String unescaped) {
checkArgument(UNSAFE_CHARS.matchesNoneOf(unescaped), "Bad string value: %s", unescaped);
return '"' + ESCAPER.escape(unescaped) + '"';
}
/**
 * Unescapes a line of text escaped by {@link #escapeForSingleLineCsv(String)} to restore literal
 * newlines and other backslash-escaped characters. Note that if the given string already has
 * newlines present, they are preserved but will then be escaped if the text is re-escaped later.
 *
 * <p>This does not remove surrounding quotes or undo the doubling of quotes.
 *
 * @param s the text to unescape.
 * @return the unescaped text (the given string itself if it contains no backslash).
 * @throws IllegalArgumentException if the text ends with an unpaired backslash.
 * @throws NullPointerException if a backslash is followed by a character which does not form a
 *     known escape sequence.
 */
public static String unescapeSingleLineCsvText(String s) {
  int i = s.indexOf('\\');
  if (i == -1) {
    return s;
  }
  StringBuilder out = new StringBuilder();
  int start = 0;
  do {
    out.append(s, start, i);
    // Step onto the character after the backslash, which must exist (otherwise the charAt()
    // call below would fail with an index error that says nothing about the cause).
    i++;
    checkArgument(i < s.length(), "incomplete escape sequence at end of text: %s", s);
    char c = s.charAt(i);
    out.append(checkNotNull(UNESCAPE.get(c), "invalid escape sequence: \\%s", c));
    start = i + 1;
    i = s.indexOf('\\', start);
  } while (i != -1);
  return out.append(s, start, s.length()).toString();
}
// Maps the character which follows a backslash in escaped text to the character it represents.
private static final ImmutableMap<Character, Character> UNESCAPE =
ImmutableMap.<Character, Character>builder()
.put('n', '\n')
.put('r', '\r')
.put('t', '\t')
.put('\\', '\\')
.build();
// Visible for AutoValue only.
CsvTable() {}
}

+ 99
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTableCollector.java View File

@ -0,0 +1,99 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.stream.Collectors.joining;
import com.google.common.collect.ImmutableList;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.function.Consumer;
import java.util.stream.IntStream;
import java.util.stream.Stream;
/** Collects cell values and tracks maximum cell width to make it easy to output aligned CSV. */
public final class CsvTableCollector implements Consumer<Stream<String>> {
  // Maps a column index to the length of the longest value seen in that column.
  private final NavigableMap<Integer, Integer> maxLengths = new TreeMap<>();
  // The collected rows, in the order in which they were accepted.
  private final List<List<String>> cells = new ArrayList<>();
  private final boolean align;

  public CsvTableCollector(boolean align) {
    this.align = align;
  }

  /**
   * Writes the contents of this table, with optional alignment, as a CSV table, one row per
   * line. Trailing empty values are omitted from each row. The given writer is closed once the
   * table has been written.
   */
  public void writeCsv(Writer writer) {
    // Pad elements with whitespace when aligning (since we've gone to all the effort of padding
    // everything else).
    String separator = align ? " ; " : ";";
    try (PrintWriter out = new PrintWriter(writer)) {
      for (List<String> row : cells) {
        // No need to use CharMatcher to trim "properly" since only ASCII space is possible.
        out.println(formatRow(row).collect(joining(separator)).trim());
      }
    }
  }

  /**
   * Accepts the next row in the CSV table. Note that the first row accepted is expected to be
   * the title row.
   *
   * <p>Values in the given row are expected to have already been escaped if necessary. The row
   * is expected to contain a value for every column of the table, even if only an empty string
   * to indicate an empty cell.
   */
  @Override
  public void accept(Stream<String> row) {
    ImmutableList<String> values = row.collect(toImmutableList());
    int column = 0;
    for (String value : values) {
      updateMaxLength(value, column++);
    }
    cells.add(values);
  }

  // Returns the values of a row without its trailing empty values, padded if aligning.
  private Stream<String> formatRow(List<String> row) {
    int end = row.size();
    for (; end > 0; end--) {
      if (!row.get(end - 1).isEmpty()) {
        break;
      }
    }
    if (!align) {
      return row.stream().limit(end);
    }
    return IntStream.range(0, end).mapToObj(n -> pad(row.get(n), maxLength(n)));
  }

  // Pads a value with trailing spaces up to the given length.
  private static String pad(String s, int len) {
    if (len <= 0) {
      return "";
    }
    return String.format("%-" + len + "s", s);
  }

  private int maxLength(int index) {
    return maxLengths.getOrDefault(index, 0);
  }

  private void updateMaxLength(String s, int index) {
    // Note: This isn't Unicode aware, but in reality it's not that important.
    maxLengths.merge(index, s.length(), Integer::max);
  }
}

+ 100
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/DiffKey.java View File

@ -0,0 +1,100 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Stream;
/**
 * Key for use in "diff" tables, allowing rows to be marked with a diff status.
 *
 * <p>A diff key pairs the key of a row from one of the tables being compared with a
 * {@link Status} describing how that row is affected by the diff.
 */
@AutoValue
public abstract class DiffKey<K> {
  /**
   * Status for rows in a "diff table". Every row in a diff table has a {@code DiffKey}, with a
   * status. Modified rows appear twice in the diff table, once for the left-side row, and once for
   * the right-side row.
   */
  public enum Status {
    /** A row which appears exclusively in the left-hand-side of the diff. */
    LHS_ONLY("----"),
    /** A row which appears exclusively in the right-hand-side of the diff. */
    RHS_ONLY("++++"),
    /** The left-hand-side row which was modified by the diff. */
    LHS_CHANGED("<<<<"),
    /** The right-hand-side row which was modified by the diff. */
    RHS_CHANGED(">>>>"),
    /** A row unchanged by the diff. */
    UNCHANGED("====");

    // Reverse lookup from the label written in the "Diff" column back to its status.
    private static final ImmutableMap<String, Status> MAP =
        Maps.uniqueIndex(EnumSet.allOf(Status.class), Status::getLabel);

    private final String label;

    Status(String label) {
      this.label = label;
    }

    /** Returns the label used to represent this status in the "Diff" column. */
    String getLabel() {
      return label;
    }

    /** Returns the status with the given label, or {@code null} if no status has that label. */
    static Status parse(String s) {
      return MAP.get(s);
    }
  }

  /**
   * Wraps the given key marshaller so that it handles diff keys. The resulting marshaller has an
   * additional leading "Diff" column holding the status label, followed by the columns of the
   * original key.
   */
  static <K> CsvKeyMarshaller<DiffKey<K>> wrap(CsvKeyMarshaller<K> keyMarshaller) {
    List<String> keyColumns = new ArrayList<>();
    keyColumns.add("Diff");
    keyColumns.addAll(keyMarshaller.getColumns());
    return new CsvKeyMarshaller<>(
        serialize(keyMarshaller), deserialize(keyMarshaller), ordering(keyMarshaller), keyColumns);
  }

  /** Returns a diff key with the given status for the given original key. */
  static <K> DiffKey<K> of(Status status, K key) {
    return new AutoValue_DiffKey<>(status, key);
  }

  /** Returns the diff status of the row identified by this key. */
  public abstract Status getStatus();

  /** Returns the key of the row in the table from which it came. */
  public abstract K getOriginalKey();

  // Serializes the status label first, followed by the serialized form of the original key.
  private static <T> Function<DiffKey<T>, Stream<String>> serialize(CsvKeyMarshaller<T> m) {
    return k -> Stream.concat(Stream.of(k.getStatus().getLabel()), m.serialize(k.getOriginalKey()));
  }

  // Reads the status label from the first column and delegates the remaining columns to the
  // original key marshaller.
  private static <T> Function<List<String>, DiffKey<T>> deserialize(CsvKeyMarshaller<T> m) {
    return r -> {
      String label = r.get(0);
      Status status = Status.parse(label);
      // Fail with a message naming the bad label rather than letting a null status reach the
      // generated value class, which would only report a null property.
      if (status == null) {
        throw new IllegalArgumentException("unknown diff status label: '" + label + "'");
      }
      return of(status, m.deserialize(r.subList(1, r.size())));
    };
  }

  // Orders diff keys by their original key first (when the original marshaller defines an
  // ordering) and then by status.
  private static <T> Optional<Comparator<DiffKey<T>>> ordering(CsvKeyMarshaller<T> m) {
    return m.ordering().map(o -> {
      // Weird bug (possibly IntelliJ) means it really doesn't do well inferring types over lambdas
      // for this sort of chained API call. Pulling into separate variables works fine.
      Comparator<DiffKey<T>> keyFn = Comparator.comparing(DiffKey::getOriginalKey, o);
      return keyFn.thenComparing(DiffKey::getStatus);
    });
  }
}

+ 186
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/DisjointRangeMap.java View File

@ -0,0 +1,186 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Maps.filterValues;
import static com.google.common.collect.Maps.transformValues;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;
import javax.annotation.Nullable;
/**
 * A mapping from category values to a set of disjoint ranges. This is used only by the RangeTable
 * class to represent a column of values.
 */
final class DisjointRangeMap<T extends Comparable<T>> {

  /** A mutable builder to which ranges can be assigned (or unassigned) for values of a column. */
  static final class Builder<T extends Comparable<T>> {
    private final Column<T> column;
    private final SortedMap<T, RangeTree> map = new TreeMap<>();
    // Cache of all assigned ranges, used repeatedly by RangeTable. This could be recalculated
    // every time it's needed, but it's just as easy to keep it cached here.
    private RangeTree assignedRanges = RangeTree.empty();

    Builder(Column<T> column) {
      this.column = checkNotNull(column);
    }

    /**
     * Returns the ranges assigned to the given value (returns the empty range if the given value
     * is unassigned in this column). Note that unlike table operations, it makes no sense to allow
     * {@code null} to be used to determine the unassigned ranges, since calculating that requires
     * knowledge of the table in which this column exists.
     */
    RangeTree getRanges(Object value) {
      T checkedValue = column.cast(checkNotNull(value));
      return map.getOrDefault(checkedValue, RangeTree.empty());
    }

    /** Returns the currently assigned ranges for this column. */
    RangeTree getAssignedRanges() {
      return assignedRanges;
    }

    /**
     * Checks whether the "proposed" assignment would succeed with the specified overwrite mode
     * (assignments always succeed if the mode is {@link OverwriteMode#ALWAYS}). A {@code null}
     * value (unassignment) is only permitted when the mode is {@code ALWAYS}.
     * <p>
     * This is useful as a separate method when multiple changes are to be made which cannot be
     * allowed to fail halfway through.
     *
     * @param value the proposed value, or {@code null} to propose unassignment.
     * @param ranges the ranges proposed for assignment.
     * @param mode the overwrite mode describing how to handle existing assignments.
     * @return the given value, cast to the value type of this column.
     * @throws IllegalArgumentException if the value cannot be added to the column, or if the value
     *     is {@code null} and the mode is not {@code ALWAYS}.
     * @throws RangeException if the write is not possible with the given mode.
     */
    T checkAssign(@Nullable Object value, RangeTree ranges, OverwriteMode mode) {
      // Always check the proposed value (for consistency).
      T checkedValue = column.cast(value);
      if (mode != OverwriteMode.ALWAYS) {
        checkArgument(checkedValue != null,
            "Assigning a null value (unassignment) with mode other than ALWAYS makes no sense: %s",
            mode);
        if (mode == OverwriteMode.SAME) {
          // Don't care about ranges that are already in the map.
          ranges = ranges.subtract(map.getOrDefault(checkedValue, RangeTree.empty()));
        }
        RangeException.checkDisjoint(column, checkedValue, assignedRanges, ranges, mode);
      }
      return checkedValue;
    }

    /**
     * Assigns the given ranges to the specified value in this column. After a call to
     * {@code assign()} with a non-null value it is true that:
     * <ul>
     * <li>The result of {@code getRanges(value)} will contain at least the given ranges.
     * <li>No ranges assigned to any other category value will intersect with the given ranges.
     * </ul>
     * If ranges are "assigned" to {@code null}, it has the effect of unassigning them.
     *
     * @param value the category value to assign ranges to, or {@code null} to unassign.
     * @param ranges the ranges to assign to the given category value.
     * @param mode the overwrite mode describing how to handle existing assignments.
     * @throws IllegalArgumentException if the assignment violates the given {@link OverwriteMode}.
     */
    void assign(@Nullable Object value, RangeTree ranges, OverwriteMode mode) {
      T checkedValue = checkAssign(value, ranges, mode);
      // Now unassign the ranges for all other values (only necessary if mode is "ALWAYS" since in
      // other modes we've already ensured there's no intersection).
      if (mode == OverwriteMode.ALWAYS) {
        RangeTree overlap = assignedRanges.intersect(ranges);
        if (!overlap.isEmpty()) {
          for (Entry<T, RangeTree> e : map.entrySet()) {
            // Skip needless extra work for the value we are about to assign.
            if (!e.getKey().equals(checkedValue)) {
              e.setValue(e.getValue().subtract(overlap));
            }
          }
        }
      }
      if (checkedValue != null) {
        map.put(checkedValue, ranges.union(map.getOrDefault(checkedValue, RangeTree.empty())));
        assignedRanges = assignedRanges.union(ranges);
      } else {
        assignedRanges = assignedRanges.subtract(ranges);
      }
    }

    /** Builds the range map. Values left with no assigned ranges are omitted from the result. */
    DisjointRangeMap<T> build() {
      return new DisjointRangeMap<T>(column, map, assignedRanges);
    }
  }

  private final Column<T> column;
  private final ImmutableSortedMap<T, RangeTree> map;
  private final RangeTree assignedRanges;

  private DisjointRangeMap(
      Column<T> column, SortedMap<T, RangeTree> map, RangeTree assignedRanges) {
    this.column = checkNotNull(column);
    // Values with no remaining ranges are dropped, so only non-empty ranges are ever stored.
    this.map = ImmutableSortedMap.copyOfSorted(filterValues(map, r -> !r.isEmpty()));
    this.assignedRanges = assignedRanges;
  }

  /**
   * Returns the ranges assigned to the given value, or the empty range if the value has no
   * ranges assigned in this column (matching the behaviour of {@link Builder#getRanges}).
   *
   * @throws IllegalArgumentException if {@code value} is not a value in this category.
   */
  RangeTree getRanges(Object value) {
    // Values without ranges are not stored in the map, so a plain get() would return null here.
    return map.getOrDefault(column.cast(value), RangeTree.empty());
  }

  /** Returns all values assigned to non-empty ranges in this column. */
  ImmutableSet<T> getAssignedValues() {
    return map.keySet();
  }

  /** Returns the union of all assigned ranges in this column. */
  RangeTree getAssignedRanges() {
    return assignedRanges;
  }

  /** Intersects this column with the given bounds. */
  DisjointRangeMap<T> intersect(RangeTree bounds) {
    return new DisjointRangeMap<T>(
        column, transformValues(map, r -> r.intersect(bounds)), assignedRanges.intersect(bounds));
  }

  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof DisjointRangeMap<?>)) {
      return false;
    }
    // No need to check "assignedRanges" since it's just a cache of other values anyway.
    DisjointRangeMap<?> other = (DisjointRangeMap<?>) obj;
    return this == other || (column.equals(other.column) && map.equals(other.map));
  }

  @Override
  public int hashCode() {
    return column.hashCode() ^ map.hashCode();
  }
}

+ 116
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/MultiValue.java View File

@ -0,0 +1,116 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.CharMatcher.whitespace;
import static com.google.common.collect.ImmutableList.toImmutableList;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Comparators;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import java.util.Comparator;
import java.util.function.Function;
/**
* A wrapper to permit sets of values to be specified as a single "cell" in a CsvTable or
* RangeTable. Currently only sets of values are permitted (not lists) so duplicate elements are
* not allowed. This is easy to change in future, but the real data suggests no use case for that.
*
* <p>The expectation of this class is that specific, non-generic subclasses will be made to
* "solidify" the choice of value type, separator and value ordering. This is why those specific
* attributes are not tested in the equals()/hashCode() methods, since they are expected to be
* constant for a given implementation. Subclasses should be final, and look something like:
* <pre> {@code
* public static final class Foos extends MultiValue<Foo, Foos> {
* private static final Foos EMPTY = new Foos(ImmutableSet.of());
*
* public static Column<Foos> column(String name) {
* return Column.create(Foos.class, name, EMPTY, Foos::new);
* }
*
* public static Foos of(Iterable<Foo> foos) {
* return new Foos(foos);
* }
*
* private Foos(Iterable<Foo> foos) { super(foos, <separator>, <ordering>, <sorted>); }
* private Foos(String s) { super(s, <parseFn>, <separator>, <ordering>, <sorted>); }
* }
* }</pre>
* where {@code <separator>}, {@code <ordering>} and {@code <sorted>} are the same constants in
* both places.
*/
public abstract class MultiValue<T, M extends MultiValue<T, M>>
implements Comparable<M> {
private final ImmutableSet<T> values;
private final char separator;
private final Comparator<Iterable<T>> comparator;
protected MultiValue(
String s, Function<String, T> fn, char separator, Comparator<T> comparator, boolean sorted) {
this(parse(s, fn, separator), separator, comparator, sorted);
}
protected MultiValue(
Iterable<T> values, char separator, Comparator<T> comparator, boolean sorted) {
this.separator = separator;
this.values =
sorted ? ImmutableSortedSet.copyOf(comparator, values) : ImmutableSet.copyOf(values);
this.comparator = Comparators.lexicographical(comparator);
}
private static <T> ImmutableList<T> parse(String s, Function<String, T> fn, char separator) {
Splitter splitter = Splitter.on(separator).omitEmptyStrings().trimResults(whitespace());
return splitter.splitToList(s).stream().map(fn).collect(toImmutableList());
}
public final ImmutableSet<T> getValues() {
return values;
}
public final char separator() {
return separator;
}
@Override
public final int compareTo(M that) {
// The separator doesn't factor in here since it's always the same.
return comparator.compare(this.getValues(), that.getValues());
}
@Override
@SuppressWarnings({"unchecked", "EqualsGetClass"})
public final boolean equals(Object obj) {
// Check exact subclass, since we expect separators and ordering to always be the same.
if (obj == null || obj.getClass() != getClass()) {
return false;
}
return getValues().equals(((MultiValue<T, M>) obj).getValues());
}
@Override
public final int hashCode() {
return getValues().hashCode();
}
@Override
public final String toString() {
return Joiner.on(separator()).join(getValues());
}
}

+ 74
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeException.java View File

@ -0,0 +1,74 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.util.stream.Collectors.joining;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
import javax.annotation.Nullable;
/** A structured exception which should be used whenever structural errors occur in table data. */
public final class RangeException extends IllegalArgumentException {
  // Called when assigning ranges, depending on the overwrite mode. As more cases are added,
  // consider refactoring and subclassing for clean semantics.
  static <T extends Comparable<T>> void checkDisjoint(
      Column<T> column, T value, RangeTree existing, RangeTree ranges, OverwriteMode mode) {
    RangeTree overlap = existing.intersect(ranges);
    if (overlap.isEmpty()) {
      return;
    }
    // A non-empty intersection implies both inputs are also non-empty.
    throw new RangeException(column, value, existing, ranges, overlap, mode);
  }

  RangeException(Column<?> column,
      @Nullable Object value,
      RangeTree existing,
      RangeTree ranges,
      RangeTree intersection,
      OverwriteMode mode) {
    super(explain(checkNotNull(column), value, existing, ranges, intersection, checkNotNull(mode)));
  }

  // Builds the exception message, listing the overlapping, existing and new ranges in turn.
  private static String explain(
      Column<?> column,
      @Nullable Object value,
      RangeTree existing,
      RangeTree ranges,
      RangeTree intersection,
      OverwriteMode mode) {
    String overlappingLines = toLines(intersection);
    String existingLines = toLines(existing);
    String newLines = toLines(ranges);
    return String.format(
        "cannot assign non-disjoint ranges for value '%s' in column '%s' using overwrite mode: %s\n"
            + "overlapping ranges:\n%s"
            + "existing ranges:\n%s"
            + "new ranges:\n%s",
        value, column, mode, overlappingLines, existingLines, newLines);
  }

  // Formats the range specifications of the given (non-empty) ranges, one per line.
  private static String toLines(RangeTree ranges) {
    checkArgument(!ranges.isEmpty());
    StringBuilder lines = new StringBuilder();
    for (Object spec : ranges.asRangeSpecifications()) {
      lines.append(" ").append(spec).append("\n");
    }
    return lines.toString();
  }

  // Stack traces are suppressed for "semantic" exceptions like this one, since they indicate user
  // error rather than a bug (and a stack trace is not very useful for that).
  @Override
  public synchronized Throwable fillInStackTrace() {
    return this;
  }
}

+ 215
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeKey.java View File

@ -0,0 +1,215 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
import static java.lang.Integer.numberOfTrailingZeros;
import static java.util.Comparator.comparing;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSortedSet;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer;
import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.NavigableSet;
import java.util.Set;
/**
 * A range key is somewhat similar to a {@link RangeSpecification}, except that it can encode
 * multiple possible lengths for a single range prefix. Range keys are particularly useful as
 * unique "row keys" when representing range trees as tabular data.
 */
@AutoValue
public abstract class RangeKey {
  /**
   * Order by prefix first and then minimum length. For row keys representing disjoint ranges, this
   * will be a total ordering (since the comparison is really with the "shortest" digit sequence in
   * the ranges, which must be distinct for disjoint ranges).
   */
  public static final Comparator<RangeKey> ORDERING =
      comparing(RangeKey::getPrefix, comparing(s -> s.min().toString()))
          .thenComparing(RangeKey::getLengths, comparing(NavigableSet::first));

  /**
   * Creates a range key representing ranges with a prefix of some set of lengths. The prefix must
   * not be longer than the possible lengths and cannot end with an "any" edge (i.e. "x").
   *
   * @param prefix the prefix shared by all digit sequences matched by the key.
   * @param lengths the possible lengths of matched digit sequences (must not be empty).
   * @throws IllegalArgumentException if the prefix ends with an "any" edge, if no lengths are
   *     given, or if any length is shorter than the prefix.
   */
  public static RangeKey create(RangeSpecification prefix, Set<Integer> lengths) {
    checkArgument(prefix.length() == 0 || prefix.getBitmask(prefix.length() - 1) != ALL_DIGITS_MASK,
        "prefix cannot end with an 'any' edge: %s", prefix);
    // Check explicitly, since asking an empty sorted set for its first element would otherwise
    // fail with an unhelpful NoSuchElementException.
    checkArgument(!lengths.isEmpty(), "a range key must have at least one length: %s", prefix);
    ImmutableSortedSet<Integer> sorted = ImmutableSortedSet.copyOf(lengths);
    checkArgument(sorted.first() >= prefix.length(),
        "lengths cannot be shorter than the prefix: %s - %s", prefix, lengths);
    return new AutoValue_RangeKey(prefix, sorted);
  }

  /**
   * Decomposes the given range tree into a sorted sequence of keys, representing the same digit
   * sequences. The resulting keys form a disjoint covering of the original range set, and no
   * two keys will contain the same prefix (but prefixes of keys may overlap, even if the ranges
   * they ultimately represent do not). The resulting sequence is ordered by {@link #ORDERING}.
   */
  public static ImmutableList<RangeKey> decompose(RangeTree tree) {
    List<RangeKey> keys = new ArrayList<>();
    // The ALLOW_EDGE_SPLITTING strategy works best for the case of generating row keys because it
    // helps avoid having the same sequence appear in multiple rows. Note however that even this
    // strategy isn't perfect, and partially overlapping ranges with different lengths can still
    // cause issues. For example, 851 appears as a prefix for 2 rows in the following (real world)
    // example.
    // prefix=85[1-9], length=10
    // prefix=8[57]1, length=11
    // However a given digit sequence will still only appear in (at most) one range key based on
    // its length.
    for (RangeTree f : RangeTreeFactorizer.factor(tree, MergeStrategy.ALLOW_EDGE_SPLITTING)) {
      KeyVisitor.visit(f, keys);
    }
    return ImmutableList.sortedCopyOf(ORDERING, keys);
  }

  // A recursive descent visitor that splits range keys from the visited tree on the upward phase
  // of visitation. After finding the terminal node, the visitor tries to strip as much of the
  // trailing "any" path as possible, to leave the prefix. Note that the visitor can never start
  // another downward visitation while it's processing the "any" paths, because if it walks up
  // through an "any" path, the node it reaches cannot have any other edges coming from it (the
  // "any" path is all the possible edges).
  private static class KeyVisitor implements DfaVisitor {
    // Visits the given tree, adding the extracted keys to the given list.
    private static void visit(RangeTree tree, List<RangeKey> keys) {
      KeyVisitor v = new KeyVisitor(keys);
      tree.accept(v);
      // We may still need to emit a key for ranges with "any" paths that reach the root node.
      int lengthMask = v.lengthMask;
      // Shouldn't happen for phone numbers, since it implies the existence of "zero length" digit
      // sequences.
      if (tree.getInitial().canTerminate()) {
        lengthMask |= 1;
      }
      if (lengthMask != 0) {
        // Use the empty specification as a prefix since the ranges are defined purely by length.
        keys.add(new AutoValue_RangeKey(RangeSpecification.empty(), buildLengths(lengthMask)));
      }
    }

    // Collection of extracted keys.
    private final List<RangeKey> keys;
    // Current path from the root of the tree being visited.
    private RangeSpecification path = RangeSpecification.empty();
    // Non-zero when we are in the "upward" phase of visitation, processing trailing "any" paths.
    // When zero we are either in a "downward" phase or traversing up without stripping paths.
    // Bit N of the mask is set if N is a possible length for the key being processed.
    private int lengthMask = 0;

    private KeyVisitor(List<RangeKey> keys) {
      this.keys = checkNotNull(keys);
    }

    @Override
    public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
      checkState(lengthMask == 0,
          "during downward tree traversal, length mask should be zero (was %s)", lengthMask);
      RangeSpecification oldPath = path;
      path = path.extendByMask(edge.getDigitMask());
      if (target.equals(RangeTree.getTerminal())) {
        lengthMask = (1 << path.length());
        // We might emit the key immediately for ranges without trailing paths (e.g. "1234").
        maybeEmitKey();
      } else {
        target.accept(this);
        // If we see a terminating node, we are either adding a new possible length to an existing
        // key or starting to process a new key (we don't know and it doesn't matter providing we
        // capture the current length in the mask).
        if (target.canTerminate()) {
          lengthMask |= (1 << path.length());
        }
        maybeEmitKey();
      }
      // Restore the path for the next edge visited from the source node.
      path = oldPath;
    }

    // Conditionally emits a key for the current path prefix and possible lengths if we've found
    // the "end" of an "any" path (e.g. we have possible lengths and the edge above us is not an
    // "any" path).
    private void maybeEmitKey() {
      if (lengthMask != 0 && path.getBitmask(path.length() - 1) != ALL_DIGITS_MASK) {
        keys.add(new AutoValue_RangeKey(path, buildLengths(lengthMask)));
        lengthMask = 0;
      }
    }
  }

  /**
   * Returns the prefix for this range key. All digit sequences matched by this key are of the
   * form {@code "<prefix>xxxx"} for some number of "any" edges. This prefix can be "empty" for
   * ranges such as {@code "xxxx"}.
   */
  public abstract RangeSpecification getPrefix();

  /**
   * Returns the possible lengths for digit sequences matched by this key. The returned set is
   * never empty.
   */
  public abstract ImmutableSortedSet<Integer> getLengths();

  /**
   * Converts the range key into a sequence of range specifications, ordered by length. The
   * returned list is never empty.
   */
  public final ImmutableList<RangeSpecification> asRangeSpecifications() {
    RangeSpecification s = getPrefix();
    return getLengths().stream()
        .map(n -> s.extendByLength(n - s.length()))
        .collect(toImmutableList());
  }

  /** Converts the range key into a range tree built from its range specifications. */
  public final RangeTree asRangeTree() {
    return RangeTree.from(asRangeSpecifications().stream());
  }

  /**
   * Checks if the RangeKey contains a range represented by the given prefix and length. The check
   * is made by extending the given prefix with zeros up to the given length and testing whether
   * any range specification of this key matches the resulting digit sequence.
   *
   * @param prefix the prefix of the range to test.
   * @param length the length of digit sequences in the range to test (must not be null).
   */
  public boolean contains(DigitSequence prefix, Integer length) {
    return asRangeSpecifications().stream()
        .anyMatch(
            specification ->
                specification.matches(
                    prefix.extendBy(DigitSequence.zeros(length - prefix.length()))));
  }

  // Converts a non-zero bit mask of lengths (bit N set means length N) into a sorted set.
  private static ImmutableSortedSet<Integer> buildLengths(int lengthMask) {
    checkArgument(lengthMask != 0);
    ImmutableSortedSet.Builder<Integer> lengths = ImmutableSortedSet.naturalOrder();
    do {
      int length = numberOfTrailingZeros(lengthMask);
      lengths.add(length);
      // Clear each bit as we go.
      lengthMask &= ~(1 << length);
    } while (lengthMask != 0);
    return lengths.build();
  }
}

+ 951
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeTable.java View File

@ -0,0 +1,951 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Iterables.transform;
import static com.google.common.collect.Maps.immutableEntry;
import static java.util.Comparator.comparing;
import static java.util.Map.Entry.comparingByKey;
import static java.util.stream.Collectors.joining;
import com.google.auto.value.AutoValue;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.google.common.collect.TreeBasedTable;
import com.google.common.collect.UnmodifiableIterator;
import com.google.i18n.phonenumbers.metadata.PrefixTree;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.function.Function;
import javax.annotation.Nullable;
/**
* A tabular representation of attributes, assigned to number ranges.
* <p>
* A {@code RangeTable} is equivalent to {@code Table&lt;RangeSpecification, Column, Value&gt;},
* but is expressed as a mapping of {@code (Column, Value) -> RangeTree} (since {@code RangeTree}
* is not a good key). To keep the data structurally equivalent to its tabular form, it's important
* that within a column, all assigned ranges are mutually disjoint (and thus a digit sequence can
* have at most one value assigned in any column).
*
* <h3>Table Schemas</h3>
* A table requires a {@link Schema}, which defines the columns which can be present and their
* order. Column ordering is important since it relates to how rules are applied (see below).
*
* <h3>Columns and Column Groups</h3>
* A {@link Column} defines a category of values of a particular type (e.g. String, Boolean,
* Integer or user specified enums) and a default value. New columns can be implemented easily and
* can choose to limit their values to some known set.
* <p>
* A {@link ColumnGroup} defines a related set of columns of the same type. The exact set of
* columns available in a group is not necessarily known in advance. A good example of a column
* group is having columns for names in different languages. A column group of "Name" could define
* columns such as "Name:en", "Name:fr", "Name:ja" etc. which contain the various translations of
* the value. The first time a value is added for a column inferred by a column group, that column
* is created.
* <p>
* An {@link Assignment} is a useful way to encapsulate "a value in a column" and can be used to
* assign or unassign values to ranges, or query for the ranges which have that assignment.
*
* <h3>Builders and Unassigned Values</h3>
* To allow a {@code RangeTable} to fully represent data in a tabular way, it must be possible to
* have rows in a table for which no value is assigned in any column. Unassigned ranges can be
* added to a builder via the {@link Builder#add(RangeTree)} method, and these "empty rows" are
* preserved in the final table.
* <p>
* This is useful since it allows a {@link Change} to affect no columns, but still have an effect
* on the final table. It's also useful when applying rules to infer values and fill-in column
* defaults.
*/
public final class RangeTable {
/** Rules describing when an assignment may replace values already assigned to ranges. */
public enum OverwriteMode {
  /** Assignment is only permitted for ranges which currently have no value assigned. */
  NEVER,
  /** Assignment is only permitted for ranges which are unassigned or already have the value. */
  SAME,
  /** Assignment is always permitted (ranges are unassigned from other values in the category). */
  ALWAYS
}
/** A builder for an immutable range table to which changes and rules can be applied. */
public static final class Builder {
// The schema for the table to be built. It is used to check columns before they are added and
// supplies the ordering in which columns are stored.
private final Schema schema;
// The map of per-column ranges, sorted by the schema's column ordering. Entries are created on
// demand (see getOrAddRangeMap) and a column may be present without having any assignments.
private final SortedMap<Column<?>, DisjointRangeMap.Builder<?>> columnRanges;
// The union of all ranges added to the builder (either by assignment or range addition).
// This is not just a cache of all the assigned ranges, since assigning and unassigning a range
// will not cause it to be removed from the table altogether (even if it is no longer assigned
// in any column).
private RangeTree allRanges = RangeTree.empty();
// Creates an empty builder (no columns, no ranges) for the given non-null schema. Instances are
// obtained via RangeTable.builder(Schema) or RangeTable#toBuilder().
private Builder(Schema schema) {
this.schema = checkNotNull(schema);
this.columnRanges = new TreeMap<>(schema.ordering());
}
// Helper to return an on-demand builder for a column. The column is first passed through
// schema.checkColumn(), and a new (empty) range map builder is registered for it if the column
// has not been seen by this builder before. Note that merely calling this method therefore makes
// the column appear in getColumns(), even if nothing is ever assigned in it.
private <T extends Comparable<T>> DisjointRangeMap.Builder<T> getOrAddRangeMap(Column<T> c) {
// The generic type of the builder is defined by the column it's building for, and the map
// just uses that column as its key. Thus, if the given column is recognized by the schema,
// the returned builder must be of the same type.
@SuppressWarnings("unchecked")
DisjointRangeMap.Builder<T> ranges = (DisjointRangeMap.Builder<T>)
columnRanges.computeIfAbsent(schema.checkColumn(c), DisjointRangeMap.Builder::new);
return ranges;
}
// ---- Read-only API ----
/** Returns the schema this builder was created with. */
public Schema getSchema() {
  return this.schema;
}
/**
 * Looks up the ranges matching the given assignment. An assignment whose value is {@code empty}
 * selects the ranges which are unassigned in its column.
 */
public RangeTree getRanges(Assignment<?> assignment) {
  Column<?> column = assignment.column();
  Object valueOrNull = assignment.value().orElse(null);
  return getRanges(column, valueOrNull);
}
/**
 * Looks up the ranges holding the given value in the specified column. A {@code null} value
 * selects the ranges which are unassigned in the column instead. When the column has never been
 * added to this builder, the result is the empty range for a non-null value, and all ranges in
 * the table for a {@code null} value.
 */
public RangeTree getRanges(Column<?> column, @Nullable Object value) {
  getSchema().checkColumn(column);
  DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column);
  if (value == null) {
    // "Unassigned" means everything in the table which this column has not claimed.
    RangeTree all = getAllRanges();
    return rangeMap == null ? all : all.subtract(rangeMap.getAssignedRanges());
  }
  return rangeMap == null ? RangeTree.empty() : rangeMap.getRanges(value);
}
/**
 * Returns every range which has some value assigned in the specified column. If the column has
 * not been added to this builder, the empty range is returned.
 */
public RangeTree getAssignedRanges(Column<?> column) {
  getSchema().checkColumn(column);
  DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column);
  if (rangeMap == null) {
    return RangeTree.empty();
  }
  return rangeMap.getAssignedRanges();
}
/**
 * Returns the union of all ranges known to this builder, whether they were added directly via
 * {@link #add(RangeTree)} or indirectly by being assigned a value.
 */
public RangeTree getAllRanges() {
  return this.allRanges;
}
/** Returns the ranges in this table which have no value assigned in any column. */
public RangeTree getUnassignedRanges() {
  // Accumulate the assigned ranges of every column, then remove them from the full range set.
  RangeTree assignedAnywhere = RangeTree.empty();
  for (DisjointRangeMap.Builder<?> rangeMap : columnRanges.values()) {
    assignedAnywhere = assignedAnywhere.union(rangeMap.getAssignedRanges());
  }
  return allRanges.subtract(assignedAnywhere);
}
/**
 * Returns an immutable snapshot of the columns currently present in this builder, in schema
 * order. Columns with no assignments are included (they may have been added explicitly, or be
 * left over after values were unassigned).
 */
public ImmutableSet<Column<?>> getColumns() {
  // The backing map is sorted by the schema ordering, so its key set is already in order.
  return ImmutableSet.copyOf(columnRanges.keySet());
}
// ---- Range assignment/addition/removal ----
/**
 * Assigns the given ranges according to the given assignment. An assignment whose value is
 * {@code empty} unassigns the ranges in that column without removing them from the table. Empty
 * {@code ranges} make this call a no-op.
 *
 * @throws RangeException if assignment cannot be performed according to the overwrite mode
 *     (no change will have occurred in the table if this occurs).
 */
public Builder assign(Assignment<?> assignment, RangeTree ranges, OverwriteMode mode) {
  Column<?> column = assignment.column();
  Object valueOrNull = assignment.value().orElse(null);
  return assign(column, valueOrNull, ranges, mode);
}
/**
 * Assigns the given ranges to a value in a single column, leaving other columns untouched. A
 * {@code null} value unassigns the ranges in that column without removing them from the table.
 * Empty {@code ranges} make this call a no-op (the column is not even added).
 *
 * @throws RangeException if assignment cannot be performed according to the overwrite mode
 *     (no change will have occurred in the table if this occurs).
 */
public Builder assign(
    Column<?> column, @Nullable Object value, RangeTree ranges, OverwriteMode mode) {
  if (ranges.isEmpty()) {
    return this;
  }
  getOrAddRangeMap(column).assign(value, ranges, mode);
  // Only reached if the assignment above succeeded.
  allRanges = allRanges.union(ranges);
  return this;
}
/**
 * Copies all ranges, columns and assigned values of the given table into this builder, always
 * overwriting existing values. Ranges already assigned in this builder are not cleared.
 */
public Builder add(RangeTable table) {
  // Add ranges and columns first so that unassigned rows and empty columns are carried over.
  add(table.getAllRanges()).add(table.getColumns());
  for (Column<?> column : table.getColumns()) {
    for (Object value : table.getAssignedValues(column)) {
      assign(column, value, table.getRanges(column, value), OverwriteMode.ALWAYS);
    }
  }
  return this;
}
/**
 * Adds the given ranges to the table without assigning them in any column, so that they are
 * present in the table even if no assignment is ever made for them.
 */
public Builder add(RangeTree ranges) {
  this.allRanges = this.allRanges.union(ranges);
  return this;
}
/** Adds the given non-null column to the table, even if nothing is ever assigned in it. */
public Builder add(Column<?> column) {
  Column<?> nonNullColumn = checkNotNull(column);
  getOrAddRangeMap(nonNullColumn);
  return this;
}
/** Adds each of the given columns to the table, even if nothing is ever assigned in them. */
public Builder add(Collection<Column<?>> columns) {
  for (Column<?> column : columns) {
    add(column);
  }
  return this;
}
/**
 * Removes the given ranges from the table entirely: they are unassigned in every column and
 * then dropped from the set of all ranges.
 */
public Builder remove(RangeTree ranges) {
  columnRanges.values().forEach(
      rangeMap -> rangeMap.assign(null, ranges, OverwriteMode.ALWAYS));
  allRanges = allRanges.subtract(ranges);
  return this;
}
/**
 * Drops the given non-null column (and its assignments) from the table. Does nothing if the
 * column is not present. The set of all ranges in the table is left unchanged.
 */
public Builder remove(Column<?> column) {
  Column<?> nonNullColumn = checkNotNull(column);
  columnRanges.remove(nonNullColumn);
  return this;
}
/** Drops each of the given columns from the table (columns not present are ignored). */
public Builder remove(Collection<Column<?>> columns) {
  for (Column<?> column : columns) {
    remove(column);
  }
  return this;
}
/**
 * Copies into this builder every value assigned in {@code column} of the source table, except
 * for values equal to the column's default value.
 *
 * @param column the column to copy values for.
 * @param src the table to copy values and their ranges from.
 * @param mode the overwrite mode used for each assignment into this builder.
 */
public <T extends Comparable<T>> Builder copyNonDefaultValues(
    Column<T> column, RangeTable src, OverwriteMode mode) {
  for (T value : src.getAssignedValues(column)) {
    if (column.defaultValue().equals(value)) {
      continue;
    }
    assign(column, value, src.getRanges(column, value), mode);
  }
  return this;
}
// ---- Applying changes ----
/**
 * Applies the given change to this range table using {@link OverwriteMode#ALWAYS}. Because
 * existing values are always overwritten, no overwrite check is made (in contrast to
 * {@link #apply(Change, OverwriteMode)} with a more restrictive mode).
 */
public Builder apply(Change change) {
  OverwriteMode unconditional = OverwriteMode.ALWAYS;
  return apply(change, unconditional);
}
/**
 * Applies the given change to this range table. A change adds its ranges to the table and
 * optionally assigns them specific values within columns. A change with empty ranges has no
 * effect.
 *
 * @throws RangeException if the overwrite mode prohibits the modification in this change. All
 *     assignments are checked before any is made, so no values are modified in that case.
 */
public Builder apply(Change change, OverwriteMode mode) {
  RangeTree ranges = change.getRanges();
  if (ranges.isEmpty()) {
    return this;
  }
  // Verify up front that every assignment is permitted, so that a failure cannot leave the
  // builder with only some of the change applied. ALWAYS needs no check.
  if (mode != OverwriteMode.ALWAYS) {
    for (Assignment<?> pending : change.getAssignments()) {
      getOrAddRangeMap(pending.column()).checkAssign(pending.value().orElse(null), ranges, mode);
    }
  }
  for (Assignment<?> pending : change.getAssignments()) {
    getOrAddRangeMap(pending.column()).assign(pending.value().orElse(null), ranges, mode);
  }
  allRanges = allRanges.union(ranges);
  return this;
}
// ---- Builder related methods ----
/** Builds an immutable range table reflecting the current state of this builder. */
public RangeTable build() {
  // The backing map is a TreeMap keyed by the schema ordering, so iterating it yields columns
  // in schema order and the resulting immutable map preserves that order.
  ImmutableMap.Builder<Column<?>, DisjointRangeMap<?>> columnMap = ImmutableMap.builder();
  for (Entry<Column<?>, DisjointRangeMap.Builder<?>> column : columnRanges.entrySet()) {
    columnMap.put(column.getKey(), column.getValue().build());
  }
  return new RangeTable(schema, columnMap.build(), allRanges, getUnassignedRanges());
}
/**
 * Returns a new, independent builder holding the same state as this one. Useful when state is
 * built up incrementally and a checkpoint is needed.
 */
public Builder copy() {
  // Goes via an immutable snapshot; can be made more efficient if necessary.
  RangeTable snapshot = build();
  return snapshot.toBuilder();
}
/**
 * Builds a minimal version of this table in which empty columns are no longer present. Note
 * that the empty columns are removed from this builder itself before building.
 */
public RangeTable buildMinimal() {
  columnRanges.values().removeIf(rangeMap -> rangeMap.getAssignedRanges().isEmpty());
  return build();
}
/** Returns the string form of the table that would currently be built. */
@Override
public final String toString() {
  RangeTable snapshot = build();
  return snapshot.toString();
}
}
/** Returns a new, empty builder for a range table with the columns defined by the schema. */
public static Builder builder(Schema schema) {
  Builder emptyBuilder = new Builder(schema);
  return emptyBuilder;
}
/**
 * Creates a range table from a table keyed by range specification (row) and column, in which
 * each cell holds an optional value. Each row is applied in turn as an unconditional change, so
 * an empty cell unassigns that column for the row's range.
 *
 * @param schema the schema of the returned table.
 * @param t the row/column/value data to convert.
 */
public static RangeTable from(
    Schema schema, Table<RangeSpecification, Column<?>, Optional<?>> t) {
  Builder result = builder(schema);
  for (Entry<RangeSpecification, Map<Column<?>, Optional<?>>> row : t.rowMap().entrySet()) {
    Map<Column<?>, Optional<?>> cells = row.getValue();
    List<Assignment<?>> assignments = cells.entrySet().stream()
        .map(cell -> Assignment.ofOptional(cell.getKey(), cell.getValue()))
        .collect(toImmutableList());
    result.apply(Change.of(RangeTree.from(row.getKey()), assignments));
  }
  return result.build();
}
// Definition of table columns.
private final Schema schema;
// Mapping to the assigned ranges for each column type. Columns may be present with no
// assignments; a column absent from this map is treated as having no assigned ranges.
private final ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges;
// All ranges in this table (possibly larger than union of all assigned ranges in all columns).
private final RangeTree allRanges;
// Ranges unassigned in any column (a subset of, or equal to allRanges).
private final RangeTree unassigned;
// Instances are created via Builder#build() or the private subTable() helper. All arguments
// must be non-null; the caller is responsible for passing a consistent "unassigned" range.
private RangeTable(
Schema schema,
ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges,
RangeTree allRanges,
RangeTree unassigned) {
this.schema = checkNotNull(schema);
this.columnRanges = checkNotNull(columnRanges);
this.allRanges = checkNotNull(allRanges);
this.unassigned = checkNotNull(unassigned);
}
/** Returns a new builder initialized with the ranges, columns and assignments of this table. */
public Builder toBuilder() {
  // The new builder is empty, so any overwrite mode would give the same result; the copy made
  // by add() uses "always overwrite".
  return builder(schema).add(this);
}
// Returns the range map for the given column (after checking it against the schema), or empty
// if the column is not present in this table.
private Optional<DisjointRangeMap<?>> getRangeMap(Column<?> column) {
  DisjointRangeMap<?> rangeMap = columnRanges.get(schema.checkColumn(column));
  return Optional.ofNullable(rangeMap);
}
/** Returns the schema defining the columns of this table. */
public Schema getSchema() {
  return this.schema;
}
/** Returns the columns present in this table (including any which have no assignments). */
public ImmutableSet<Column<?>> getColumns() {
  ImmutableSet<Column<?>> columns = columnRanges.keySet();
  return columns;
}
/**
 * Returns the set of values with assigned ranges in the given column. If the column is not
 * present in this table, the empty set is returned.
 *
 * <p>The column is first checked against the schema via {@code checkColumn()}; presumably that
 * check is what rejects columns which are not valid for this table's schema.
 */
public <T extends Comparable<T>> ImmutableSet<T> getAssignedValues(Column<T> column) {
  // Safe since if the column is in the schema the values must have been checked when added.
  @SuppressWarnings("unchecked")
  DisjointRangeMap<T> rangeMap =
      (DisjointRangeMap<T>) columnRanges.get(schema.checkColumn(column));
  if (rangeMap == null) {
    return ImmutableSet.of();
  }
  return rangeMap.getAssignedValues();
}
/**
 * Returns every range which has some value assigned in the specified column, or the empty range
 * if the column is not present in this table.
 */
public RangeTree getAssignedRanges(Column<?> column) {
  Optional<DisjointRangeMap<?>> rangeMap = getRangeMap(column);
  return rangeMap.map(DisjointRangeMap::getAssignedRanges).orElse(RangeTree.empty());
}
/**
 * Looks up the ranges matching the given assignment. An assignment whose value is {@code empty}
 * selects the ranges which are unassigned in its column.
 */
public RangeTree getRanges(Assignment<?> assignment) {
  Column<?> column = assignment.column();
  Object valueOrNull = assignment.value().orElse(null);
  return getRanges(column, valueOrNull);
}
/**
 * Looks up the ranges holding the given value in the specified column. A {@code null} value
 * selects the ranges of this table which are unassigned in the column instead.
 */
public RangeTree getRanges(Column<?> column, @Nullable Object value) {
  getSchema().checkColumn(column);
  if (value != null) {
    return getRangeMap(column)
        .map(rangeMap -> rangeMap.getRanges(value))
        .orElse(RangeTree.empty());
  }
  return getAllRanges().subtract(getAssignedRanges(column));
}
/** Returns every range present in this table, whether or not it is assigned in any column. */
public RangeTree getAllRanges() {
  return this.allRanges;
}
/** Returns the ranges in this table which have no value assigned in any column. */
public RangeTree getUnassignedRanges() {
  return this.unassigned;
}
/**
 * Returns whether this table has no ranges at all (assigned or unassigned). Two empty tables
 * are not necessarily equal, since they may still differ in the columns they contain.
 */
public boolean isEmpty() {
  RangeTree everything = getAllRanges();
  return everything.isEmpty();
}
/**
 * Returns a sub-table whose rows are limited to the given bounds and whose columns are limited
 * to the given set. The returned table has the same schema as this table. Every given column is
 * checked against this table's schema; columns not present in this table are ignored.
 */
public RangeTable subTable(RangeTree bounds, Set<Column<?>> columns) {
  // Columns must be a subset of what's allowed in this schema.
  Schema tableSchema = getSchema();
  for (Column<?> column : columns) {
    tableSchema.checkColumn(column);
  }
  return subTable(bounds, tableSchema, columns);
}
/**
 * Varargs convenience form of {@link #subTable(RangeTree, Set)}: returns a sub-table whose rows
 * are limited to the given bounds and whose columns are limited to those listed. The returned
 * table has the same schema as this table.
 */
public RangeTable subTable(RangeTree bounds, Column<?> first, Column<?>... rest) {
  ImmutableSet<Column<?>> columns =
      ImmutableSet.<Column<?>>builder().add(first).add(rest).build();
  return subTable(bounds, columns);
}
/**
 * Returns a table whose rows are limited to the given bounds and whose columns are limited to
 * those valid in the given sub-schema. The returned table uses the sub-schema as its schema.
 *
 * @throws IllegalArgumentException if {@code subSchema} is not a sub-schema of this table's
 *     schema.
 */
public RangeTable subTable(RangeTree bounds, Schema subSchema) {
  checkArgument(subSchema.isSubSchemaOf(getSchema()),
      "expected sub-schema of %s, got %s", getSchema(), subSchema);
  Set<Column<?>> retained = Sets.filter(getColumns(), subSchema::isValidColumn);
  return subTable(bounds, subSchema, retained);
}
// Shared implementation for the public subTable() methods.
// Callers MUST validate that the given set of columns are all valid in the subSchema.
private RangeTable subTable(RangeTree bounds, Schema subSchema, Set<Column<?>> columns) {
  // Keep only columns which exist in this table (empty columns are allowed if they exist), and
  // sort them to our schema since the given set of columns is not required to be sorted.
  List<Column<?>> retained = columns.stream()
      .filter(c -> getRangeMap(c).isPresent())
      .sorted(schema.ordering())
      .collect(toImmutableList());
  // Bound each retained column's range map.
  ImmutableMap.Builder<Column<?>, DisjointRangeMap<?>> columnMap = ImmutableMap.builder();
  for (Column<?> c : retained) {
    columnMap.put(c, getRangeMap(c).get().intersect(bounds));
  }
  return new RangeTable(
      subSchema, columnMap.build(), allRanges.intersect(bounds), unassigned.intersect(bounds));
}
/**
 * Returns the assigned rows of this table as a minimal list of disjoint changes, which can be
 * applied to an empty table to recreate this table. No two changes affect the same columns in
 * the same way, and changes are ordered by the minimal values of their ranges. This is
 * essentially the same information as returned by {@link #toImmutableTable()}, but ranges are
 * not decomposed into range specifications, so it is more amenable to compact serialization.
 */
// Note that the minimal nature of the returned changes is essential for some algorithms that
// operate on tables and this must not be changed.
public ImmutableList<Change> toChanges() {
  Table<Column<?>, Optional<?>, RangeTree> assigned = HashBasedTable.create();
  for (Column<?> column : getColumns()) {
    for (Object value : getAssignedValues(column)) {
      assigned.put(column, Optional.of(value), getRanges(column, value));
    }
  }
  return toChanges(schema, assigned, getAllRanges());
}
/**
 * Returns a minimum set of changes based on a table of assignments (column plus value). This is
 * not expected to be used often (since RangeTable is usually a better representation of the
 * data) but can be useful in representing things like updates and patches in which only some
 * rows or columns are represented.
 *
 * @param schema a schema for the columns in the given Table (used to determine column order).
 * @param table the table of assignments to assigned ranges.
 * @param allRanges the set of all ranges affected by the changes (this might include ranges not
 *     present anywhere in the table, which correspond to empty rows).
 */
public static ImmutableList<Change> toChanges(
    Schema schema, Table<Column<?>, Optional<?>, RangeTree> table, RangeTree allRanges) {
  return toRows(table, allRanges, schema.ordering()).stream()
      .map(Row::toChange)
      .collect(toImmutableList());
}
/**
 * Returns the data in this table as an {@link ImmutableTable} whose row keys are disjoint range
 * specifications (in order). The returned table has the smallest number of rows necessary to
 * represent the data in this range table. This is useful as a human readable serialized form
 * since any digit sequence in the table is contained in a unique row.
 */
public ImmutableTable<RangeSpecification, Column<?>, Optional<?>> toImmutableTable() {
  RangeTree everything = getAllRanges();
  Table<Column<?>, Optional<?>, RangeTree> cells = HashBasedTable.create();
  for (Column<?> column : getColumns()) {
    for (Object value : getAssignedValues(column)) {
      cells.put(column, Optional.of(value), getRanges(column, value));
    }
    // Record the ranges with no value in this column explicitly, as an "empty" value.
    RangeTree unset = everything.subtract(getAssignedRanges(column));
    if (!unset.isEmpty()) {
      cells.put(column, Optional.empty(), unset);
    }
  }
  // Unique changes contain disjoint ranges, each associated with a unique combination of
  // assignments.
  TreeBasedTable<RangeSpecification, Column<?>, Optional<?>> sorted =
      TreeBasedTable.create(comparing(RangeSpecification::min), schema.ordering());
  for (Change change : toChanges(schema, cells, everything)) {
    List<RangeSpecification> rowKeys = change.getRanges().asRangeSpecifications();
    for (Assignment<?> assignment : change.getAssignments()) {
      for (RangeSpecification rowKey : rowKeys) {
        sorted.put(rowKey, assignment.column(), assignment.value());
      }
    }
  }
  return ImmutableTable.copyOf(sorted);
}
/**
 * Extracts a map for a single column in this table containing the minimal prefix tree for each
 * of the assigned values. The returned prefixes are the shortest prefixes possible for
 * distinguishing each value in the column, which makes this method especially useful for
 * categorizing partial digit sequences efficiently (i.e. prefix matching).
 *
 * <p>A minimal length can be specified to avoid creating prefixes that are "too short" for some
 * circumstances. Returned prefixes are never zero length, so {@code 1} is the lowest meaningful
 * value (although zero is still accepted to imply "no length restriction").
 *
 * <p>For some table data it is technically impossible to obtain perfect prefix information, and
 * where overlap occurs this method returns the shortest prefixes. This means that for some
 * valid inputs more than one prefix may match, and it is up to the caller to determine a "best
 * order" for testing the prefixes if this matters. See
 * {@link PrefixTree#minimal(RangeTree, RangeTree, int)} for more information.
 *
 * <p>An example of an "impossible" prefix would be if "123" has value A, "1234" has value B and
 * "12345" has value A again. In this case there is no prefix which can distinguish A and B
 * (the calculated map would be { "123" => A, "1234" => B }). In this situation, testing for the
 * longer prefix would help preserve as much of the original mapping as possible, but it would
 * never be possible to correctly distinguish all inputs.
 */
public <T extends Comparable<T>> ImmutableMap<T, PrefixTree> getPrefixMap(
    Column<T> column, int minPrefixLength) {
  // Important: Don't just use the assigned ranges in the column, use the ranges of the entire
  // table. This ensures unassigned ranges in the column are not accidentally captured by any of
  // the generated prefixes.
  RangeTree tableRanges = getAllRanges();
  ImmutableMap.Builder<T, PrefixTree> prefixes = ImmutableMap.builder();
  for (T value : getAssignedValues(column)) {
    RangeTree included = getRanges(column, value);
    RangeTree excluded = tableRanges.subtract(included);
    prefixes.put(value, PrefixTree.minimal(included, excluded, minPrefixLength));
  }
  return prefixes.build();
}
// Constants for the simplification routine below.
// INDEX is a synthetic integer column whose value N refers to the Nth change in the list of
// changes being simplified (see makeIndexTable).
// Use -1 for unassigned rows (these are the "overlap" ranges and they don't have an index).
private static final Column<Integer> INDEX =
Column.create(Integer.class, "Change Index", -1, Integer::parseInt);
// Schema containing only the INDEX column, used for the temporary index tables.
private static final Schema INDEX_SCHEMA = Schema.builder().add(INDEX).build();
/**
 * Applies a simplification function to the rows defined by the given columns of this table. The
 * returned table will only have (at most) the specified columns present.
 *
 * <p>The simplification function is used to produce ranges which satisfy some business logic
 * criteria (such as having at most N significant digits, or merging lengths). Range
 * simplification enables easier comparison between data sources of differing precision, and
 * helps to reduce unnecessary complexity in generated regular expressions.
 *
 * <p>The simplification function should return a range that's at least as large as the input
 * range. This is to ensure that simplification cannot unassign ranges, even accidentally. The
 * returned range is automatically restricted to preserve disjoint ranges in the final table.
 *
 * <p>By passing a {@link Change} rather than just a {@link RangeTree}, the simplification
 * function has access to the row assignments for the range it is simplifying. This allows it to
 * select different strategies according to the values in specific columns (e.g. area code
 * length).
 *
 * <p>Note that unassigned ranges in the original table will be preserved and simplified ranges
 * will not overwrite them. This can be useful for defining "no go" ranges which should be left
 * alone.
 *
 * @param simplifyFn the function producing a simplified range for each row.
 * @param minPrefixLength the minimum prefix length used when determining overlapping ranges.
 * @param first the first column to retain.
 * @param rest any further columns to retain.
 */
public RangeTable simplify(
    Function<Change, RangeTree> simplifyFn,
    int minPrefixLength,
    Column<?> first,
    Column<?>... rest) {
  // Build the single column "index" table (one index for each change) and simplify its ranges.
  // This only works because "toChanges()" produces the minimal set of changes such that each
  // unique combination of assignments appears only once.
  RangeTable selected = subTable(getAllRanges(), first, rest);
  ImmutableList<Change> changes = selected.toChanges();
  RangeTable indexed = simplifyIndexTable(changes, simplifyFn, minPrefixLength);
  // Reconstruct the output table by assigning values from the original change set according to
  // the indices in the simplified index table.
  Builder result = RangeTable.builder(getSchema());
  result.add(indexed.getAllRanges());
  for (int index : indexed.getAssignedValues(INDEX)) {
    RangeTree range = indexed.getRanges(INDEX, index);
    for (Assignment<?> assignment : changes.get(index).getAssignments()) {
      result.assign(assignment, range, OverwriteMode.NEVER);
    }
  }
  return result.build();
}
/**
 * Helper function to simplify an index table based on the given rows. The resulting table will
 * have a single "index" column with simplified ranges, where the index value {@code N}
 * references the Nth row in the given list of disjoint changes. This is a 3 stage process:
 * <ol>
 * <li>Step 1: Determine which ranges can overlap with respect to set of range prefixes.
 * <li>Step 2: Do simplification on the non-overlapping "prefix disjoint" ranges in the table,
 * which are then re-partitioned by the disjoint prefixes.
 * <li>Step 3: Copy over any overlapping ranges from the original table (these don't get
 * simplified since it's not possible to easily re-partition them).
 * </ol>
 *
 * @param rows the disjoint changes to index; rows without assignments are kept as unassigned
 * ranges in the index table.
 * @param simplifyFn the simplification function; it must return a superset of the range it is
 * given (this is checked).
 * @param minPrefixLength the minimum prefix length used when determining overlaps in step 1.
 * @throws IllegalArgumentException if {@code simplifyFn} returns a range which does not contain
 * its input range.
 */
// NOTE(review): the type parameter T does not appear to be used anywhere in this method.
private static <T extends Comparable<T>> RangeTable simplifyIndexTable(
ImmutableList<Change> rows, Function<Change, RangeTree> simplifyFn, int minPrefixLength) {
RangeTable indexTable = makeIndexTable(rows);
// Step 1: Determine overlapping ranges from the index table, retaining minimum prefix length.
ImmutableMap<Integer, PrefixTree> nonDisjointPrefixes =
indexTable.getPrefixMap(INDEX, minPrefixLength);
// Don't just use the assigned ranges (we need to account for valid but unassigned ranges when
// determining overlaps).
RangeTree allRanges = indexTable.getAllRanges();
RangeTree overlaps = RangeTree.empty();
for (int n : indexTable.getAssignedValues(INDEX)) {
// Anything outside index n's own ranges which is still captured by n's prefixes overlaps.
RangeTree otherRanges = allRanges.subtract(indexTable.getRanges(INDEX, n));
overlaps = overlaps.union(nonDisjointPrefixes.get(n).retainFrom(otherRanges));
}
// Step 2: Determine the "prefix disjoint" ranges in a new table and simplify it.
//
// Before getting the new set of prefixes, add the overlapping ranges back to the table, but
// without assigning them to anything. This keeps the generated prefixes as long as necessary
// to avoid creating conflicting assignments for different values. Essentially we're trying to
// keep ranges "away from" any overlaps. Note however that it is still possible for simplified
// ranges to encroach on the overlapping areas, so we must still forcibly overwrite the original
// overlapping values after simplification. Consider:
// A = { "12x", "12xxx" }, B = { "123x" }
// where the simplification function just creates an "any" range for all lengths between the
// minimum and maximum range lengths (e.g. { "123", "45678" } ==> { "xxx", "xxxx", "xxxxx" }).
//
// The (non disjoint) prefix table is Pre(A) => { "12" }, Pre(B) => { "123" } and this
// captures the overlaps:
// Pre(A).retainFrom(B) = { "123x" } = B
// Pre(B).retainFrom(A) = { "123xx" }
//
// Since all of "B" is entirely contained by the overlap, it is not simplified, but A is
// simplified to:
// { "xxx", "xxxx", "xxxxx" }
// and then re-captured by the "disjoint" prefix (which is still just "12") to:
// { "12x", "12xx", "12xxx" }
//
// However now, when the original overlaps are added back at the end (in step 3) we find that
// both "123xx" already exists (with the same index) and "123x" exists with a different index.
// The resolution is to just overwrite all overlaps back into the table, since these represent
// the original (unsimplified) values.
//
// Thus in this case, the simplified table is:
// Sim(A) = { "12x", "12[0-24-9]x", "12xxx" }, Sim(B) = { "123x" }
//
// And it is still true that: Sim(A).containsAll(A) and Sim(B).containsAll(B)
RangeTable prefixDisjointTable = indexTable
.subTable(allRanges.subtract(overlaps), INDEX)
.toBuilder()
.add(overlaps)
.build();
// NOTE: Another way to do this would be to implement an "exclusive prefix" method which could
// be used to immediately return a set of truly "disjoint" prefixes (although this would change
// the algorithm's behaviour since more ranges would be considered "overlapping" than now).
// TODO: Experiment with an alternate "exclusive" prefix function.
ImmutableMap<Integer, PrefixTree> disjointPrefixes = prefixDisjointTable.getPrefixMap(INDEX, 1);
// Not all values from the original table need be present in the derived table (since some
// overlaps account for all the ranges of a value).
Builder simplified = RangeTable.builder(INDEX_SCHEMA);
for (int n : prefixDisjointTable.getAssignedValues(INDEX)) {
RangeTree disjointRange = prefixDisjointTable.getRanges(INDEX, n);
// Pass just the assignments, not the whole row (Change) because that also contains a range,
// which might not be the same as the disjoint range (so it could be rather confusing).
PrefixTree disjointPrefix = disjointPrefixes.get(n);
RangeTree simplifiedRange =
simplifyFn.apply(Change.of(disjointRange, rows.get(n).getAssignments()));
// Technically this check is not strictly required, but there's probably no good use-case in
// which you'd want to remove assignments via the simplification process.
checkArgument(simplifiedRange.containsAll(disjointRange),
"simplification should return a superset of the given range\n"
+ "input: %s\n"
+ "output: %s\n"
+ "missing: %s",
disjointRange, simplifiedRange, disjointRange.subtract(simplifiedRange));
// Repartition the simplified ranges by the "disjoint" prefixes to restore most of the
// simplified ranges. These ranges should never overlap with each other.
RangeTree repartitionedRange = disjointPrefix.retainFrom(simplifiedRange);
simplified.assign(INDEX, n, repartitionedRange, OverwriteMode.NEVER);
}
// Step 3: Copy remaining overlapping ranges from the original table back into the result.
// Note that we may end up overwriting values here, but that's correct since it restores
// original "unsimplifiable" ranges.
for (int n : indexTable.getAssignedValues(INDEX)) {
simplified.assign(
INDEX, n, indexTable.getRanges(INDEX, n).intersect(overlaps), OverwriteMode.ALWAYS);
}
return simplified.build();
}
// Helper to make a table with a single column that references a list of disjoint changes by
// index (against the range of that change).
private static RangeTable makeIndexTable(ImmutableList<Change> rows) {
  Builder indexTable = RangeTable.builder(INDEX_SCHEMA);
  int index = 0;
  for (Change row : rows) {
    if (row.getAssignments().isEmpty()) {
      // Empty rows are added to the table, but not assigned an index. Their existence in the
      // index table prevents over simplification from affecting unassigned rows of the original
      // table.
      indexTable.add(row.getRanges());
    } else {
      indexTable.assign(INDEX, index, row.getRanges(), OverwriteMode.NEVER);
    }
    index++;
  }
  return indexTable.build();
}
/**
 * Two range tables are equal if they have equal schemas, equal sets of all ranges, and equal
 * per-column range maps (compared in column order).
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof RangeTable)) {
    return false;
  }
  RangeTable that = (RangeTable) obj;
  return schema.equals(that.schema)
      && allRanges.equals(that.allRanges)
      && columnRanges.values().asList().equals(that.columnRanges.values().asList());
}
/** Combines the hash codes of the schema, the column map and the set of all ranges. */
@Override
public int hashCode() {
  // This could be memoized if it turns out to be slow.
  int hash = schema.hashCode();
  hash ^= columnRanges.hashCode();
  hash ^= allRanges.hashCode();
  return hash;
}
// TODO: Prettier format for toString().
/**
 * Returns one line per row of {@link #toImmutableTable()}, each consisting of the row's range
 * specification followed by its comma separated column values.
 */
@Override
public final String toString() {
  return toImmutableTable().rowMap().entrySet().stream()
      .map(row -> String.format("%s, %s", row.getKey(), rowToString(row.getValue())))
      .collect(joining("\n"));
}
// Joins the values of a single row with ", ", writing "UNSET" for columns with no value.
private static String rowToString(Map<Column<?>, Optional<?>> r) {
  return r.values().stream()
      .map(cell -> cell.isPresent() ? cell.get().toString() : "UNSET")
      .collect(joining(", "));
}
// Helper method to convert a table of values into a minimal set of changes. This is used to
// turn a single RangeTable into an ImmutableTable, but also to convert a Patch into a minimal
// sequence of Changes. Each returned "row" defines a range, and a unique sequence of assignments
// over that range (i.e. no two rows have the same assignments in). The assignments are ordered
// in column order within each row, and the rows are ordered by the minimum digit sequence in
// each range and the ranges form a disjoint covering of the ranges in the original table.
//
// Parameters:
// src: maps each (column, optional value) pair to the ranges which hold that value.
// allRanges: every range to be covered by the returned rows (the initial, single empty row).
// columnOrdering: the ordering of columns within each returned row.
//
// See go/phonenumber-v2-data-structure for more details.
private static ImmutableList<Row> toRows(
Table<Column<?>, Optional<?>, RangeTree> src,
RangeTree allRanges,
Comparator<Column<?>> columnOrdering) {
// Get the non-empty columns in _reverse_ iteration order. We build up rows as a linked list
// structure, started from the "right hand side". This avoids a lot of copying as new columns
// are processed.
ImmutableList<Column<?>> reversedColumns = src.rowMap().entrySet().stream()
.filter(e -> !e.getValue().isEmpty())
.map(Entry::getKey)
.sorted(columnOrdering.reversed())
.collect(toImmutableList());
// Start with a single row covering everything and having no assignments; each column then
// splits the existing rows according to the values it holds.
List<Row> uniqueRows = new ArrayList<>();
uniqueRows.add(Row.empty(allRanges));
for (Column<?> col : reversedColumns) {
// Loop backward here so that rows can be (a) removed in place and (b) added at the end.
// Rows appended during this pass sit above the starting index, so they are not revisited
// until the next column is processed.
for (int i = uniqueRows.size() - 1; i >= 0; i--) {
Row row = uniqueRows.get(i);
// Track the unprocessed range for each row as we extend it.
RangeTree remainder = row.getRanges();
for (Entry<Optional<?>, RangeTree> e : src.row(col).entrySet()) {
RangeTree overlap = e.getValue().intersect(remainder);
if (overlap.isEmpty()) {
continue;
}
// Extend the existing row by the current column value and reduce the remaining ranges.
uniqueRows.add(Row.of(overlap, col, e.getKey(), row));
remainder = remainder.subtract(overlap);
if (remainder.isEmpty()) {
// We've accounted for all of the existing row in the new column, so remove it.
uniqueRows.remove(i);
break;
}
}
if (!remainder.isEmpty()) {
// The existing row is not completely covered by the new column, so retain what's left.
uniqueRows.set(i, row.bound(remainder));
}
}
}
// Order the rows by the first element of each row's range.
return ImmutableList.sortedCopyOf(comparing(r -> r.getRanges().first()), uniqueRows);
}
/**
 * A notional "row": a set of ranges together with the assignments which apply to them, as found
 * in a range table or a table like structure. A Row can express unassignment as well as
 * assignment, and a row need not mention every column. Rows are used both for the values held in
 * a table and for the changes between tables.
 *
 * <p>The assignments are stored as a singly linked list of {@link Cell}s, so that a row can be
 * extended by a new assignment while sharing the existing cells of the row it was built from.
 */
@AutoValue
abstract static class Row implements Iterable<Assignment<?>> {
  /** Returns a row over the given ranges which has no assignments. */
  private static Row empty(RangeTree row) {
    return new AutoValue_RangeTable_Row(row, null);
  }

  /**
   * Returns a row over the given (non-empty) ranges whose first assignment is the given column
   * value and whose remaining assignments are those of {@code next}.
   */
  private static Row of(RangeTree row, Column<?> col, Optional<?> val, Row next) {
    checkArgument(!row.isEmpty(), "empty ranges not permitted (col=%s, val=%s)", col, val);
    Cell extended = new AutoValue_RangeTable_Cell(Assignment.ofOptional(col, val), next.head());
    return new AutoValue_RangeTable_Row(row, extended);
  }

  /** Returns the ranges to which the assignments of this row apply. */
  public abstract RangeTree getRanges();

  /** Returns the first cell of the assignment list, or null if the row has no assignments. */
  @Nullable abstract Cell head();

  /** Returns a change which applies the assignments of this row to its ranges. */
  Change toChange() {
    return Change.of(getRanges(), this);
  }

  /** Returns a row with the same assignments, restricted to the intersection with {@code ranges}. */
  private Row bound(RangeTree ranges) {
    RangeTree bounded = getRanges().intersect(ranges);
    return new AutoValue_RangeTable_Row(bounded, head());
  }

  /** Iterates over the assignments of this row, starting from the head cell. */
  @Override
  public Iterator<Assignment<?>> iterator() {
    return new UnmodifiableIterator<Assignment<?>>() {
      // The cell to be returned by the next call to next(), or null once the list is exhausted.
      @Nullable private Cell cursor = Row.this.head();

      @Override
      public boolean hasNext() {
        return cursor != null;
      }

      @Override
      public Assignment<?> next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        Cell visited = cursor;
        cursor = visited.next();
        return visited.assignment();
      }
    };
  }

  @Override
  public final String toString() {
    String ranges = String.valueOf(getRanges());
    String assignments = Iterables.toString(this);
    return "Row{" + ranges + " >> " + assignments + "}";
  }
}
/**
 * A single link in the linked list of assignments held by a {@code Row}. Each cell holds one
 * assignment and a reference to the cell which follows it.
 */
@AutoValue
abstract static class Cell {
/** Returns the assignment held by this cell. */
abstract Assignment<?> assignment();
/** Returns the cell which follows this one, or null if this is the last cell in the list. */
@Nullable abstract Cell next();
}
}

+ 169
- 0
metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Schema.java View File

@ -0,0 +1,169 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Ordering;
import java.util.Comparator;
/**
 * Representation of ordered {@link Column}s in a table. Schemas define columns in both
 * {@code RangeTable} and {@code CsvTable}.
 */
@AutoValue
public abstract class Schema {
  /**
   * Builder for a table schema. Columns are ordered in the order in which they, or their owning
   * group, are added to the schema.
   */
  public static final class Builder {
    private final ImmutableSet.Builder<String> names = ImmutableSet.builder();
    private final ImmutableMap.Builder<String, Column<?>> columns = ImmutableMap.builder();
    private final ImmutableMap.Builder<String, ColumnGroup<?, ?>> groups = ImmutableMap.builder();

    /** Adds the given column to the schema, keyed by its name. */
    public Builder add(Column<?> column) {
      names.add(column.getName());
      columns.put(column.getName(), column);
      return this;
    }

    /** Adds the given column group to the schema, keyed by the name of its prototype column. */
    public Builder add(ColumnGroup<?, ?> group) {
      names.add(group.prototype().getName());
      groups.put(group.prototype().getName(), group);
      return this;
    }

    /**
     * Returns a schema containing the columns and groups added so far.
     *
     * @throws IllegalArgumentException if two columns, or two groups, were added with the same
     *     name (duplicate keys are rejected by the underlying immutable map builders)
     */
    public Schema build() {
      return new AutoValue_Schema(names.build(), columns.build(), groups.build());
    }
  }

  private static final Schema EMPTY = builder().build();

  /** Returns an empty schema with no assigned columns. */
  public static Schema empty() {
    return EMPTY;
  }

  /** Returns a new schema builder. */
  public static Builder builder() {
    return new Builder();
  }

  // Visible for AutoValue only.
  Schema() {}

  // List of column/group names used to determine column order:
  // E.g. if "names" is: ["col1", "grp1", "col2", "col3"]
  // You can have the table <<"col1", "grp1:xx", "grp1:yy", "col3">>
  // Not all columns need to be present and groups are ordered contiguously as the group prefix
  // appears in the names list.
  abstract ImmutableSet<String> names();

  // Plain (non-group) columns, keyed by column name.
  abstract ImmutableMap<String, Column<?>> columns();

  // Column groups, keyed by the name of the group's prototype column.
  abstract ImmutableMap<String, ColumnGroup<?, ?>> groups();

  /**
   * Returns the column for the specified key string. For "plain" columns (not in groups) the key
   * is just the column name. For group columns, the key takes the form "prefix:suffix", where the
   * prefix is the name of the "prototype" column, and the "suffix" is an ID of a value within the
   * group. For example:
   *
   * <pre>{@code
   * // Schema has a plain column called "Type" in it.
   * typeCol = schema.getColumn("Type");
   *
   * // Schema has a group called "Region" in it which can parse RegionCodes.
   * usRegionCol = schema.getColumn("Region:US");
   * }</pre>
   *
   * @param key the column name, or a "prefix:suffix" key identifying a column in a group
   * @return the column identified by the key
   * @throws IllegalArgumentException if the key does not identify a column of this schema
   */
  public Column<?> getColumn(String key) {
    int split = key.indexOf(':');
    Column<?> column;
    if (split == -1) {
      column = columns().get(key);
    } else {
      ColumnGroup<?, ?> group = groups().get(key.substring(0, split));
      checkArgument(group != null, "invalid column %s, not in schema: %s", key, this);
      column = group.getColumnFromId(key.substring(split + 1));
    }
    checkArgument(column != null, "invalid column %s, not in schema: %s", key, this);
    return column;
  }

  /**
   * Returns whether the given column is valid within this schema. A column whose name contains
   * no ':' must be one of the plain columns of this schema, otherwise the column must belong to
   * the group named by the part of the column name before the first ':'.
   */
  public <T extends Comparable<T>> boolean isValidColumn(Column<T> column) {
    int split = column.getName().indexOf(':');
    if (split == -1) {
      return columns().containsValue(column);
    } else {
      ColumnGroup<?, ?> group = groups().get(column.getName().substring(0, split));
      return group != null && column.isIn(group);
    }
  }

  /**
   * Checks whether the given column is valid within this schema, otherwise throws
   * IllegalArgumentException. This is expected to be internal use only, since table users are
   * meant to always know which columns are valid.
   *
   * @return the given column, to allow the check to be used inline
   */
  <T extends Comparable<T>> Column<T> checkColumn(Column<T> column) {
    checkArgument(isValidColumn(column), "invalid column %s, not in schema: %s", column, this);
    return column;
  }

  /**
   * Returns whether this schema has a subset of the columns/groups of the given schema, in the
   * same order as the given schema.
   */
  public boolean isSubSchemaOf(Schema schema) {
    return schema.columns().values().containsAll(columns().values())
        && schema.groups().entrySet().containsAll(groups().entrySet())
        // Filtering the other schema's names down to ours must reproduce our own name order.
        && names().asList().equals(
            schema.names().stream().filter(names()::contains).collect(toImmutableList()));
  }

  /**
   * Returns an ordering for all columns in this schema. Columns are ordered first by the position
   * of their name prefix (the part before any ':') in the names of this schema, and then by the
   * natural order of their suffix. Comparing a column whose prefix is not one of the names of
   * this schema fails with a {@code ClassCastException}, as raised by the explicit ordering.
   */
  public Comparator<Column<?>> ordering() {
    return Comparator
        .comparing(Schema::getPrefix, Ordering.explicit(names().asList()))
        .thenComparing(Schema::getSuffix);
  }

  /** Returns the column and group names of this schema, in schema order. */
  public ImmutableSet<String> getNames() {
    return names();
  }

  /** Returns the plain (non-group) columns of this schema. */
  public ImmutableCollection<Column<?>> getColumns() {
    return columns().values();
  }

  // Returns the part of the column name before the first ':', or the whole name if there is none.
  private static String getPrefix(Column<?> column) {
    int split = column.getName().indexOf(':');
    return split != -1 ? column.getName().substring(0, split) : column.getName();
  }

  // Returns the part of the column name after the first ':', or "" if there is none.
  private static String getSuffix(Column<?> column) {
    int split = column.getName().indexOf(':');
    return split == -1 ? "" : column.getName().substring(split + 1);
  }
}

+ 69
- 0
metadata/src/main/proto/enums.proto View File

@ -0,0 +1,69 @@
// Copyright (C) 2017 The Libphonenumber Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package i18n.phonenumbers.metadata;
option java_package = "com.google.i18n.phonenumbers.metadata.proto";
// The possible provenance which can be assigned to a range.
// This enum is NOT stable and must only be stored in text based protocol
// buffers.
enum Provenance {
// Having a distinct default/unknown enum with a zero value is a proto3 thing.
// No data should actually ever have this value.
UNKNOWN = 0;
// Indicates that the ranges were defined in an official ITU document. The
// comment associated with this range should contain a link to the document.
// This is the most trusted form of evidence and will usually replace any
// previous "lower" provenance (though this is not always true for some
// countries).
ITU = 10;
// Indicates that the ranges were defined in an official IR21 document. The
// comment associated with this range should contain a link to the document.
// This is the most trusted form of evidence and will usually replace any
// previous "lower" provenance (though this is not always true for some
// countries).
// NOTE(review): the last sentence is identical to the one given for ITU;
// confirm whether IR21 is really meant to be "the most trusted" as well.
IR21 = 20;
// Indicates that evidence for a range was found in a website belonging to
// an official, government endorsed entity (e.g. national telecoms operator),
// but not part of either an official ITU or IR21 document.
// The comment associated with this range should contain a URL to the
// appropriate page where the evidence was found.
GOVERNMENT = 30;
// Indicates that evidence for a range was found in a website belonging to a
// telecoms operator (mobile carrier, MVNO etc...). The comment associated
// with this range should contain a URL to the appropriate page where the
// evidence was found.
TELECOMS = 40;
// Indicates that evidence for a range was found in an unofficial website
// (e.g. Facebook or a general company home page). The comment associated
// with this range should contain a URL to the appropriate page where the
// evidence was found.
WEB = 50;
// Used to indicate special situations in which a number is accepted as
// valid, despite no citeable evidence. When this provenance is used, the
// comment text should indicate some bug report or internal reasoning as to
// why this range should be accepted. This provenance should be used only in
// exceptional circumstances and the comment may be scrubbed from externally
// published versions of the range data.
INTERNAL = 100;
}

+ 82
- 0
metadata/src/main/proto/types.proto View File

@ -0,0 +1,82 @@
// Copyright (C) 2017 The Libphonenumber Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package i18n.phonenumbers.metadata;
option java_package = "com.google.i18n.phonenumbers.metadata.proto";
// Enum names must match the element names in the XML metadata modulo casing.
enum XmlNumberType {
// Having a distinct default/unknown enum with a zero value is a proto3 thing.
// No data should actually ever have this value.
XML_UNKNOWN = 0;
// NOTE(review): every value carries an XML_ prefix, presumably to keep the
// names distinct from the unprefixed ValidNumberType values declared in this
// file - confirm.
XML_NO_INTERNATIONAL_DIALLING = 1;
XML_FIXED_LINE = 2;
XML_MOBILE = 3;
XML_PAGER = 4;
XML_TOLL_FREE = 5;
XML_PREMIUM_RATE = 6;
XML_SHARED_COST = 7;
XML_PERSONAL_NUMBER = 8;
XML_VOIP = 9;
XML_UAN = 10;
XML_VOICEMAIL = 11;
}
// Validation types for phone number ranges. Each valid range is categorized as
// exactly one of these types. This does not include NO_INTERNATIONAL_DIALLING
// since it is an attribute of ranges rather than their fundamental type.
enum ValidNumberType {
// Having a distinct default/unknown enum with a zero value is a proto3 thing.
// No data should actually ever have this value.
UNKNOWN = 0;
FIXED_LINE = 1;
MOBILE = 2;
// This value has no counterpart among the XmlNumberType values.
FIXED_LINE_OR_MOBILE = 3;
PAGER = 4;
TOLL_FREE = 5;
PREMIUM_RATE = 6;
SHARED_COST = 7;
PERSONAL_NUMBER = 8;
VOIP = 9;
UAN = 10;
VOICEMAIL = 11;
}
// Enum names must match the element names in the XML metadata modulo casing.
// Unlike main metadata, these types are not required to be mutually exclusive
// for a number.
enum XmlShortcodeType {
// Having a distinct default/unknown enum with a zero value is a proto3 thing.
// No data should actually ever have this value.
SC_UNKNOWN = 0;
// General short codes without a more specific representation (unlike
// generalDesc, which can just be the leading digits, this must be precise).
SC_SHORT_CODE = 1;
// Mutually exclusive sub-set of types for tariff.
SC_TOLL_FREE = 2;
SC_STANDARD_RATE = 3;
SC_PREMIUM_RATE = 4;
// Use-case types.
SC_CARRIER_SPECIFIC = 5;
SC_EMERGENCY = 6;
SC_EXPANDED_EMERGENCY = 7;
SC_SMS_SERVICES = 8;
}

+ 134
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/DigitSequenceTest.java View File

@ -0,0 +1,134 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain;
import static org.junit.Assert.assertThrows;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
public class DigitSequenceTest {
@Test
public void testEmpty() {
Object e = DigitSequence.of("");
assertThat(e).isSameInstanceAs(DigitSequence.empty());
assertThat(DigitSequence.empty().length()).isEqualTo(0);
assertThrows(IndexOutOfBoundsException.class, () -> DigitSequence.empty().getDigit(0));
assertThat(DigitSequence.empty().toString()).isEqualTo("");
}
@Test
public void testCreate() {
DigitSequence s = DigitSequence.of("0123456789");
assertThat(s).isEqualTo(DigitSequence.of("0123456789"));
assertThat(s).isNotEqualTo(DigitSequence.of("1111111111"));
}
@Test
public void testGetDigit() {
DigitSequence s = DigitSequence.of("0123456789");
assertThat(s.length()).isEqualTo(10);
for (int n = 0; n < s.length(); n++) {
assertThat(s.getDigit(n)).isEqualTo(n);
}
assertThat(s.toString()).isEqualTo("0123456789");
}
@Test
public void testBadArguments() {
assertThrows(NullPointerException.class, () -> DigitSequence.of(null));
assertThrows(IllegalArgumentException.class, () -> DigitSequence.of("123X"));
// Too long (19 digits).
assertThrows(IllegalArgumentException.class, () -> DigitSequence.of("1234567890123456789"));
}
@Test
public void testMin() {
assertThat(domain().minValue()).isEqualTo(DigitSequence.empty());
assertThat(domain().next(DigitSequence.empty())).isNotNull();
assertThat(domain().previous(DigitSequence.empty())).isNull();
}
@Test
public void testMax() {
DigitSequence max = DigitSequence.of("999999999999999999");
assertThat(domain().maxValue()).isEqualTo(max);
assertThat(domain().previous(max)).isNotNull();
assertThat(domain().next(max)).isNull();
}
@Test
public void testDistance() {
assertThat(domain().distance(DigitSequence.empty(), DigitSequence.of("0")))
.isEqualTo(1);
assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("1")))
.isEqualTo(1);
assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("00")))
.isEqualTo(10);
assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("10")))
.isEqualTo(20);
assertThat(domain().distance(DigitSequence.of("10"), DigitSequence.of("0")))
.isEqualTo(-20);
assertThat(domain().distance(DigitSequence.empty(), DigitSequence.of("000000")))
.isEqualTo(111111);
assertThat(domain().distance(DigitSequence.of("000"), DigitSequence.of("000000")))
.isEqualTo(111000);
// Max distance is one less than the total number of digit sequences.
assertThat(domain().distance(domain().minValue(), domain().maxValue()))
.isEqualTo(1111111111111111110L);
}
@Test
public void testLexicographicalOrdering() {
testComparator(
DigitSequence.empty(),
DigitSequence.of("0"),
DigitSequence.of("1"),
DigitSequence.of("9"),
DigitSequence.of("00"),
DigitSequence.of("01"),
DigitSequence.of("10"),
DigitSequence.of("99"),
DigitSequence.of("000"),
DigitSequence.of("123"),
DigitSequence.of("124"),
DigitSequence.of("999"));
}
@Test
public void testExtend() {
assertThat(DigitSequence.empty().extendBy(0)).isEqualTo(DigitSequence.of("0"));
assertThat(DigitSequence.of("1234").extendBy(DigitSequence.of("5678")))
.isEqualTo(DigitSequence.of("12345678"));
}
private static <T extends Comparable<T>> void testComparator(T... items) {
for (int i = 0; i < items.length; i++) {
assertThat(items[i]).isEqualTo(items[i]);
assertThat(items[i]).isEquivalentAccordingToCompareTo(items[i]);
for (int j = i + 1; j < items.length; j++) {
assertThat(items[i]).isNotEqualTo(items[j]);
assertThat(items[i]).isLessThan(items[j]);
assertThat(items[j]).isGreaterThan(items[i]);
}
}
}
}

+ 213
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/PrefixTreeTest.java View File

@ -0,0 +1,213 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.RangeTree.empty;
import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat;
import java.util.Arrays;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * Tests for {@code PrefixTree}. Prefixes and ranges are written as range specification strings
 * and converted by the {@code prefixes()} and {@code ranges()} helpers at the end of this class.
 */
@RunWith(JUnit4.class)
public class PrefixTreeTest {
@Test
public void testNewInstancesNormalized() {
assertThat(prefixes("123", "1234")).containsExactly("123");
assertThat(prefixes("70x", "7[1-9]")).containsExactly("7");
// Regression test for b/68707522
assertThat(prefixes("123xxx", "123x_xxx", "567xxx", "567x_xxx")).containsExactly("123", "567");
}
@Test
public void testRetainFrom() {
PrefixTree prefix = prefixes("123", "124", "126", "555");
RangeTree ranges = ranges("1xxxxxx", "5xxxxxx", "6xxxxxx");
assertThat(prefix.retainFrom(ranges)).containsExactly("12[346]xxxx", "555xxxx");
}
@Test
public void testPrefixes() {
PrefixTree prefix = prefixes("123", "124", "126", "555");
assertThat(prefix.prefixes(seq("1230000"))).isTrue();
assertThat(prefix.prefixes(seq("555000"))).isTrue();
assertThat(prefix.prefixes(seq("12"))).isFalse();
assertThat(prefix.prefixes(seq("120000"))).isFalse();
}
@Test
public void testEmptyVsZeroLength() {
PrefixTree empty = PrefixTree.from(empty());
PrefixTree zeroLength = prefixes("xxx");
assertThat(empty).isEmpty();
assertThat(zeroLength).isNotEmpty();
assertThat(zeroLength).hasSize(1);
assertThat(zeroLength).containsExactly(RangeSpecification.empty());
// While the empty prefix tree filters out everything, the zero length tree allows everything
// to pass. This is because the zero length prefix tree represents a single prefix of length
// zero and all digit sequences start with a zero length sub-sequence.
RangeTree ranges = ranges("12x", "3xx", "456");
assertThat(empty.retainFrom(ranges)).isEqualTo(empty());
assertThat(zeroLength.retainFrom(ranges)).isEqualTo(ranges);
}
@Test
public void testNoTrailingAnyPath() {
assertThat(prefixes("123xxx", "456xx", "789x")).containsExactly("123", "456", "789");
}
@Test
public void testRangeAndPrefixSameLength() {
PrefixTree prefix = prefixes("1234");
RangeTree ranges = ranges("xxxx");
assertThat(prefix.retainFrom(ranges)).containsExactly("1234");
}
@Test
public void testRangeShorterThanPrefix() {
PrefixTree prefix = prefixes("1234");
RangeTree ranges = ranges("xxx");
assertThat(prefix.retainFrom(ranges)).isEmpty();
}
@Test
public void testComplex() {
PrefixTree prefix = prefixes("[12]", "3x4x5", "67890", "987xx9");
RangeTree ranges = ranges("x", "xx", "xxx", "1234xx", "234xxx", "3xx8xx", "67890");
assertThat(prefix.retainFrom(ranges))
.containsExactly("[12]", "[12]x", "[12]xx", "67890", "1234xx", "234xxx", "3x485x");
}
@Test
public void testEmptyPrefixTree() {
// The empty filter filters everything out, since a filter operation is defined to return
// only ranges which are prefixed by an element in the filter (of which there are none).
assertThat(PrefixTree.from(empty()).retainFrom(ranges("12xxx"))).isEmpty();
}
@Test
public void testZeroLengthPrefix() {
// The non-empty prefix tree which contains a single prefix of zero length. This has no effect
// as a filter, since all ranges "have a zero length prefix".
PrefixTree prefix = PrefixTree.from(RangeTree.from(RangeSpecification.empty()));
RangeTree input = ranges("12xxx");
assertThat(prefix.retainFrom(input)).isEqualTo(input);
}
@Test
public void testUnion() {
// Overlapping prefixes retain the more general (shorter) one.
assertThat(prefixes("1234").union(prefixes("12"))).containsExactly("12");
// Identical prefixes treated like normal union.
assertThat(prefixes("12").union(prefixes("12"))).containsExactly("12");
// Non-overlapping prefixes treated like normal union.
assertThat(prefixes("123").union(prefixes("124"))).containsExactly("12[34]");
// Complex case where prefixes are split into 2 lengths due to a partial overlap.
assertThat(prefixes("1234", "45", "800").union(prefixes("12", "4x67")))
.containsExactly("12", "45", "4[0-46-9]67", "800");
}
@Test
public void testIntersection() {
// Overlapping prefixes retain the more specific (longer) one.
assertThat(prefixes("1234").intersect(prefixes("12"))).containsExactly("1234");
// Identical prefixes treated like normal intersection.
assertThat(prefixes("12").intersect(prefixes("12"))).containsExactly("12");
// Non-overlapping prefixes treated like normal intersection.
assertThat(prefixes("123").intersect(prefixes("124"))).isEmpty();
// Unlike the union case, with intersection, only the longest prefix remains.
assertThat(prefixes("1234", "45x", "800").intersect(prefixes("12x", "4x67")))
.containsExactly("1234", "4567");
}
@Test
public void testTrim() {
assertThat(prefixes("1234").trim(3)).containsExactly("123");
assertThat(prefixes("12").trim(3)).containsExactly("12");
assertThat(prefixes("1234").trim(0)).containsExactly(RangeSpecification.empty());
// Trimming can result in prefixes shorter than the stated length if by collapsing the original
// prefix tree you end up with trailing any digit sequences.
assertThat(prefixes("12[0-4]5", "12[5-9]").trim(3)).containsExactly("12");
assertThat(prefixes("7001", "70[1-9]", "7[1-9]").trim(3)).containsExactly("7");
}
@Test
public void testMinimal() {
// If there are no ranges to include, the minimal prefix is empty (matching nothing).
assertThat(PrefixTree.minimal(RangeTree.empty(), ranges("123x"), 0)).isEmpty();
// If the prefix for the included ranges is the identity, then the result is the identity
// (after converting to a prefix, ranges like "xxx.." become the identity prefix).
assertThat(PrefixTree.minimal(ranges("xxxx"), ranges("123"), 0).isIdentity()).isTrue();
// Without an exclude set, the prefix returned (at zero length) can just accept everything.
assertThat(PrefixTree.minimal(ranges("123x"), RangeTree.empty(), 0).isIdentity()).isTrue();
assertThat(PrefixTree.minimal(ranges("123x", "456x"), ranges("13xx", "459x"), 0))
.containsExactly("12", "456");
assertThat(PrefixTree.minimal(ranges("123x", "456x"), empty(), 1)).containsExactly("[14]");
assertThat(PrefixTree.minimal(ranges("123x", "456x"), empty(), 2)).containsExactly("12", "45");
// Pick the shortest prefix when several suffice.
assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("1xx", "5xxx"), 0))
.containsExactly("12", "56");
assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("1xx", "5xxx"), 3))
.containsExactly("12", "56");
// When ranges are contested, split the prefix (only "12" is contested out of "1[2-4]").
assertThat(PrefixTree.minimal(ranges("1[2-4]5xx", "189xx"), ranges("128xx"), 0))
.containsExactly("125", "1[348]");
// If the include range already prefixes an entire path of the exclude set, ignore that path.
// Here '12' (the shorter path) already captures '123', so '123' is ignored.
assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("123", "5xxx"), 0))
.containsExactly("1", "56");
// Now all exclude paths are ignored, so you get the "identity" prefix that catches everything.
assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("123", "5678"), 0))
.containsExactly("");
}
@Test
public void testMinimal_regression() {
// This is extracted from a real case in which the old algorithm would fail for this case. The
// "281xxxxxxx" path was necessary for failing since while visiting this, the old algorithm
// became "confused" and added an additional "250" path to the minimal prefix, meaning that
// the resulting range tree was "250", "250395". When this was turned into a prefix tree, the
// shorter, early terminating, path took precedence and the result was (incorrectly) "250".
assertThat(
PrefixTree.minimal(
ranges("250395xxxx"),
ranges("250[24-9]xxxxxx", "2503[0-8]xxxxx", "25039[0-46-9]xxxx", "281xxxxxxx"),
3))
.containsExactly("250395");
}
// Shorthand for creating a digit sequence from a string of digits.
private static DigitSequence seq(String s) {
return DigitSequence.of(s);
}
// Creates a prefix tree from the range tree described by the given specification strings.
private static PrefixTree prefixes(String... specs) {
return PrefixTree.from(ranges(specs));
}
// Creates a range tree by parsing each of the given strings as a range specification.
private static RangeTree ranges(String... specs) {
return RangeTree.from(Arrays.stream(specs).map(RangeSpecification::parse));
}
}

+ 308
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeSpecificationTest.java View File

@ -0,0 +1,308 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain;
import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
import static com.google.i18n.phonenumbers.metadata.RangeSpecification.parse;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertThrows;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.truth.Truth;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
public class RangeSpecificationTest {
/** Checks the canonical string form and the length of parsed range specifications. */
@Test
public void testParse() {
  assertThat(parse("")).isSameInstanceAs(RangeSpecification.empty());

  // Plain digit strings are already in canonical form, with one digit per position.
  for (String literal : new String[] {"0", "01234"}) {
    assertThat(parse(literal).toString()).isEqualTo(literal);
    assertThat(parse(literal).length()).isEqualTo(literal.length());
  }

  // Each pair is {input, expected canonical form}.
  String[][] normalized = {
    {"012[0-9]", "012x"},
    {"012[0234789]xxx", "012[02-47-9]xxx"},
    {"0_1_2", "012"},
    {"0_12[3-8]_xxx_xxx", "012[3-8]xxxxxx"},
  };
  for (String[] pair : normalized) {
    assertThat(parse(pair[0]).toString()).isEqualTo(pair[1]);
  }
}
/** Checks that null and malformed specification strings are rejected by the parser. */
@Test
public void testParseBad() {
  assertThrows(NullPointerException.class, () -> parse(null));

  // Every one of these must be rejected with an IllegalArgumentException.
  String[] malformed = {
    "#",
    "[",
    "[]",
    "[0-",
    "[0-]",
    "[0--9]",
    "[0..9]",
    "[33]",
    "[32]",
    "[3-3]",
    "[3-2]",
    "123[9-0]456",
    "1234_",
    "_1234",
    "12__34",
    "1[2_4]5",
  };
  for (String spec : malformed) {
    assertThrows(IllegalArgumentException.class, () -> parse(spec));
  }
}
/** Checks that a single position specification can be built from a list of digits. */
@Test
public void testSingleton() {
  List<Integer> digits = asList(0, 1, 2, 4, 5, 7, 8, 9);
  assertThat(RangeSpecification.singleton(digits)).isEqualTo(parse("[0-2457-9]"));
}
/** Checks which digit sequences are matched by literal, wildcard and digit set specifications. */
@Test
public void testMatches() {
  assertThat(RangeSpecification.empty().matches(DigitSequence.empty())).isTrue();

  RangeSpecification singleDigit = parse("0");
  assertAllMatch(singleDigit, "0");
  assertNoneMatch(singleDigit, "00", "1");

  RangeSpecification literal = parse("01234");
  assertAllMatch(literal, "01234");
  assertNoneMatch(literal, "01233", "01235");

  RangeSpecification trailingAny = parse("012x");
  assertAllMatch(trailingAny, "0120", "0125", "0129");
  assertNoneMatch(trailingAny, "012", "0119", "0130", "01200");

  RangeSpecification digitSet = parse("012[3-689]xxx");
  assertAllMatch(digitSet, "0124000", "0128999");
  assertNoneMatch(digitSet, "0122000", "0127999");
}
/** Checks the smallest and largest digit sequences matched by a specification. */
@Test
public void testMinMax() {
  RangeSpecification trailingAny = parse("123xxx");
  assertThat(trailingAny.min()).isEqualTo(DigitSequence.of("123000"));
  assertThat(trailingAny.max()).isEqualTo(DigitSequence.of("123999"));

  RangeSpecification interleaved = parse("1x[2-3]x4");
  assertThat(interleaved.min()).isEqualTo(DigitSequence.of("10204"));
  assertThat(interleaved.max()).isEqualTo(DigitSequence.of("19394"));
}
/** Checks the number of digit sequences matched by a specification. */
@Test
public void testSequenceCount() {
  assertThat(RangeSpecification.empty().getSequenceCount()).isEqualTo(1);

  // The expected count at each index belongs to the specification at the same index.
  String[] specs = {"1xx", "1[2-46-8]x", "1xx[0-27-9]"};
  int[] expectedCounts = {100, 60, 600};
  for (int i = 0; i < specs.length; i++) {
    assertThat(parse(specs[i]).getSequenceCount()).isEqualTo(expectedCounts[i]);
  }
}
/** Checks that a digit sequence converts to the specification which matches only itself. */
@Test
public void testFrom() {
  assertThat(RangeSpecification.from(DigitSequence.empty()))
      .isEqualTo(RangeSpecification.empty());
  for (String digits : new String[] {"1", "1234"}) {
    assertThat(RangeSpecification.from(DigitSequence.of(digits))).isEqualTo(parse(digits));
  }
}
/** Checks the all-wildcard factory method, including the lengths it rejects. */
@Test
public void testAny() {
  assertThat(RangeSpecification.any(0)).isEqualTo(RangeSpecification.empty());

  RangeSpecification twoWildcards = parse("xx");
  assertThat(RangeSpecification.any(2)).isEqualTo(twoWildcards);

  RangeSpecification tenWildcards = parse("xxxxxxxxxx");
  assertThat(RangeSpecification.any(10)).isEqualTo(tenWildcards);

  // Lengths of -1 and 19 are both expected to be rejected.
  int negativeLength = -1;
  int excessiveLength = 19;
  assertThrows(IllegalArgumentException.class, () -> RangeSpecification.any(negativeLength));
  assertThrows(IllegalArgumentException.class, () -> RangeSpecification.any(excessiveLength));
}
/** Checks extraction of the leading part of a specification. */
@Test
public void testFirst() {
  RangeSpecification original = parse("123[4-7]xxxx");
  int fullLength = original.length();

  assertThat(original.first(3)).isEqualTo(parse("123"));
  assertThat(original.first(6)).isEqualTo(parse("123[4-7]xx"));

  // Asking for the whole length (or more) is expected to return the very same instance.
  assertThat(original.first(fullLength)).isSameInstanceAs(original);
  assertThat(original.first(100)).isSameInstanceAs(original);

  assertThat(original.first(0)).isEqualTo(RangeSpecification.empty());
  assertThrows(IllegalArgumentException.class, () -> original.first(-1));
}
/** Checks extraction of the trailing part of a specification. */
@Test
public void testLast() {
  RangeSpecification original = parse("123[4-7]xxxx");
  int fullLength = original.length();

  assertThat(original.last(3)).isEqualTo(parse("xxx"));
  assertThat(original.last(6)).isEqualTo(parse("3[4-7]xxxx"));

  // Asking for the whole length (or more) is expected to return the very same instance.
  assertThat(original.last(fullLength)).isSameInstanceAs(original);
  assertThat(original.last(100)).isSameInstanceAs(original);

  assertThat(original.last(0)).isEqualTo(RangeSpecification.empty());
  assertThrows(IllegalArgumentException.class, () -> original.last(-1));
}
/** Checks that {@code getPrefix()} drops trailing wildcards but keeps leading ones. */
@Test
public void testGetPrefix() {
  RangeSpecification noDigits = RangeSpecification.empty();
  assertThat(noDigits.getPrefix()).isEqualTo(RangeSpecification.empty());

  RangeSpecification onlyWildcards = parse("xxxx");
  assertThat(onlyWildcards.getPrefix()).isEqualTo(RangeSpecification.empty());

  RangeSpecification leadingWildcards = parse("xx1x");
  assertThat(leadingWildcards.getPrefix()).isEqualTo(parse("xx1"));

  RangeSpecification typical = parse("123[4-7]xxxx");
  assertThat(typical.getPrefix()).isEqualTo(parse("123[4-7]"));
}
/** Checks the ordering of specifications that each represent exactly one digit sequence. */
@Test
public void testOrdering_simple() {
  // For specifications representing a single DigitSequence, the ordering should be the same.
  RangeSpecification[] ascending = {
    RangeSpecification.empty(),
    parse("0"),
    parse("00"),
    parse("000"),
    parse("01"),
    parse("1"),
    parse("10"),
    parse("123"),
    parse("124"),
    parse("4111"),
    parse("4200"),
    parse("4555"),
    parse("9"),
    parse("99"),
    parse("999")
  };
  testComparator(ascending);
}
/** Checks the ordering of specifications whose ranges do not overlap. */
@Test
public void testOrdering_disjoint() {
  // NOT the same as using the min() sequence for ordering (since "4555" > "4200" > "4111").
  RangeSpecification[] ascending = {
    parse("12xx"),
    parse("13xx"),
    parse("14xx"),
    parse("1[5-8]00"),
    parse("[2-3]xxx"),
    parse("[4-6]555"),
    parse("[45]111"),
    parse("[45]2xx"),
    parse("4999")
  };
  testComparator(ascending);
}
/** Checks the ordering of specifications whose ranges overlap each other. */
@Test
public void testOrdering_overlapping() {
  // Ordering for overlapping ranges is well defined but not particularly intuitive.
  RangeSpecification[] ascending = {
    parse("01xxx"),
    parse("01xx[0-5]"),
    parse("01x0[0-5]"),
    parse("01x00"),
    parse("01[0-6]00"),
    parse("01[2-7]xx"),
    parse("01[2-7]00"),
    parse("01[2-7]67"),
    parse("01[4-9]00")
  };
  testComparator(ascending);
}
/** Checks that parsed specifications are printed in their expected string form. */
@Test
public void testToString() {
  // Each row is {input to parse, expected toString() output}.
  String[][] cases = {
    {"0", "0"},
    {"01234", "01234"},
    {"012[3-4]", "012[34]"},
    {"012[0-9]", "012x"},
    {"012[3-689]xxx", "012[3-689]xxx"},
  };
  for (String[] row : cases) {
    assertThat(parse(row[0]).toString()).isEqualTo(row[1]);
  }
}
/** Checks the string form of single digit bitmasks, including masks that are rejected. */
@Test
public void testBitmaskToString() {
  // masks[i] is expected to be printed as expected[i].
  int[] masks = {1 << 0, 1 << 9, 0xF, 0xF1, ALL_DIGITS_MASK};
  String[] expected = {"0", "9", "[0-3]", "[04-7]", "x"};
  for (int i = 0; i < masks.length; i++) {
    assertThat(RangeSpecification.toString(masks[i])).isEqualTo(expected[i]);
  }

  // A mask with no bits set, and one using bit 10, are both expected to be rejected.
  int noBits = 0;
  int bitTen = 0x400;
  assertThrows(IllegalArgumentException.class, () -> RangeSpecification.toString(noBits));
  assertThrows(IllegalArgumentException.class, () -> RangeSpecification.toString(bitTen));
}
/** Checks that a range covering one aligned block becomes a single specification. */
@Test
public void testRangeProcessing_singleBlock() {
  RangeSet<DigitSequence> block = setOf(range("1200", "1299"));
  Truth.assertThat(RangeSpecification.from(block)).isEqualTo(specs("12xx"));
}
/** Checks that a range covering every 4-digit sequence becomes an all-wildcard specification. */
@Test
public void testRangeProcessing_fullRange() {
  RangeSet<DigitSequence> everything = setOf(range("0000", "9999"));
  Truth.assertThat(RangeSpecification.from(everything)).isEqualTo(specs("xxxx"));
}
/** Checks a range that extends one value beyond each end of an aligned block. */
@Test
public void testRangeProcessing_edgeCases() {
  RangeSet<DigitSequence> overhanging = setOf(range("1199", "1300"));
  List<RangeSpecification> expected = specs("1199", "12xx", "1300");
  Truth.assertThat(RangeSpecification.from(overhanging)).isEqualTo(expected);
}
/** Checks a range whose bounds have different lengths and are not aligned to any block. */
@Test
public void testRangeProcessing_complex() {
  RangeSet<DigitSequence> ragged = setOf(range("123", "45678"));
  List<RangeSpecification> expected =
      specs(
          "12[3-9]",
          "1[3-9]x",
          "[2-9]xx",
          "xxxx",
          "[0-3]xxxx",
          "4[0-4]xxx",
          "45[0-5]xx",
          "456[0-6]x",
          "4567[0-8]");
  Truth.assertThat(RangeSpecification.from(ragged)).isEqualTo(expected);
}
/** Checks that abutting ranges are merged when a specification is converted to ranges. */
@Test
public void testAsRanges_edgeCase() {
  RangeSpecification spec = RangeSpecification.parse("12[34][0189]x");
  // The middle 2 ranges abut, so 12380-12399 and 12400-12419 appear as one range.
  Range<DigitSequence> lowest = range("12300", "12319");
  Range<DigitSequence> merged = range("12380", "12419");
  Range<DigitSequence> highest = range("12480", "12499");
  assertThat(spec.asRanges()).containsExactly(lowest, merged, highest).inOrder();
}
/** Asserts that every one of the given digit strings is matched by the specification. */
private static void assertAllMatch(RangeSpecification r, String... sequences) {
  for (int i = 0; i < sequences.length; i++) {
    DigitSequence candidate = DigitSequence.of(sequences[i]);
    assertThat(r.matches(candidate)).isTrue();
  }
}
/** Asserts that none of the given digit strings is matched by the specification. */
private static void assertNoneMatch(RangeSpecification r, String... sequences) {
  for (int i = 0; i < sequences.length; i++) {
    DigitSequence candidate = DigitSequence.of(sequences[i]);
    assertThat(r.matches(candidate)).isFalse();
  }
}
/** Parses each string into a specification and returns them as an immutable list, in order. */
List<RangeSpecification> specs(String... s) {
  Stream<String> patterns = Stream.of(s);
  return patterns.map(RangeSpecification::parse).collect(toImmutableList());
}
/** Returns the closed range {@code [lo, hi]} in canonical form for the digit sequence domain. */
private static Range<DigitSequence> range(String lo, String hi) {
  DigitSequence lower = DigitSequence.of(lo);
  DigitSequence upper = DigitSequence.of(hi);
  Range<DigitSequence> closed = Range.closed(lower, upper);
  return closed.canonical(domain());
}
/**
 * Returns an immutable range set containing the given ranges.
 *
 * <p>The varargs array is only read (wrapped as a list and copied), never written to or exposed,
 * so the generic varargs use is safe and is marked as such to avoid unchecked warnings at every
 * call site.
 */
@SafeVarargs
private static RangeSet<DigitSequence> setOf(Range<DigitSequence>... r) {
  return ImmutableRangeSet.copyOf(Arrays.asList(r));
}
/**
 * Asserts that the given items are listed in strictly ascending order.
 *
 * <p>Every item must be equal to itself (by both {@code equals()} and {@code compareTo()}), and
 * for every pair the earlier item must be unequal to, and strictly less than, the later item.
 *
 * <p>The varargs array is only read, so the generic varargs use is safe.
 */
@SafeVarargs
private static <T extends Comparable<T>> void testComparator(T... items) {
  for (int i = 0; i < items.length; i++) {
    // Reflexivity of equals() and compareTo().
    assertThat(items[i]).isEqualTo(items[i]);
    assertThat(items[i]).isEquivalentAccordingToCompareTo(items[i]);
    // Every later item must be distinct from, and strictly greater than, this one.
    for (int j = i + 1; j < items.length; j++) {
      assertThat(items[i]).isNotEqualTo(items[j]);
      assertThat(items[i]).isLessThan(items[j]);
      assertThat(items[j]).isGreaterThan(items[i]);
    }
  }
}
}

+ 101
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeTreeFactorizerTest.java View File

@ -0,0 +1,101 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.RangeTree.empty;
import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.ALLOW_EDGE_SPLITTING;
import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES;
import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.factor;
import java.util.List;
import java.util.stream.Stream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Tests factorization of range trees under both merge strategies. */
@RunWith(JUnit4.class)
public class RangeTreeFactorizerTest {
  /** The empty tree yields no factors with either strategy. */
  @Test
  public void testEmpty() {
    RangeTree nothing = empty();
    assertThat(factor(nothing, REQUIRE_EQUAL_EDGES)).isEmpty();
    assertThat(factor(nothing, ALLOW_EDGE_SPLITTING)).isEmpty();
  }

  /** Paths sharing one prefix but differing in length stay together as a single factor. */
  @Test
  public void testSimplePrefix() {
    RangeTree sharedPrefix = ranges("123x", "123xx", "123xxx");
    assertThat(factor(sharedPrefix, REQUIRE_EQUAL_EDGES)).containsExactly(sharedPrefix);
    assertThat(factor(sharedPrefix, ALLOW_EDGE_SPLITTING)).containsExactly(sharedPrefix);
  }

  /** Branches that do not overlap stay together as a single factor. */
  @Test
  public void testDisjointBranchesNotFactored() {
    RangeTree disjoint = ranges("123xxx", "124xx", "125x");
    assertThat(factor(disjoint, REQUIRE_EQUAL_EDGES)).containsExactly(disjoint);
    assertThat(factor(disjoint, ALLOW_EDGE_SPLITTING)).containsExactly(disjoint);
  }

  /** Overlapping branches are split into two factors, identically for both strategies. */
  @Test
  public void testOverlappingBranchesAreFactored() {
    RangeTree overlapping = ranges("123xxx", "1234x", "1234", "123");
    RangeTree firstFactor = ranges("123xxx", "123");
    RangeTree secondFactor = ranges("1234x", "1234");
    assertThat(factor(overlapping, REQUIRE_EQUAL_EDGES))
        .containsExactly(firstFactor, secondFactor)
        .inOrder();
    assertThat(factor(overlapping, ALLOW_EDGE_SPLITTING))
        .containsExactly(firstFactor, secondFactor)
        .inOrder();
  }

  /** Shows a case where the two strategies differ, and a similar one where they do not. */
  @Test
  public void testStrategyDifference() {
    // With REQUIRE_EQUAL_EDGES the [3-9] edge of the shorter path cannot be merged into the
    // longer path of the first factor, because the existing [3-5] edge is not equal to [3-9].
    // However [3-5] is contained by [3-9], so with ALLOW_EDGE_SPLITTING the edge being merged can
    // be split to add paths for both [3-5] and [6-9]. This isn't always a win for regular
    // expression length, and in fact for the most complex cases REQUIRE_EQUAL_EDGES often ends up
    // smaller.
    RangeTree splittable = ranges("12[3-5]xx", "12[3-9]x");
    RangeTree containedLong = ranges("12[3-5]xx");
    RangeTree containingShort = ranges("12[3-9]x");
    RangeTree merged = ranges("12[3-5]xx", "12[3-9]x");
    assertThat(factor(splittable, REQUIRE_EQUAL_EDGES))
        .containsExactly(containedLong, containingShort)
        .inOrder();
    assertThat(factor(splittable, ALLOW_EDGE_SPLITTING)).containsExactly(merged);

    // Here the [3-5] edge in the first factor is only a partial overlap with the [4-9] edge being
    // merged in. Both strategies now treat the shorter path as a separate factor, since there is
    // no clean way to merge it into the existing edge.
    RangeTree unsplittable = ranges("12[3-5]xx", "12[4-9]x");
    RangeTree partialLong = ranges("12[3-5]xx");
    RangeTree partialShort = ranges("12[4-9]x");
    assertThat(factor(unsplittable, REQUIRE_EQUAL_EDGES))
        .containsExactly(partialLong, partialShort)
        .inOrder();
    assertThat(factor(unsplittable, ALLOW_EDGE_SPLITTING))
        .containsExactly(partialLong, partialShort)
        .inOrder();
    // TODO: Find a non-complex example where REQUIRE_EQUAL_EDGES yields a smaller regex.
    // Approximately 50 out of the 1000+ regex's in the XML get smaller with REQUIRE_EQUAL_EDGES.
  }

  /** Builds a range tree from the given range specification strings. */
  RangeTree ranges(String... s) {
    List<RangeSpecification> parsed = specs(s);
    return RangeTree.from(parsed);
  }

  /** Parses each string into a specification and returns them as an immutable list, in order. */
  List<RangeSpecification> specs(String... s) {
    Stream<String> patterns = Stream.of(s);
    return patterns.map(RangeSpecification::parse).collect(toImmutableList());
  }
}

+ 555
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeTreeTest.java View File

@ -0,0 +1,555 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain;
import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertThrows;
import com.google.auto.value.AutoValue;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Tests construction, set operations, conversion and traversal of range trees. */
@RunWith(JUnit4.class)
public class RangeTreeTest {
  @Test
  public void testEmptyTree() {
    assertThat(RangeTree.empty()).containsExactly();
    assertThat(RangeTree.empty()).hasSize(0);
  }

  @Test
  public void testEmptySequenceTree() {
    // The tree that matches a zero length input is a perfectly valid range tree (zero length input
    // is perfectly valid input). This is very distinct from the empty tree, which cannot match any
    // input. It's not used very often, but it is well defined.
    RangeTree r = RangeTree.from(RangeSpecification.empty());
    assertThat(r).containsExactly(RangeSpecification.empty());
    assertThat(r).hasSize(1);
  }

  @Test
  public void testFromRangeSetSimple() {
    // Single ranges produce minimal/canonical range specifications.
    RangeTree r = RangeTree.from(rangeSetOf(range("1000", "4999")));
    assertThat(r).containsExactly("[1-4]xxx");
    assertThat(r).hasSize(4000);
  }

  @Test
  public void testFromRangeSetMinMax() {
    RangeTree r = RangeTree.from(rangeSetOf(range("0000", "9999")));
    assertThat(r).containsExactly("xxxx");
    assertThat(r).hasSize(10000);
  }

  @Test
  public void testFromRangeSetAllValues() {
    // Just checking for any out-of-bounds issues at the end of the domain.
    RangeTree r = RangeTree.from(rangeSetOf(range("0", domain().maxValue().toString())));
    assertThat(r).containsExactly(
        "x",
        "xx",
        "xxx",
        "xxxx",
        "xxxxx",
        "xxxxxx",
        "xxxxxxx",
        "xxxxxxxx",
        "xxxxxxxxx",
        "xxxxxxxxxx",
        "xxxxxxxxxxx",
        "xxxxxxxxxxxx",
        "xxxxxxxxxxxxx",
        "xxxxxxxxxxxxxx",
        "xxxxxxxxxxxxxxx",
        "xxxxxxxxxxxxxxxx",
        "xxxxxxxxxxxxxxxxx",
        "xxxxxxxxxxxxxxxxxx");
  }

  @Test
  public void testContains() {
    // The tree generated from the empty range specification actually contains one digit sequence
    // (the empty one). This is not the same as RangeTree.empty() which really contains nothing.
    assertThat(RangeTree.empty()).doesNotContain("");
    assertThat(RangeTree.from(RangeSpecification.empty())).contains("");
    assertThat(RangeTree.from(spec("x"))).contains("7");
    assertThat(RangeTree.from(spec("1"))).contains("1");
    assertThat(RangeTree.from(spec("1"))).doesNotContain("5");
    assertThat(RangeTree.from(spec("xx"))).contains("99");
    assertThat(RangeTree.from(spec("xx"))).doesNotContain("100");
    assertThat(RangeTree.from(spec("0[123]x[456]x[789]"))).contains("027617");
  }

  @Test
  public void testMatchCount() {
    assertThat(RangeTree.empty()).hasSize(0);
    assertThat(RangeTree.from(RangeSpecification.empty())).hasSize(1);
    assertThat(RangeTree.from(spec("x"))).hasSize(10);
    assertThat(RangeTree.from(spec("1"))).hasSize(1);
    assertThat(RangeTree.from(spec("[123]"))).hasSize(3);
    assertThat(RangeTree.from(spec("xx"))).hasSize(100);
    assertThat(RangeTree.from(spec("[234]xx"))).hasSize(300);
    assertThat(RangeTree.from(spec("1[234]xx"))).hasSize(300);
    assertThat(RangeTree.from(spec("1[234][567]xx"))).hasSize(900);
    assertThat(RangeTree.from(spec("0[123]x[456]x[789]"))).hasSize(2700);
  }

  @Test
  public void testUnion() {
    RangeTree a = ranges("12xx", "456xx");
    assertThat(a.union(a)).isEqualTo(a);
    assertThat(a.union(RangeTree.empty())).isEqualTo(a);
    assertThat(RangeTree.empty().union(a)).isEqualTo(a);
    RangeTree b = ranges("1234", "4xxxx", "999");
    assertThat(a.union(b)).containsExactly("999", "12xx", "4xxxx");
    assertThat(b.union(a)).containsExactly("999", "12xx", "4xxxx");
  }

  @Test
  public void testIntersection() {
    RangeTree a = ranges("12xx", "456xx");
    assertThat(a.intersect(a)).isEqualTo(a);
    assertThat(a.intersect(RangeTree.empty())).isSameInstanceAs(RangeTree.empty());
    assertThat(RangeTree.empty().intersect(a)).isSameInstanceAs(RangeTree.empty());
    RangeTree b = ranges("1234", "4xxxx", "999");
    assertThat(a.intersect(b)).containsExactly("1234", "456xx");
    assertThat(b.intersect(a)).containsExactly("1234", "456xx");
  }

  @Test
  public void testSubtraction() {
    RangeTree a = ranges("12xx", "456xx");
    assertThat(a.subtract(a)).isSameInstanceAs(RangeTree.empty());
    assertThat(a.subtract(RangeTree.empty())).isEqualTo(a);
    assertThat(RangeTree.empty().subtract(a)).isSameInstanceAs(RangeTree.empty());
    RangeTree b = ranges("1234", "4xxxx", "999");
    assertThat(a.subtract(b)).containsExactly("12[0-24-9]x", "123[0-35-9]");
    assertThat(b.subtract(a)).containsExactly("999", "4[0-46-9]xxx", "45[0-57-9]xx");
  }

  @Test
  public void testContainsAll() {
    RangeTree a = ranges("12[3-6]xx", "13[5-8]xx", "456xxxx");
    assertThat(a.containsAll(a)).isTrue();
    assertThat(a.containsAll(RangeTree.empty())).isTrue();
    assertThat(RangeTree.empty().containsAll(a)).isFalse();
    // Test branching, since 12.. and 13... are distinct branches but both contain ..[56][78]x
    assertThat(a.containsAll(ranges("1[23][56][78]x", "4567890"))).isTrue();
    // Path 127.. is not contained.
    assertThat(a.containsAll(ranges("12[357]xx"))).isFalse();
    // Hard to test for, but this should fail immediately (due to length mismatch).
    assertThat(a.containsAll(ranges("123456"))).isFalse();
    // Check edge case for zero-length paths.
    assertThat(ranges("", "1").containsAll(ranges(""))).isTrue();
    assertThat(RangeTree.empty().containsAll(ranges(""))).isFalse();
  }

  @Test
  public void testVennDiagram() {
    // Test basic set-theoretic assumptions about the logical operations.
    // In theory we could run this test with any non-disjoint pair of trees.
    RangeTree a = ranges("12xx", "456xx");
    RangeTree b = ranges("1234", "4xxxx", "999");
    RangeTree intAB = a.intersect(b);
    RangeTree subAB = a.subtract(b);
    RangeTree subBA = b.subtract(a);
    // (A\B) and (B\A) are disjoint with (A^B) and each other.
    assertThat(subAB.intersect(intAB)).isSameInstanceAs(RangeTree.empty());
    assertThat(subBA.intersect(intAB)).isSameInstanceAs(RangeTree.empty());
    assertThat(subAB.intersect(subBA)).isSameInstanceAs(RangeTree.empty());
    // Even the union of (A\B) and (B\A) is disjoint to the intersection.
    assertThat(subAB.union(subBA).intersect(intAB)).isSameInstanceAs(RangeTree.empty());
    // (A\B) + (A^B) = A, (B\A) + (A^B) = B, (A\B) + (B\A) + (A^B) == (A+B)
    assertThat(subAB.union(intAB)).isEqualTo(a);
    assertThat(subBA.union(intAB)).isEqualTo(b);
    assertThat(subAB.union(subBA).union(intAB)).isEqualTo(a.union(b));
  }

  @Test
  public void testFromRaggedRange() {
    RangeTree r = RangeTree.from(rangeSetOf(range("123980", "161097")));
    // Very 'ragged' ranges produce a lot of range specifications.
    assertThat(r).containsExactly(
        "1239[8-9]x",
        "12[4-9]xxx",
        "1[3-5]xxxx",
        "160xxx",
        "1610[0-8]x",
        "16109[0-7]");
  }

  @Test
  public void testComplexSpecsToSimpleRange() {
    List<RangeSpecification> specs = specs(
        "12[3-9]",
        "1[3-9]x",
        "[2-9]xx",
        "xxxx",
        "[0-3]xxxx",
        "4[0-4]xxx",
        "45[0-5]xx",
        "456[0-6]x",
        "4567[0-8]");
    RangeTree r = RangeTree.from(specs);
    assertThat(r).containsExactly(specs);
    assertThat(r.asRangeSet()).isEqualTo(rangeSetOf(range("123", "45678")));
  }

  @Test
  public void testAsRangeSetMultipleGroups() {
    // The range specification yields 4 ranges, one for each of the four prefixes 0123, 0124, 0125
    // and 0128.
    RangeTree r = ranges("012[3-58][2-7]x");
    assertThat(r.asRangeSet()).isEqualTo(rangeSetOf(
        range("012320", "012379"),
        range("012420", "012479"),
        range("012520", "012579"),
        range("012820", "012879")));
  }

  @Test
  public void testAsRangeSetMerging() {
    // In isolation, the first specification represents two ranges, and the second represents one.
    RangeTree r = ranges("12[3-4][7-9]x", "125[0-5]x");
    // The range ending 12499 merges with the range starting 12500, giving 2 rather than 3 ranges.
    assertThat(r.asRangeSet()).isEqualTo(rangeSetOf(
        range("12370", "12399"),
        range("12470", "12559")));
  }

  @Test
  public void testVisitor() {
    // Carefully construct DFA so depth first visitation order is just incrementing from 0.
    RangeTree r = ranges("012", "345", "367", "3689");
    TestVisitor v = new TestVisitor();
    r.accept(v);
    DfaNode initial = r.getInitial();
    DfaNode terminal = RangeTree.getTerminal();
    assertThat(v.visited).hasSize(10);
    // Edges 0 & 3 leave the initial state, edges 2,5,7,9 reach the terminal.
    assertThat(v.visited.stream().map(Edge::source).filter(initial::equals).count()).isEqualTo(2);
    assertThat(v.visited.stream().map(Edge::target).filter(terminal::equals).count()).isEqualTo(4);
    // Check expected edge value masks.
    for (int n = 0; n < 10; n++) {
      assertThat(v.visited.get(n).digitMask()).isEqualTo(1 << n);
    }
  }

  // Despite its name, this test exercises RangeTree.first().
  @Test
  public void testMin() {
    assertThrows(IllegalStateException.class, () -> RangeTree.empty().first());
    assertThat(RangeTree.from(RangeSpecification.empty()).first()).isEqualTo(DigitSequence.empty());
    RangeTree tree = ranges("[1-6]xxxx", "[6-9]xx", "[89]xxx");
    assertThat(tree.first()).isEqualTo(DigitSequence.of("600"));
    assertThat(tree.subtract(ranges("[6-8]xx")).first()).isEqualTo(DigitSequence.of("900"));
    assertThat(tree.subtract(ranges("xxx")).first()).isEqualTo(DigitSequence.of("8000"));
    assertThat(tree.subtract(ranges("xxx", "8[0-6]xx")).first())
        .isEqualTo(DigitSequence.of("8700"));
    assertThat(tree.subtract(ranges("xxx", "xxxx")).first()).isEqualTo(DigitSequence.of("10000"));
  }

  @Test
  public void testSample() {
    assertThrows(IndexOutOfBoundsException.class, () -> RangeTree.empty().sample(0));
    assertThat(RangeTree.from(RangeSpecification.empty()).sample(0))
        .isEqualTo(DigitSequence.empty());
    RangeTree tree = ranges("[1-6]xxxx", "[6-9]xx", "[89]xxx");
    // sometimes iteration looks ordered ...
    assertThat(tree.sample(0)).isEqualTo(DigitSequence.of("10000"));
    assertThat(tree.sample(1)).isEqualTo(DigitSequence.of("10001"));
    assertThat(tree.sample(10)).isEqualTo(DigitSequence.of("10010"));
    // but in general sample(n).next() != sample(n+1)
    assertThat(tree.sample(49999)).isEqualTo(DigitSequence.of("59999"));
    assertThat(tree.sample(50000)).isEqualTo(DigitSequence.of("600"));
    assertThat(tree.sample(50001)).isEqualTo(DigitSequence.of("60000"));
    assertThat(tree.sample(tree.size() - 1)).isEqualTo(DigitSequence.of("9999"));
    // The first index past the end of the populated tree must be rejected. (Sampling the empty
    // tree here instead would pass trivially and leave the upper bound of 'tree' unchecked.)
    assertThrows(IndexOutOfBoundsException.class, () -> tree.sample(tree.size()));
  }

  @Test
  public void testSignificantDigits() {
    RangeTree ranges = ranges("123xx", "14567", "789");
    assertThat(ranges.significantDigits(3)).containsExactly("123xx", "145xx", "789");
    assertThat(ranges.significantDigits(2)).containsExactly("12xxx", "14xxx", "78x");
    assertThat(ranges.significantDigits(1)).containsExactly("1xxxx", "7xx");
    assertThat(ranges.significantDigits(0)).containsExactly("xxxxx", "xxx");
  }

  @Test
  public void testPrefixWith() {
    RangeTree ranges = ranges("123xx", "456x");
    assertThat(ranges.prefixWith(spec("00"))).isEqualTo(ranges("00123xx", "00456x"));
    assertThat(ranges.prefixWith(RangeSpecification.empty())).isSameInstanceAs(ranges);
    // The prefixing of an empty tree is empty (all paths that exist have been prefixed correctly).
    assertThat(RangeTree.empty().prefixWith(spec("00"))).isEqualTo(RangeTree.empty());
  }

  @Test
  public void testSlicing() {
    RangeTree ranges = ranges("", "1", "123", "125xx", "456x");
    assertThat(ranges.slice(1)).isEqualTo(ranges("[14]"));
    assertThat(ranges.slice(2)).isEqualTo(ranges("12", "45"));
    assertThat(ranges.slice(3)).isEqualTo(ranges("12[35]", "456"));
    assertThat(ranges.slice(4)).isEqualTo(ranges("125x", "456x"));
    assertThat(ranges.slice(2, 4)).isEqualTo(ranges("123", "125x", "456x"));
    assertThat(ranges.slice(0, 5)).isEqualTo(ranges);
  }

  @Test
  public void testSerializingRealWorldExample() {
    List<RangeSpecification> expected = specs(
        "11[2-7]xxxxxxx",
        "12[0-249][2-7]xxxxxx",
        "12[35-8]x[2-7]xxxxx",
        "13[0-25][2-7]xxxxxx",
        "13[346-9]x[2-7]xxxxx",
        "14[145][2-7]xxxxxx",
        "14[236-9]x[2-7]xxxxx",
        "1[59][0235-9]x[2-7]xxxxx",
        "1[59][14][2-7]xxxxxx",
        "16[014][2-7]xxxxxx",
        "16[235-9]x[2-7]xxxxx",
        "17[1257][2-7]xxxxxx",
        "17[34689]x[2-7]xxxxx",
        "18[01346][2-7]xxxxxx",
        "18[257-9]x[2-7]xxxxx",
        "2[02][2-7]xxxxxxx",
        "21[134689]x[2-7]xxxxx",
        "21[257][2-7]xxxxxx",
        "23[013][2-7]xxxxxx",
        "23[24-8]x[2-7]xxxxx",
        "24[01][2-7]xxxxxx",
        "24[2-8]x[2-7]xxxxx",
        "25[0137][2-7]xxxxxx",
        "25[25689]x[2-7]xxxxx",
        "26[0158][2-7]xxxxxx",
        "26[2-4679]x[2-7]xxxxx",
        "27[13-79]x[2-7]xxxxx",
        "278[2-7]xxxxxx",
        "28[1568][2-7]xxxxxx",
        "28[2-479]x[2-7]xxxxx",
        "29[14][2-7]xxxxxx",
        "29[235-9]x[2-7]xxxxx",
        "301x[2-7]xxxxx",
        "31[79]x[2-7]xxxxx",
        "32[1-5]x[2-7]xxxxx",
        "326[2-7]xxxxxx",
        "33[2-7]xxxxxxx",
        "34[13][2-7]xxxxxx",
        "342[0189][2-7]xxxxx",
        "342[2-7]xxxxxx",
        "34[5-8]x[2-7]xxxxx",
        "35[125689]x[2-7]xxxxx",
        "35[34][2-7]xxxxxx",
        "36[01489][2-7]xxxxxx",
        "36[235-7]x[2-7]xxxxx",
        "37[02-46][2-7]xxxxxx",
        "37[157-9]x[2-7]xxxxx",
        "38[159][2-7]xxxxxx",
        "38[2-467]x[2-7]xxxxx",
        "4[04][2-7]xxxxxxx",
        "41[14578]x[2-7]xxxxx",
        "41[36][2-7]xxxxxx",
        "42[1-47][2-7]xxxxxx",
        "42[5689]x[2-7]xxxxx",
        "43[15][2-7]xxxxxx",
        "43[2-467]x[2-7]xxxxx",
        "45[12][2-7]xxxxxx",
        "45[4-7]x[2-7]xxxxx",
        "46[0-26-9][2-7]xxxxxx",
        "46[35]x[2-7]xxxxx",
        "47[0-24-9][2-7]xxxxxx",
        "473x[2-7]xxxxx",
        "48[013-57][2-7]xxxxxx",
        "48[2689]x[2-7]xxxxx",
        "49[014-7][2-7]xxxxxx",
        "49[2389]x[2-7]xxxxx",
        "51[025][2-7]xxxxxx",
        "51[146-9]x[2-7]xxxxx",
        "52[14-8]x[2-7]xxxxx",
        "522[2-7]xxxxxx",
        "53[1346]x[2-7]xxxxx",
        "53[25][2-7]xxxxxx",
        "54[14-69]x[2-7]xxxxx",
        "54[28][2-7]xxxxxx",
        "55[12][2-7]xxxxxx",
        "55[46]x[2-7]xxxxx",
        "56[146-9]x[2-7]xxxxx",
        "56[25][2-7]xxxxxx",
        "571[2-7]xxxxxx",
        "57[2-4]x[2-7]xxxxx",
        "581[2-7]xxxxxx",
        "58[2-8]x[2-7]xxxxx",
        "59[15][2-7]xxxxxx",
        "59[246]x[2-7]xxxxx",
        "61[1358]x[2-7]xxxxx",
        "612[2-7]xxxxxx",
        "621[2-7]xxxxxx",
        "62[2457]x[2-7]xxxxx",
        "631[2-7]xxxxxx",
        "63[2-4]x[2-7]xxxxx",
        "641[2-7]xxxxxx",
        "64[235-7]x[2-7]xxxxx",
        "65[17][2-7]xxxxxx",
        "65[2-689]x[2-7]xxxxx",
        "66[13][2-7]xxxxxx",
        "66[24578]x[2-7]xxxxx",
        "671[2-7]xxxxxx",
        "67[235689]x[2-7]xxxxx",
        "674[0189][2-7]xxxxx",
        "674[2-7]xxxxxx",
        "680[2-7]xxxxxx",
        "68[1-6]x[2-7]xxxxx",
        "71[013-9]x[2-7]xxxxx",
        "712[2-7]xxxxxx",
        "72[0235-9]x[2-7]xxxxx",
        "72[14][2-7]xxxxxx",
        "73[134][2-7]xxxxxx",
        "73[2679]x[2-7]xxxxx",
        "74[1-35689]x[2-7]xxxxx",
        "74[47][2-7]xxxxxx",
        "75[15][2-7]xxxxxx",
        "75[2-46-9]x[2-7]xxxxx",
        "7[67][02-9]x[2-7]xxxxx",
        "7[67]1[2-7]xxxxxx",
        "78[013-7]x[2-7]xxxxx",
        "782[0-6][2-7]xxxxx",
        "788[0189][2-7]xxxxx",
        "788[2-7]xxxxxx",
        "79[0189]x[2-7]xxxxx",
        "79[2-7]xxxxxxx",
        "80[2-467]xxxxxxx",
        "81[1357-9]x[2-7]xxxxx",
        "816[2-7]xxxxxx",
        "82[014][2-7]xxxxxx",
        "82[235-8]x[2-7]xxxxx",
        "83[03-57-9]x[2-7]xxxxx",
        "83[126][2-7]xxxxxx",
        "84[0-24-9]x[2-7]xxxxx",
        "85xx[2-7]xxxxx",
        "86[136][2-7]xxxxxx",
        "86[2457-9]x[2-7]xxxxx",
        "87[078][2-7]xxxxxx",
        "87[1-6]x[2-7]xxxxx",
        "88[1256]x[2-7]xxxxx",
        "88[34][2-7]xxxxxx",
        "891[2-7]xxxxxx",
        "89[2-4]x[2-7]xxxxx");
    RangeTree t1 = RangeTree.from(expected);
    assertThat(t1).containsExactly(expected);
    // Round-tripping through a range set must reproduce the same specifications.
    assertThat(RangeTree.from(t1.asRangeSet())).containsExactly(expected);
  }

  @Test
  public void testThreadSafety() throws ExecutionException, InterruptedException {
    // For 10^5 this takes ~500ms. For 10^6 it starts to take non-trivial time (~10 seconds).
    int numDigits = 5;
    // At 1000 threads this starts to take non-trivial time.
    int numThreads = 100;
    // Collect 10^N ranges from "00..." to "99...", all distinct.
    List<RangeTree> ranges = Stream
        .iterate(DigitSequence.zeros(numDigits), DigitSequence::next)
        .limit((int) Math.pow(10, numDigits))
        .map(RangeTreeTest::singletonRange)
        .collect(Collectors.toCollection(ArrayList::new));
    Collections.shuffle(ranges, new Random(1234L));
    // Recombining all 10^N ranges should give a single combined block (i.e. "xx..."). Doing it
    // with high parallelism should test the thread safety of the concurrent interning map.
    ForkJoinPool pool = new ForkJoinPool(numThreads);
    RangeTree combined;
    try {
      combined = pool
          .submit(() -> ranges.parallelStream().reduce(RangeTree.empty(), RangeTree::union))
          .get();
    } finally {
      // Always release the worker threads, even if the reduction fails.
      pool.shutdown();
    }
    assertThat(combined).isEqualTo(ranges(Strings.repeat("x", numDigits)));
  }

  /** Value type recording one visited edge: its source node, target node and digit mask. */
  @AutoValue
  abstract static class Edge {
    static Edge of(DfaNode source, DfaNode target, DfaEdge edge) {
      return new AutoValue_RangeTreeTest_Edge(source, target, edge.getDigitMask());
    }

    abstract DfaNode source();

    abstract DfaNode target();

    abstract int digitMask();
  }

  // Range tree visitor that captures edges visited (in depth first order)
  private static final class TestVisitor implements DfaVisitor {
    final List<Edge> visited = new ArrayList<>();

    @Override
    public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
      visited.add(Edge.of(source, target, edge));
      // Recurse into the target so edges are recorded depth first.
      target.accept(this);
    }
  }

  /** Builds a range tree from the given range specification strings. */
  RangeTree ranges(String... s) {
    return RangeTree.from(specs(s));
  }

  /** Parses a single range specification string. */
  private static RangeSpecification spec(String s) {
    return RangeSpecification.parse(s);
  }

  /** Parses each string into a specification and returns them as an immutable list, in order. */
  private static List<RangeSpecification> specs(String... s) {
    return Stream.of(s).map(RangeSpecification::parse).collect(toImmutableList());
  }

  /** Returns the closed range {@code [lo, hi]} in canonical form for the digit sequence domain. */
  private static Range<DigitSequence> range(String lo, String hi) {
    return Range.closed(DigitSequence.of(lo), DigitSequence.of(hi)).canonical(domain());
  }

  /**
   * Returns an immutable range set containing the given ranges. The varargs array is only read,
   * so the generic varargs use is safe.
   */
  @SafeVarargs
  private static RangeSet<DigitSequence> rangeSetOf(Range<DigitSequence>... r) {
    return ImmutableRangeSet.copyOf(asList(r));
  }

  /** Returns a range tree containing only the given digit sequence. */
  private static RangeTree singletonRange(DigitSequence s) {
    return RangeTree.from(spec(s.toString()));
  }
}

+ 57
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/i18n/PhoneRegionTest.java View File

@ -0,0 +1,57 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.i18n;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static org.junit.Assert.assertThrows;
import java.util.stream.Stream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Tests ordering, the "world" region and argument validation of {@link PhoneRegion}. */
@RunWith(JUnit4.class)
public class PhoneRegionTest {
  /** Sorting places the two-letter codes alphabetically, with "001" after them. */
  @Test
  public void testOrdering() {
    // Every input element is listed, so assert the exact sorted contents rather than a subset;
    // this also rejects any unexpected extra or duplicated element.
    assertThat(Stream.of(r("US"), r("GB"), r("AE"), r("001"), r("KR"), r("MN")).sorted())
        .containsExactly(r("AE"), r("GB"), r("KR"), r("MN"), r("US"), r("001"))
        .inOrder();
  }

  /** The world region is the one with code "001". */
  @Test
  public void testWorld() {
    assertThat(PhoneRegion.getWorld()).isEqualTo(r("001"));
  }

  /** Invalid codes are rejected with a message that mentions the offending value. */
  @Test
  public void testBadArgs() {
    assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("ABC")))
        .hasMessageThat()
        .contains("ABC");
    assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("us")))
        .hasMessageThat()
        .contains("us");
    assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("000")))
        .hasMessageThat()
        .contains("000");
  }

  /** Shorthand for {@code PhoneRegion.of(cldrCode)}. */
  private static PhoneRegion r(String cldrCode) {
    return PhoneRegion.of(cldrCode);
  }
}

+ 42
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/i18n/SimpleLanguageTagTest.java View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.i18n;
import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Tests the string form and argument validation of {@link SimpleLanguageTag}. */
@RunWith(JUnit4.class)
public class SimpleLanguageTagTest {
  /** Valid tags are printed with a hyphen, even when given with an underscore. */
  @Test
  public void testSimple() {
    String languageOnly = SimpleLanguageTag.of("en").toString();
    assertThat(languageOnly).isEqualTo("en");

    String languageAndScript = SimpleLanguageTag.of("zh_Hant").toString();
    assertThat(languageAndScript).isEqualTo("zh-Hant");
  }

  /** Invalid tags are rejected with a message that mentions the offending value. */
  @Test
  public void testBadArgs() {
    IllegalArgumentException singleLetter =
        assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("x"));
    assertThat(singleLetter).hasMessageThat().contains("x");

    IllegalArgumentException upperCase =
        assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("EN"));
    assertThat(upperCase).hasMessageThat().contains("EN");

    IllegalArgumentException numeric =
        assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("003"));
    assertThat(numeric).hasMessageThat().contains("003");
  }
}

+ 82
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/model/AltFormatSpecTest.java View File

@ -0,0 +1,82 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static org.junit.Assert.assertThrows;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for {@link AltFormatSpec} creation and the specifier string derived from it. */
@RunWith(JUnit4.class)
public class AltFormatSpecTest {
  /** A created spec exposes the values it was built from, plus the combined specifier. */
  @Test
  public void testSimple() {
    FormatTemplate expectedTemplate = FormatTemplate.parse("XXXX XXXX");
    RangeSpecification expectedPrefix = RangeSpecification.parse("123");

    AltFormatSpec altFormat =
        AltFormatSpec.create(expectedTemplate, expectedPrefix, "foo", Optional.of("Comment"));

    assertThat(altFormat.template()).isEqualTo(expectedTemplate);
    assertThat(altFormat.prefix()).isEqualTo(expectedPrefix);
    assertThat(altFormat.parentFormatId()).isEqualTo("foo");
    assertThat(altFormat.comment()).hasValue("Comment");
    assertThat(altFormat.specifier()).isEqualTo("123X XXXX");
  }

  /** Valid template/prefix pairs produce the expected specifier. */
  @Test
  public void testGoodTemplateAndPrefix() {
    // Empty prefix, then prefixes of increasing length up to the full template.
    assertGoodTemplateAndPrefix("XXX XXX", "", "XXX XXX");
    assertGoodTemplateAndPrefix("XXX XXX", "123", "123 XXX");
    assertGoodTemplateAndPrefix("XXX XXX", "1234", "123 4XX");
    assertGoodTemplateAndPrefix("XXX XXX", "123456", "123 456");
    // Template with optional trailing digits.
    assertGoodTemplateAndPrefix("XXX XXX**", "123", "123 XXX**");
    // Prefixes containing a digit range or an "any digit" in the middle.
    assertGoodTemplateAndPrefix("XXX XXX", "12[3-6]", "12[3-6] XXX");
    assertGoodTemplateAndPrefix("XXX XXX", "1x3", "1X3 XXX");
  }

  /** Invalid template/prefix pairs are rejected by {@code AltFormatSpec.create}. */
  @Test
  public void testBadTemplateOrPrefix() {
    // Prefix too long.
    assertBadTemplateAndPrefix("XXXX", "12345");
    // Prefix too long for min length.
    assertBadTemplateAndPrefix("XXXX**", "12345");
    // Bad template chars.
    assertBadTemplateAndPrefix("XXX-XXX", "123");
    // Extra whitespace.
    assertBadTemplateAndPrefix(" XXXXXX", "123");
    // Prefix must not end with "any digit".
    // NOTE(review): this uses the same template as the "extra whitespace" case above, so the
    // assertion may already pass because of the template alone - confirm that the trailing
    // "any digit" rule is really what is being exercised here.
    assertBadTemplateAndPrefix(" XXXXXX", "123xx");
  }

  /** Parses both inputs and checks the specifier of the resulting spec. */
  private static void assertGoodTemplateAndPrefix(String template, String prefix, String spec) {
    FormatTemplate parsedTemplate = FormatTemplate.parse(template);
    RangeSpecification parsedPrefix = RangeSpecification.parse(prefix);
    AltFormatSpec created =
        AltFormatSpec.create(parsedTemplate, parsedPrefix, "foo", Optional.empty());
    assertThat(created.specifier()).isEqualTo(spec);
  }

  /**
   * Parses both inputs (parsing itself must succeed) and checks that creating the spec throws
   * {@link IllegalArgumentException}.
   */
  private static void assertBadTemplateAndPrefix(String template, String prefix) {
    FormatTemplate parsedTemplate = FormatTemplate.parse(template);
    RangeSpecification parsedPrefix = RangeSpecification.parse(prefix);
    assertThrows(
        IllegalArgumentException.class,
        () -> AltFormatSpec.create(parsedTemplate, parsedPrefix, "foo", Optional.empty()));
  }
}

+ 111
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/model/AltFormatsSchemaTest.java View File

@ -0,0 +1,111 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.truth.Truth.assertThat;
import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for CSV export and import of alternate formats via {@link AltFormatsSchema}. */
@RunWith(JUnit4.class)
public class AltFormatsSchemaTest {
  /** Header row shared by every CSV expectation and input in this test. */
  private static final String HEADER = "Format ; Parent Format ; Comment";

  @Test
  public void testSimple_export() throws IOException {
    List<String> csvLines = exportCsv(altFormat("123 XXX XXXX", "foo", "Hello World"));
    assertThat(csvLines)
        .containsExactly(HEADER, "123 XXX XXXX ; foo ; \"Hello World\"")
        .inOrder();
  }

  @Test
  public void testSimple_import() throws IOException {
    ImmutableList<AltFormatSpec> imported =
        importCsv(HEADER, "123 XXX XXXX ; foo ; \"Hello World\"");
    assertThat(imported).containsExactly(altFormat("123 XXX XXXX", "foo", "Hello World"));
  }

  /** Tab, newline and backslash in a comment are written in escaped form. */
  @Test
  public void testEscapedText_export() throws IOException {
    List<String> csvLines = exportCsv(altFormat("123 XXX XXXX", "foo", "\tHello\nWorld\\"));
    assertThat(csvLines)
        .containsExactly(HEADER, "123 XXX XXXX ; foo ; \"\\tHello\\nWorld\\\\\"")
        .inOrder();
  }

  /** Escaped tab, newline and backslash in a comment are restored on import. */
  @Test
  public void testEscapedText_import() throws IOException {
    ImmutableList<AltFormatSpec> imported =
        importCsv(HEADER, "123 XXX XXXX ; foo ; \"\\tHello\\nWorld\\\\\"");
    assertThat(imported).containsExactly(altFormat("123 XXX XXXX", "foo", "\tHello\nWorld\\"));
  }

  /** Rows are exported in the order the formats were supplied. */
  @Test
  public void testRetainsExplicitOrdering() throws IOException {
    List<String> csvLines =
        exportCsv(
            altFormat("123 XXXXXX", "foo", "First"),
            altFormat("XX XXXX", "bar", "Second"),
            altFormat("9X XXX XXX", "baz", "Third"));
    assertThat(csvLines)
        .containsExactly(
            HEADER,
            "123 XXXXXX ; foo ; \"First\"",
            "XX XXXX ; bar ; \"Second\"",
            "9X XXX XXX ; baz ; \"Third\"")
        .inOrder();
  }

  /** Builds an expected spec via {@code AltFormatsSchema.parseAltFormat}. */
  private AltFormatSpec altFormat(String spec, String parentId, String comment) {
    AltFormatSpec parsed = AltFormatsSchema.parseAltFormat(spec, parentId, comment);
    return parsed;
  }

  /** Exports the given formats to CSV and returns the output split into lines. */
  private static List<String> exportCsv(AltFormatSpec... altFormats) throws IOException {
    try (StringWriter buffer = new StringWriter()) {
      List<AltFormatSpec> specs = Arrays.asList(altFormats);
      AltFormatsSchema.exportCsv(buffer, specs);
      // Ignore trailing empty lines.
      String trimmed = CharMatcher.is('\n').trimTrailingFrom(buffer.toString());
      return Splitter.on('\n').splitToList(trimmed);
    }
  }

  /** Joins the given lines into CSV text and imports it. */
  private static ImmutableList<AltFormatSpec> importCsv(String... lines)
      throws IOException {
    // Add a trailing newline, since that's what we expect in the real CSV files.
    String contents = Joiner.on('\n').join(lines) + "\n";
    return AltFormatsSchema.importAltFormats(new StringReader(contents));
  }
}

+ 156
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/model/CommentsSchemaTest.java View File

@ -0,0 +1,156 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.anchor;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_FIXED_LINE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_MOBILE;
import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for CSV export and import of comments via {@link CommentsSchema}. */
@RunWith(JUnit4.class)
public class CommentsSchemaTest {
  private static final PhoneRegion REGION_US = PhoneRegion.of("US");
  private static final PhoneRegion REGION_CA = PhoneRegion.of("CA");

  private static final Anchor US_TOP = anchor(REGION_US);
  private static final Anchor US_FIXED_LINE = anchor(REGION_US, XML_FIXED_LINE);
  private static final Anchor US_MOBILE = anchor(REGION_US, XML_MOBILE);
  private static final Anchor US_SHORTCODE = Comment.shortcodeAnchor(REGION_US);
  private static final Anchor CA_FIXED_LINE = anchor(REGION_CA, XML_FIXED_LINE);

  /** Header row shared by every CSV expectation and input in this test. */
  private static final String HEADER = "Region ; Label ; Comment";

  @Test
  public void testSimple_export() throws IOException {
    List<String> csvLines = exportCsv(comment(US_FIXED_LINE, "Hello World"));
    assertThat(csvLines)
        .containsExactly(HEADER, "US ; XML_FIXED_LINE ; \"Hello World\"")
        .inOrder();
  }

  @Test
  public void testSimple_import() throws IOException {
    ImmutableList<Comment> imported = importCsv(HEADER, "US ; XML_FIXED_LINE ; \"Hello World\"");
    assertThat(imported).containsExactly(comment(US_FIXED_LINE, "Hello World"));
  }

  /** A two-line comment with a tab and a backslash is written as one escaped CSV value. */
  @Test
  public void testEscapedText_export() throws IOException {
    List<String> csvLines = exportCsv(comment(US_FIXED_LINE, "\tHello", "World\\"));
    assertThat(csvLines)
        .containsExactly(HEADER, "US ; XML_FIXED_LINE ; \"\\tHello\\nWorld\\\\\"")
        .inOrder();
  }

  /** An escaped CSV value is restored to a two-line comment on import. */
  @Test
  public void testEscapedText_import() throws IOException {
    ImmutableList<Comment> imported =
        importCsv(HEADER, "US ; XML_FIXED_LINE ; \"\\tHello\\nWorld\\\\\"");
    assertThat(imported).containsExactly(comment(US_FIXED_LINE, "\tHello", "World\\"));
  }

  /**
   * Exported rows are expected grouped by region and label, with comments sharing an anchor kept
   * in the order they were supplied.
   */
  @Test
  public void testOrdering_export() throws IOException {
    List<String> csvLines =
        exportCsv(
            comment(US_FIXED_LINE, "First"),
            comment(US_FIXED_LINE, "Second"),
            comment(US_FIXED_LINE, "Third"),
            comment(US_TOP, "Top Level Comment"),
            comment(US_SHORTCODE, "Shortcode Comment"),
            comment(US_MOBILE, "Other Type"),
            comment(CA_FIXED_LINE, "Other Region"));
    assertThat(csvLines)
        .containsExactly(
            HEADER,
            "CA ; XML_FIXED_LINE ; \"Other Region\"",
            "US ; SC ; \"Shortcode Comment\"",
            "US ; XML ; \"Top Level Comment\"",
            "US ; XML_FIXED_LINE ; \"First\"",
            "US ; XML_FIXED_LINE ; \"Second\"",
            "US ; XML_FIXED_LINE ; \"Third\"",
            "US ; XML_MOBILE ; \"Other Type\"")
        .inOrder();
  }

  /** Imported comments are expected in the same grouped order, whatever the row order. */
  @Test
  public void testOrdering_import() throws IOException {
    ImmutableList<Comment> imported =
        importCsv(
            HEADER,
            "US ; XML_FIXED_LINE ; \"First\"",
            "US ; XML_FIXED_LINE ; \"Second\"",
            "US ; XML_FIXED_LINE ; \"Third\"",
            "US ; XML ; \"Top Level Comment\"",
            "US ; SC ; \"Shortcode Comment\"",
            "US ; XML_MOBILE ; \"Other Type\"",
            "CA ; XML_FIXED_LINE ; \"Other Region\"");
    assertThat(imported)
        .containsExactly(
            comment(CA_FIXED_LINE, "Other Region"),
            comment(US_SHORTCODE, "Shortcode Comment"),
            comment(US_TOP, "Top Level Comment"),
            comment(US_FIXED_LINE, "First"),
            comment(US_FIXED_LINE, "Second"),
            comment(US_FIXED_LINE, "Third"),
            comment(US_MOBILE, "Other Type"))
        .inOrder();
  }

  /** Builds a comment with the given anchor and text lines. */
  private Comment comment(Anchor a, String... lines) {
    List<String> textLines = Arrays.asList(lines);
    return Comment.create(a, textLines);
  }

  /** Exports the given comments to CSV and returns the output split into lines. */
  private static List<String> exportCsv(Comment... comments) throws IOException {
    try (StringWriter buffer = new StringWriter()) {
      List<Comment> toExport = Arrays.asList(comments);
      CommentsSchema.exportCsv(buffer, toExport);
      // Ignore trailing empty lines.
      String trimmed = CharMatcher.is('\n').trimTrailingFrom(buffer.toString());
      return Splitter.on('\n').splitToList(trimmed);
    }
  }

  /** Joins the given lines into CSV text and imports it. */
  private static ImmutableList<Comment> importCsv(String... lines)
      throws IOException {
    // Add a trailing newline, since that's what we expect in the real CSV files.
    String contents = Joiner.on('\n').join(lines) + "\n";
    return CommentsSchema.importComments(new StringReader(contents));
  }
}

+ 160
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/model/FormatSpecTest.java View File

@ -0,0 +1,160 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.model;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static java.util.Optional.empty;
import static org.junit.Assert.assertThrows;
import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for {@link FormatSpec} creation and the XML values derived by {@link FormatTemplate}. */
@RunWith(JUnit4.class)
public class FormatSpecTest {
  @Test
  public void testCreate_national() {
    // Each of these must be accepted as a national spec (creation must not throw).
    for (String spec : new String[] {"XXXX", "XXX***", "#XXX XXX", "(#XXX) XX**-XXX"}) {
      national(spec);
    }
    String skeleton = national("XX\\XXX").national().skeleton();
    assertThat(skeleton).isEqualTo("$1X$2");
  }

  @Test
  public void testCreate_international() {
    // The international spec can be a duplicate (signifies international formatting is permitted).
    international("XXX XXXX", "XXX XXXX");
    // Or it can be different (including grouping and separators).
    international("(#XXX) XXXX", "XXX-XXXX");
  }

  @Test
  public void testCreate_carrier() {
    carrier("# XXX XXXX", "# @ XXX XXXX");
    carrier("XXX XXXX", "@ XXX XXXX");
    // Carrier and national prefix can differ on whether national prefix is needed.
    carrier("XXX XXXX", "#@ XXX XXXX");
  }

  @Test
  public void testCreate_national_bad() {
    // Every one of these national specs must be rejected.
    for (String bad : new String[] {"", "Hello", "$1", "XX**XX", "****", "@ XXX"}) {
      assertThrows(IllegalArgumentException.class, () -> national(bad));
    }
  }

  @Test
  public void testCreate_international_bad() {
    // National prefix is not allowed.
    assertThrows(IllegalArgumentException.class, () -> international("#XXXX", "#XXXX"));
    // Groups must match.
    for (String mismatched : new String[] {"XX XX", "XXX"}) {
      assertThrows(IllegalArgumentException.class, () -> international("# XXXX", mismatched));
    }
  }

  @Test
  public void testCreate_carrier_bad() {
    // Carrier specs must have '@' present.
    assertThrows(IllegalArgumentException.class, () -> carrier("XXX XXXX", "XXX XXXX"));
    // Carrier specs cannot differ after the first group (including separator).
    assertThrows(IllegalArgumentException.class, () -> carrier("#XXX XXXX", "#@XXX-XXXX"));
    // National prefix (if present) must come first (if this is ever relaxed, we would need to
    // change how carrier prefixes are handled and how nationalPrefixForParsing is generated).
    assertThrows(IllegalArgumentException.class, () -> carrier("# XXX XXXX", "@# XXX XXXX"));
  }

  @Test
  public void testTemplate_splitPrefix() {
    FormatTemplate template = FormatTemplate.parse("(#) XXX - XXX**");
    assertThat(template.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{3,5})");
    assertThat(template.getXmlFormat()).isEqualTo("$1 - $2");
    assertThat(template.getXmlPrefix()).hasValue("($NP) $FG");
    assertPrefixFlags(template, true, false);
  }

  @Test
  public void testTemplate_noPrefix() {
    FormatTemplate template = FormatTemplate.parse("XXX XX-XX");
    assertThat(template.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})");
    assertThat(template.getXmlFormat()).isEqualTo("$1 $2-$3");
    assertThat(template.getXmlPrefix()).isEmpty();
    assertPrefixFlags(template, false, false);
  }

  @Test
  public void testTemplate_replacementNoNationalPrefix() {
    FormatTemplate template = FormatTemplate.parse("{XXX>123} XX-XX");
    assertThat(template.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})");
    assertThat(template.getXmlFormat()).isEqualTo("$2-$3");
    assertThat(template.getXmlPrefix()).hasValue("123 $FG");
    assertPrefixFlags(template, false, false);
  }

  @Test
  public void testTemplate_replacementWithNationalPrefix() {
    FormatTemplate template = FormatTemplate.parse("#{XXX>123} XX-XX");
    assertThat(template.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})");
    assertThat(template.getXmlFormat()).isEqualTo("$2-$3");
    assertThat(template.getXmlPrefix()).hasValue("$NP123 $FG");
    assertPrefixFlags(template, true, false);
  }

  @Test
  public void testTemplate_replacementNotFirstGroup() {
    FormatTemplate template = FormatTemplate.parse("XXX {XX>ABC} XX");
    assertThat(template.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})");
    assertThat(template.getXmlFormat()).isEqualTo("$1 ABC $3");
    assertThat(template.getXmlPrefix()).isEmpty();
    assertPrefixFlags(template, false, false);
  }

  @Test
  public void testTemplate_removeFirstGroupViaReplacement() {
    // This test is very important for Argentina, where the leading group must be removed (and a
    // different mobile token is used after the area code).
    FormatTemplate template = FormatTemplate.parse("{XX>}XXX XXXX");
    assertThat(template.getXmlCapturingPattern()).isEqualTo("(\\d{2})(\\d{3})(\\d{4})");
    assertThat(template.getXmlFormat()).isEqualTo("$2 $3");
    assertThat(template.getXmlPrefix()).isEmpty();
    assertPrefixFlags(template, false, false);
  }

  /** Checks the national-prefix and carrier-code flags of a template, in that order. */
  private static void assertPrefixFlags(
      FormatTemplate template, boolean nationalPrefix, boolean carrierCode) {
    assertThat(template.hasNationalPrefix()).isEqualTo(nationalPrefix);
    assertThat(template.hasCarrierCode()).isEqualTo(carrierCode);
  }

  /** Creates a spec from a national spec only. */
  private static FormatSpec national(String national) {
    FormatSpec spec = FormatSpec.of(national, empty(), empty(), empty(), false, empty());
    return spec;
  }

  /** Creates a spec from a national spec plus an international spec. */
  private static FormatSpec international(String national, String intl) {
    FormatSpec spec = FormatSpec.of(national, empty(), Optional.of(intl), empty(), false, empty());
    return spec;
  }

  /** Creates a spec from a national spec plus a carrier spec. */
  private static FormatSpec carrier(String national, String carrier) {
    FormatSpec spec =
        FormatSpec.of(national, Optional.of(carrier), empty(), empty(), false, empty());
    return spec;
  }
}

+ 70
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/AssignmentTest.java View File

@ -0,0 +1,70 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static org.junit.Assert.assertThrows;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for {@link Assignment} parsing, creation and unassignment. */
@RunWith(JUnit4.class)
public class AssignmentTest {
  private static final Column<String> COL_A = Column.ofString("A");
  private static final Column<String> COL_B = Column.ofString("B");
  private static final Column<Integer> COL_X = Column.ofUnsignedInteger("X");
  private static final Schema SCHEMA = Schema.builder().add(COL_A).add(COL_B).add(COL_X).build();

  @Test
  public void testParsing() {
    Assignment<?> simple = Assignment.parse("A=foo", SCHEMA);
    assertAssignment(simple, COL_A, "foo");

    // Whitespace around the column name and value is not part of the parsed result.
    Assignment<?> padded = Assignment.parse(" B = bar ", SCHEMA);
    assertAssignment(padded, COL_B, "bar");

    // An empty right-hand side means "unassign".
    Assignment<?> cleared = Assignment.parse("A=", SCHEMA);
    assertUnassignment(cleared, COL_A);

    Assignment<?> numeric = Assignment.parse("X=23", SCHEMA);
    assertAssignment(numeric, COL_X, 23);

    // "C" is not a column of the schema, and "NaN" is not a valid value for column X.
    assertThrows(IllegalArgumentException.class, () -> Assignment.parse("C=Nope", SCHEMA));
    assertThrows(IllegalArgumentException.class, () -> Assignment.parse("X=NaN", SCHEMA));
  }

  @Test
  public void testOf() {
    assertAssignment(Assignment.of(COL_A, "foo"), COL_A, "foo");

    // Assignments differ when the value differs, or when the column differs.
    Assignment<String> fooInA = Assignment.of(COL_A, "foo");
    Assignment<String> barInA = Assignment.of(COL_A, "bar");
    assertThat(fooInA).isNotEqualTo(barInA);

    Assignment<String> emptyInA = Assignment.of(COL_A, "");
    Assignment<String> emptyInB = Assignment.of(COL_B, "");
    assertThat(emptyInA).isNotEqualTo(emptyInB);

    // Assigning the column's default value is not the same thing as unassigning the column.
    Assignment<String> defaultInA = Assignment.of(COL_A, COL_A.defaultValue());
    assertThat(defaultInA).isNotEqualTo(Assignment.unassign(COL_A));

    assertThrows(NullPointerException.class, () -> Assignment.of(COL_A, null));
  }

  @Test
  public void testUnassign() {
    // Not much else to do here...
    Assignment<String> first = Assignment.unassign(COL_A);
    Assignment<String> second = Assignment.unassign(COL_A);
    assertThat(first).isEqualTo(second);
    assertUnassignment(Assignment.unassign(COL_A), COL_A);
  }

  /** Checks that the assignment targets exactly the given column instance and holds the value. */
  private static <T extends Comparable<T>> void assertAssignment(
      Assignment<?> a, Column<T> c, T v) {
    assertThat(a.column()).isSameInstanceAs(c);
    assertThat(a.value()).hasValue(v);
  }

  /** Checks that the assignment targets exactly the given column instance and has no value. */
  private static void assertUnassignment(Assignment<?> a, Column<?> c) {
    assertThat(a.column()).isSameInstanceAs(c);
    assertThat(a.value()).isEmpty();
  }
}

+ 71
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/ChangeTest.java View File

@ -0,0 +1,71 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertThrows;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import java.util.Arrays;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
public class ChangeTest {
private static final Column<String> COL_A = Column.ofString("A");
private static final Column<String> COL_B = Column.ofString("B");
@Test
public void testEmpty() {
assertThat(Change.empty().getRanges()).isEmpty();
assertThat(Change.empty().getAssignments()).isEmpty();
// Not all "no-op" changes are equal to the "empty" change (unlike RangeTree). This should be
// fine however since Changes are expected to have a very short lifecycle in most code and not
// be used as keys in maps etc...
assertThat(Change.empty())
.isNotEqualTo(Change.builder(RangeTree.empty()).assign(COL_A, "foo").build());
assertThat(Change.empty()).isNotEqualTo(Change.builder(ranges("12xxxx")).build());
}
@Test
public void testBuilder() {
Change c = Change.builder(ranges("12xxxx")).assign(COL_A, "foo").assign(COL_B, "bar").build();
assertThat(c.getRanges()).containsExactly("12xxxx");
Assignment<String> assignFoo = Assignment.of(COL_A, "foo");
Assignment<String> assignBar = Assignment.of(COL_B, "bar");
assertThat(c.getAssignments()).containsExactly(assignFoo, assignBar);
assertThat(c).isEqualTo(Change.of(ranges("12xxxx"), asList(assignFoo, assignBar)));
// Don't allow same column twice (this could be relaxed in future if necessary)!
assertThrows(IllegalArgumentException.class,
() -> Change.builder(ranges("12xxxx")).assign(COL_A, "foo").assign(COL_A, "bar").build());
}
@Test
public void testBuilderUnassignment() {
Change c = Change.builder(ranges("12xxxx")).unassign(COL_A).build();
Assignment<String> unassign = Assignment.unassign(COL_A);
assertThat(c.getAssignments()).containsExactly(unassign);
assertThat(c).isEqualTo(Change.of(ranges("12xxxx"), asList(unassign)));
}
private static RangeTree ranges(String... rangeSpecs) {
return RangeTree.from(Arrays.stream(rangeSpecs).map(RangeSpecification::parse));
}
}

+ 58
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/ColumnGroupTest.java View File

@ -0,0 +1,58 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;
import com.google.common.collect.ImmutableSet;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for {@link ColumnGroup}. */
@RunWith(JUnit4.class)
public class ColumnGroupTest {
  @Test
  public void testGroupColumns() {
    ColumnGroup<PhoneRegion, Boolean> regionGroup =
        ColumnGroup.byRegion(Column.ofBoolean("Region"));

    // Lookup by ID string.
    Column<Boolean> usColumn = regionGroup.getColumnFromId("US");
    assertThat(usColumn.getName()).isEqualTo("Region:US");
    assertThat(usColumn.type()).isEqualTo(Boolean.class);

    // Lookup by typed key.
    PhoneRegion canada = PhoneRegion.of("CA");
    Column<Boolean> caColumn = regionGroup.getColumn(canada);
    assertThat(caColumn.getName()).isEqualTo("Region:CA");

    // Only the suffix part should be given to get the column from the group.
    assertThrows(
        IllegalArgumentException.class, () -> regionGroup.getColumnFromId("Region:US"));
  }

  @Test
  public void testExtractGroupColumns() {
    Column<String> first = Column.ofString("FirstColumn");
    Column<String> last = Column.ofString("LastColumn");

    Column<Boolean> prototype = Column.ofBoolean("Region");
    ColumnGroup<PhoneRegion, Boolean> regionGroup = ColumnGroup.byRegion(prototype);
    Column<Boolean> usColumn = regionGroup.getColumnFromId("US");
    Column<Boolean> caColumn = regionGroup.getColumn(PhoneRegion.of("CA"));

    // The prototype is a valid column, but it's not part of its own group.
    assertThat(
            regionGroup.extractGroupColumns(
                ImmutableSet.of(first, usColumn, prototype, caColumn, last)))
        .containsExactly(PhoneRegion.of("US"), usColumn, PhoneRegion.of("CA"), caColumn)
        .inOrder();
  }
}

+ 93
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/ColumnTest.java View File

@ -0,0 +1,93 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN;
import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UNKNOWN;
import static java.lang.Boolean.FALSE;
import static java.lang.Boolean.TRUE;
import static org.junit.Assert.assertThrows;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Unit tests for the boolean, string and enum flavours of {@link Column}. */
@RunWith(JUnit4.class)
public class ColumnTest {
  @Test
  public void testBooleanColumn() {
    Column<Boolean> boolColumn = Column.ofBoolean("bool");
    assertThat(boolColumn.getName()).isEqualTo("bool");
    assertThat(boolColumn.type()).isEqualTo(Boolean.class);

    assertThat(boolColumn.cast(true)).isTrue();
    assertThrows(ClassCastException.class, () -> boolColumn.cast(""));

    // All upper or all lower case are accepted.
    assertThat(boolColumn.parse("true")).isTrue();
    assertThat(boolColumn.parse("false")).isFalse();
    assertThat(boolColumn.parse("TRUE")).isTrue();
    assertThat(boolColumn.parse("FALSE")).isFalse();

    assertThat(boolColumn.serialize(TRUE)).isEqualTo("true");
    assertThat(boolColumn.serialize(FALSE)).isEqualTo("false");

    // We're lenient, but not that lenient.
    for (String mixedCase : new String[] {"TruE", "FaLse"}) {
      assertThrows(IllegalArgumentException.class, () -> boolColumn.parse(mixedCase));
    }

    // A name containing ':' is rejected.
    assertThrows(IllegalArgumentException.class, () -> Column.ofBoolean("Foo:Bar"));
  }

  @Test
  public void testStringColumn() {
    Column<String> stringColumn = Column.ofString("string");
    assertThat(stringColumn.getName()).isEqualTo("string");
    assertThat(stringColumn.type()).isEqualTo(String.class);

    assertThat(stringColumn.cast("hello")).isEqualTo("hello");
    assertThat(stringColumn.parse("")).isNull();
    assertThrows(ClassCastException.class, () -> stringColumn.cast(true));

    // Anything other than the empty string is permitted.
    assertThat(stringColumn.parse("world")).isEqualTo("world");
    assertThat(stringColumn.serialize("world")).isEqualTo("world");

    // Unquoted whitespace is stripped.
    assertThat(stringColumn.parse(" world ")).isEqualTo("world");

    // You can preserve whitespace by surrounding the string with double quotes.
    assertThat(stringColumn.parse("\" world \"")).isEqualTo(" world ");
    assertThat(stringColumn.serialize(" world ")).isEqualTo("\" world \"");

    // And null is always the empty string.
    assertThat(stringColumn.serialize(null)).isEqualTo("");

    // A name containing ':' is rejected.
    assertThrows(IllegalArgumentException.class, () -> Column.ofString("Foo:Bar"));
  }

  @Test
  public void testEnumColumn() {
    Column<ValidNumberType> typeColumn = Column.of(ValidNumberType.class, "type", UNKNOWN);
    assertThat(typeColumn.getName()).isEqualTo("type");
    assertThat(typeColumn.type()).isEqualTo(ValidNumberType.class);

    assertThat(typeColumn.cast(FIXED_LINE)).isEqualTo(FIXED_LINE);
    assertThrows(ClassCastException.class, () -> typeColumn.cast(""));

    // Several case formats are supported.
    for (String spelling : new String[] {"FIXED_LINE", "fixed_line", "fixedLine"}) {
      assertThat(typeColumn.parse(spelling)).isEqualTo(FIXED_LINE);
    }

    // We're lenient, but not that lenient.
    assertThrows(IllegalArgumentException.class, () -> typeColumn.parse("fIxEdLiNe"));

    // A name containing ':' is rejected.
    assertThrows(
        IllegalArgumentException.class,
        () -> Column.of(XmlNumberType.class, "Foo:Bar", XML_UNKNOWN));
  }
}

+ 177
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/CsvParserTest.java View File

@ -0,0 +1,177 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper;
import static org.junit.Assert.assertThrows;
import com.google.common.collect.ImmutableMap;
import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
public class CsvParserTest {
@Test
public void testSimple() {
  // Simplest case.
  String twoValues = "Hello,World!";
  assertSingleRow(CsvParser.commaSeparated(), twoValues, "Hello", "World!");

  // Empty row yields one empty value in the "first column" (matches behaviour with quoting).
  String emptyRow = "";
  String quotedEmptyRow = "\"\"";
  assertSingleRow(CsvParser.commaSeparated(), emptyRow, "");
  assertSingleRow(CsvParser.commaSeparated(), quotedEmptyRow, "");

  // Trailing delimiter yields a trailing empty value (matches behaviour with quoting).
  String trailingDelimiter = "foo,";
  String trailingQuotedEmpty = "foo,\"\"";
  assertSingleRow(CsvParser.commaSeparated(), trailingDelimiter, "foo", "");
  assertSingleRow(CsvParser.commaSeparated(), trailingQuotedEmpty, "foo", "");
}
@Test
public void testOtherDelimiters() {
// Tabs sequences are not "folded" (maybe this could be an option?)
assertSingleRow(CsvParser.tabSeparated(), "Hello\t\tWorld!", "Hello", "", "World!");
assertSingleRow(CsvParser.withSeparator(';'), "Hello;World!", "Hello", "World!");
}
@Test
public void testWhitespaceTrimming() {
// Whitespace is preserved by default, but can be trimmed.
assertSingleRow(CsvParser.commaSeparated(),
" foo, bar, baz ", " foo", " bar", " baz ");
assertSingleRow(CsvParser.commaSeparated().trimWhitespace(),
" foo, bar, baz ", "foo", "bar", "baz");
assertSingleRow(CsvParser.commaSeparated().trimWhitespace(),
" foo, , ", "foo", "", "");
}
@Test
public void testQuoting() {
// Quoting works as expected (and combines with whitespace trimming).
assertSingleRow(CsvParser.commaSeparated(),
"\"foo\",\"\"\"bar, baz\"\"\"", "foo", "\"bar, baz\"");
assertSingleRow(CsvParser.commaSeparated().trimWhitespace(),
" \"foo\" , \"\"\"bar, baz\"\"\" ", "foo", "\"bar, baz\"");
}
@Test
public void testQuoting_illegal() {
// Without whitespace trimming any quotes in "unquoted" values are not permitted.
assertThrows(IllegalArgumentException.class, () ->
parse(CsvParser.commaSeparated(), "foo, \"bar, baz\""));
}
@Test
public void testDelimiter() {
assertSingleRow(CsvParser.tabSeparated(), "Hello\tWorld!", "Hello", "World!");
assertSingleRow(CsvParser.withSeparator(';'), "Hello;World!", "Hello", "World!");
}
@Test
public void testUnicode() {
assertSingleRow(CsvParser.withSeparator('-'), "😱-😂-💩", "😱", "😂", "💩");
assertSingleRow(CsvParser.commaSeparated(), "\0,😱😂,\n", "\0", "😱😂", "\n");
// Fun fact, not all ISO control codes count as "whitespace".
assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), "\0,😱😂,\n", "\0", "😱😂", "");
}
@Test
public void testMultiline() {
// Newlines become literals in quoted values.
List<List<String>> rows = parse(CsvParser.commaSeparated().allowMultiline(),
"foo,\"Hello,",
"World!\"");
assertThat(rows).hasSize(1);
assertThat(rows.get(0)).containsExactly("foo", "Hello,\nWorld!").inOrder();
}
@Test
public void testMultilineWithTrimming() {
List<List<String>> rows = parse(
CsvParser.commaSeparated().allowMultiline().trimWhitespace(),
" foo , \" Hello,",
"World! \" ");
assertThat(rows).hasSize(1);
assertThat(rows.get(0)).containsExactly("foo", " Hello,\nWorld! ").inOrder();
}
@Test
public void testMultiline_illegal() {
// If not configured for multiline values, this is an unterminated quoted value.
assertThrows(IllegalArgumentException.class, () ->
parse(CsvParser.commaSeparated(), "foo,\"Hello,", "World!\""));
// This fails because no more lines exist (even if multiline is allowed)
assertThrows(IllegalArgumentException.class, () ->
parse(CsvParser.commaSeparated().allowMultiline(), "foo,\"Hello,"));
}
@Test
public void testRowMapping() {
List<ImmutableMap<String, String>> rows = parseMap(
CsvParser.commaSeparated(),
rowMapper(),
"FOO,BAR",
"foo,bar",
"Hello,World!",
"No Trailing,",
",",
"");
assertThat(rows).hasSize(5);
assertThat(rows.get(0)).containsExactly("FOO", "foo", "BAR", "bar").inOrder();
assertThat(rows.get(1)).containsExactly("FOO", "Hello", "BAR", "World!").inOrder();
assertThat(rows.get(2)).containsExactly("FOO", "No Trailing").inOrder();
assertThat(rows.get(3)).isEmpty();
assertThat(rows.get(4)).isEmpty();
}
@Test
public void testRowMapping_withHeader() {
List<String> header = new ArrayList<>();
List<ImmutableMap<String, String>> rows = parseMap(
CsvParser.commaSeparated(),
rowMapper(header::addAll),
"FOO,BAR",
"foo,bar");
assertThat(rows).hasSize(1);
assertThat(header).containsExactly("FOO", "BAR").inOrder();
assertThat(rows.get(0)).containsExactly("FOO", "foo", "BAR", "bar").inOrder();
}
private void assertSingleRow(CsvParser parser, String line, String... values) {
List<List<String>> rows = parse(parser, line);
assertThat(rows).hasSize(1);
assertThat(rows.get(0)).containsExactlyElementsIn(values).inOrder();
}
private static List<List<String>> parse(CsvParser parser, String... lines) {
List<List<String>> rows = new ArrayList<>();
parser.parse(Stream.of(lines), r -> rows.add(r.collect(toImmutableList())));
return rows;
}
private static List<ImmutableMap<String, String>> parseMap(
CsvParser p, RowMapper mapper, String... lines) {
List<ImmutableMap<String, String>> rows = new ArrayList<>();
p.parse(Stream.of(lines), mapper.mapTo(rows::add));
return rows;
}
}

+ 275
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/CsvTableTest.java View File

@ -0,0 +1,275 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.AREA_CODE_LENGTH;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.COMMENT;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.FIXED_LINE;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.FIXED_LINE_OR_MOBILE;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.MOBILE;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.FORMAT;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.REGIONS;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.TABLE_COLUMNS;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.TYPE;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.toCsv;
import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.toRangeTable;
import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.ALL;
import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.CHANGES;
import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.LHS;
import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.RHS;
import static org.junit.Assert.assertThrows;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Table;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema;
import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey;
import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Optional;
import java.util.stream.IntStream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * Unit tests for {@link CsvTable}: exporting range and example-number tables to CSV, diffing,
 * value escaping, row/column ordering and rejection of values that cannot be safely exported.
 */
@RunWith(JUnit4.class)
public class CsvTableTest {
  // Key marshaller for tables keyed by a plain string in a column named "Id".
  private static final CsvKeyMarshaller<String> TEST_MARSHALLER =
      CsvKeyMarshaller.ofSortedString("Id");
  private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA"));
  private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US"));

  /** A range table converts to the expected CSV and converts back to an equal range table. */
  @Test
  public void testRangeTableExport() throws IOException {
    ImmutableList<Column<?>> columns =
        ImmutableList.of(TYPE, AREA_CODE_LENGTH, REGION_CA, REGION_US, COMMENT);
    RangeTable table = RangeTable.builder(TABLE_COLUMNS)
        .apply(row(columns, key("1", 7), MOBILE, 0, true, true))
        .apply(row(columns, key("2x[34]", 7, 8), FIXED_LINE_OR_MOBILE, 0, true, null, "Foo Bar"))
        .apply(row(columns, key("345", 8), FIXED_LINE, 3, true, null))
        .apply(row(columns, key("456x8", 8), FIXED_LINE, 3, null, true))
        .build();
    CsvTable<RangeKey> csv = toCsv(table);
    assertCsv(csv,
        "Prefix ; Length ; Type ; Area Code Length ; Regions ; Comment",
        "1 ; 7 ; MOBILE ; 0 ; \"CA,US\"",
        "2x[34] ; 7,8 ; FIXED_LINE_OR_MOBILE ; 0 ; \"CA\" ; \"Foo Bar\"",
        "345 ; 8 ; FIXED_LINE ; 3 ; \"CA\"",
        "456x8 ; 8 ; FIXED_LINE ; 3 ; \"US\"");
    assertThat(toRangeTable(csv)).isEqualTo(table);
  }

  /** An example-number table converts to the expected CSV and back to an equal table. */
  @Test
  public void testExampleNumberExport() throws IOException {
    Table<PhoneRegion, ValidNumberType, DigitSequence> table = HashBasedTable.create();
    table.put(PhoneRegion.of("US"), ValidNumberType.TOLL_FREE, DigitSequence.of("800123456"));
    table.put(PhoneRegion.of("US"), ValidNumberType.PREMIUM_RATE, DigitSequence.of("945123456"));
    table.put(PhoneRegion.of("CA"), ValidNumberType.MOBILE, DigitSequence.of("555123456"));
    // Ordering is well defined in the CSV output.
    // TODO: Consider making columns able to identify if their values need CSV escaping.
    CsvTable<ExampleNumberKey> csv = ExamplesTableSchema.toCsv(table);
    assertCsv(csv,
        "Region ; Type ; Number",
        "CA ; MOBILE ; \"555123456\"",
        "US ; TOLL_FREE ; \"800123456\"",
        "US ; PREMIUM_RATE ; \"945123456\"");
    assertThat(ExamplesTableSchema.toExampleTable(csv)).isEqualTo(table);
  }

  /** Each {@code DiffMode} selects the expected subset of left/right/changed/same rows. */
  @Test
  public void testDiff() throws IOException {
    ImmutableList<Column<?>> columns = ImmutableList.of(COMMENT);
    RangeTable lhs = RangeTable.builder(TABLE_COLUMNS)
        .apply(row(columns, key("1", 6), "Left Side Only"))
        .apply(row(columns, key("3", 6), "Left Value"))
        .apply(row(columns, key("4", 6), "Same Value"))
        .build();
    RangeTable rhs = RangeTable.builder(TABLE_COLUMNS)
        .apply(row(columns, key("2", 6), "Right Side Only"))
        .apply(row(columns, key("3", 6), "Right Value"))
        .apply(row(columns, key("4", 6), "Same Value"))
        .build();
    // ALL: every row from both sides, including unchanged ones.
    assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), ALL),
        "Diff ; Prefix ; Length ; Comment",
        "---- ; 1 ; 6 ; \"Left Side Only\"",
        "++++ ; 2 ; 6 ; \"Right Side Only\"",
        "<<<< ; 3 ; 6 ; \"Left Value\"",
        ">>>> ; 3 ; 6 ; \"Right Value\"",
        "==== ; 4 ; 6 ; \"Same Value\"");
    // CHANGES: as above but without the unchanged row.
    assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), CHANGES),
        "Diff ; Prefix ; Length ; Comment",
        "---- ; 1 ; 6 ; \"Left Side Only\"",
        "++++ ; 2 ; 6 ; \"Right Side Only\"",
        "<<<< ; 3 ; 6 ; \"Left Value\"",
        ">>>> ; 3 ; 6 ; \"Right Value\"");
    // LHS: only rows as they appear on the left-hand side.
    assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), LHS),
        "Diff ; Prefix ; Length ; Comment",
        "---- ; 1 ; 6 ; \"Left Side Only\"",
        "<<<< ; 3 ; 6 ; \"Left Value\"",
        "==== ; 4 ; 6 ; \"Same Value\"");
    // RHS: only rows as they appear on the right-hand side.
    assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), RHS),
        "Diff ; Prefix ; Length ; Comment",
        "++++ ; 2 ; 6 ; \"Right Side Only\"",
        ">>>> ; 3 ; 6 ; \"Right Value\"",
        "==== ; 4 ; 6 ; \"Same Value\"");
  }

  /** Double quotes are doubled and newline/tab/backslash/CR are backslash-escaped on export. */
  @Test
  public void testEscaping() throws IOException {
    ImmutableList<Column<?>> columns = ImmutableList.of(COMMENT);
    RangeTable table = RangeTable.builder(TABLE_COLUMNS)
        .apply(row(columns, key("1", 6), "Doubling \" Double Quotes"))
        .apply(row(columns, key("2", 6), "Escaping \n Newlines"))
        .apply(row(columns, key("3", 6), "Other \t \\ \r Escaping"))
        .build();
    assertCsv(toCsv(table),
        "Prefix ; Length ; Comment",
        "1 ; 6 ; \"Doubling \"\" Double Quotes\"",
        "2 ; 6 ; \"Escaping \\n Newlines\"",
        "3 ; 6 ; \"Other \\t \\\\ \\r Escaping\"");
  }

  /** Columns are exported in schema order and trailing empty cells are omitted from each row. */
  @Test
  public void testOrdering() throws IOException {
    // This came up in relation to discovering that ImmutableSet.copyOf(TreeBasedTable) does not
    // result in rows/columns in the order of the TreeBasedTable's column comparator. Hence the
    // code does a copy via a temporary ImmutableTable.Builder.
    ImmutableList<Column<?>> columns =
        ImmutableList.of(TYPE, AREA_CODE_LENGTH, REGION_US, COMMENT);
    RangeTable table = RangeTable.builder(TABLE_COLUMNS)
        .apply(row(columns, key("1", 4), null, null, null, "Foo Bar"))
        .apply(row(columns, key("2", 4), null, null, true))
        .apply(row(columns, key("3", 4), null, 2))
        .apply(row(columns, key("4", 4), MOBILE))
        .build();
    CsvTable<RangeKey> csv = toCsv(table);
    assertCsv(
        csv,
        "Prefix ; Length ; Type ; Area Code Length ; Regions ; Comment",
        "1 ; 4 ; ; ; ; \"Foo Bar\"",
        "2 ; 4 ; ; ; \"US\"",
        "3 ; 4 ; ; 2",
        "4 ; 4 ; MOBILE");
    assertThat(toRangeTable(csv)).isEqualTo(table);
  }

  /**
   * With a key marshaller that supplies no ordering ({@code Optional.empty()}), rows are exported
   * in the order in which they were added.
   */
  // This is (Jan 2019) currently impossible using ImmutableTable.
  @Test
  public void testOptionalRowOrdering() throws IOException {
    CsvKeyMarshaller<Integer> unorderedIntegerMarshaller =
        new CsvKeyMarshaller<>(
            n -> IntStream.of(n).boxed().map(Object::toString),
            p -> Integer.parseInt(p.get(0)),
            Optional.empty(),
            "Unordered");
    CsvSchema<Integer> schema =
        CsvSchema.of(unorderedIntegerMarshaller, RangesTableSchema.SCHEMA.columns());
    CsvTable.Builder<Integer> csv = CsvTable.builder(schema);
    // Keys are deliberately added out of numeric order.
    csv.putRow(4, ImmutableMap.of(COMMENT, "Foo Bar"));
    csv.putRow(1, ImmutableMap.of(FORMAT, "Quux"));
    csv.putRow(3, ImmutableMap.of(AREA_CODE_LENGTH, 2));
    csv.putRow(2, ImmutableMap.of(TYPE, MOBILE));
    assertCsv(
        csv.build(),
        "Unordered ; Type ; Area Code Length ; Format ; Comment",
        "4 ; ; ; ; \"Foo Bar\"",
        "1 ; ; ; \"Quux\"",
        "3 ; ; 2",
        "2 ; MOBILE");
  }

  /** Exporting a string value containing a NUL control character is rejected. */
  @Test
  public void testUnsafeString() {
    Column<String> unsafe = Column.ofString("unsafe");
    CsvSchema<String> schema = CsvSchema.of(TEST_MARSHALLER, Schema.builder().add(unsafe).build());
    CsvTable<String> csv =
        CsvTable.builder(schema).put("key", unsafe, "Control chars Not \0 Allowed").build();
    assertThrows(IllegalArgumentException.class, () -> export(csv, false));
  }

  /** An enum whose string form contains a separator, a newline and double quotes. */
  private enum Perverse {
    UNSAFE_VALUE() {
      @Override
      public String toString() {
        return "Unsafe ; for \n \"CSV\"";
      }
    };
  }

  /** Exporting a non-string value whose {@code toString()} is not CSV-safe is rejected. */
  @Test
  public void testPerverseEdgeCase() {
    Column<Perverse> unsafe = Column.of(Perverse.class, "Unsafe", Perverse.UNSAFE_VALUE);
    CsvSchema<String> schema = CsvSchema.of(TEST_MARSHALLER, Schema.builder().add(unsafe).build());
    CsvTable<String> csv =
        CsvTable.builder(schema).put("key", unsafe, Perverse.UNSAFE_VALUE).build();
    assertThrows(IllegalArgumentException.class, () -> export(csv, false));
  }

  /**
   * Asserts that the table exports to the given lines, both aligned and unaligned, and that
   * re-importing the aligned export yields a table equal to the original.
   *
   * @param csv the table under test
   * @param lines the expected aligned output, one entry per line (without line terminators)
   * @throws IOException declared so that a failure to re-read the exported text propagates
   */
  private static <K> void assertCsv(CsvTable<K> csv, String... lines) throws IOException {
    String aligned = join(lines);
    // Assumes test values don't contain semi-colons where space matters.
    String unaligned = aligned.replaceAll(" *; *", ";");
    String exported = export(csv, true);
    assertThat(exported).isEqualTo(aligned);
    assertThat(export(csv, false)).isEqualTo(unaligned);
    CsvTable<K> imported = CsvTable.importCsv(csv.getSchema(), new StringReader(exported));
    // The re-imported table is the value under test, so it is the "actual" side of the assertion;
    // this keeps the expected/actual labels in any failure message the right way round.
    assertThat(imported).isEqualTo(csv);
  }

  /** Exports the table to a string, optionally aligning columns. */
  private static String export(CsvTable<?> csv, boolean align) {
    StringWriter out = new StringWriter();
    csv.exportCsv(new PrintWriter(out), align);
    return out.toString();
  }

  /**
   * Builds a {@link Change} for the given key which assigns each non-null value to the column at
   * the same index.
   *
   * @param columns the columns that {@code values} are matched against positionally
   * @param key the range key whose ranges the change applies to
   * @param values the values to assign; {@code null} entries leave that column unassigned, and
   *     fewer values than columns may be given
   * @throws IllegalArgumentException if more values than columns are given
   */
  private static Change row(ImmutableList<Column<?>> columns, RangeKey key, Object... values) {
    // Validate up front, and say what went wrong rather than failing with a bare exception.
    checkArgument(
        values.length <= columns.size(),
        "too many values (%s) for the given columns (%s)",
        values.length,
        columns.size());
    Change.Builder row = Change.builder(key.asRangeTree());
    int n = 0;
    for (Object v : values) {
      if (v != null) {
        Column<?> c = columns.get(n);
        row.assign(c, c.cast(v));
      }
      n++;
    }
    return row.build();
  }

  /** Joins the lines with newlines and appends a trailing newline. */
  private static String join(String... lines) {
    return String.join("\n", lines) + "\n";
  }

  /** Creates a range key from a prefix specification (may be empty) and a set of lengths. */
  private static RangeKey key(String spec, Integer... lengths) {
    RangeSpecification prefix =
        spec.isEmpty() ? RangeSpecification.empty() : RangeSpecification.parse(spec);
    return RangeKey.create(prefix, ImmutableSet.copyOf(lengths));
  }
}

+ 132
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/RangeKeyTest.java View File

@ -0,0 +1,132 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.truth.Truth.assertThat;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.PrefixTree;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import java.util.stream.Stream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * Unit tests for {@link RangeKey}: decomposing range trees into keys, expanding keys back into
 * range specifications, and testing digit sequences for containment.
 */
@RunWith(JUnit4.class)
public class RangeKeyTest {
  /** An empty tree decomposes into no keys. */
  @Test
  public void testEmpty() {
    assertThat(RangeKey.decompose(RangeTree.empty())).isEmpty();
  }

  /** A tree matching only the empty sequence gives a single key with empty prefix and length 0. */
  @Test
  public void testZeroLengthMatch() {
    RangeTree emptySequenceOnly = RangeTree.from(RangeSpecification.empty());
    assertThat(RangeKey.decompose(emptySequenceOnly)).containsExactly(key("", 0));
  }

  /** Ranges made only of "any digit" paths collapse to an empty prefix with several lengths. */
  @Test
  public void testOnlyAnyPath() {
    RangeTree anyDigits = ranges("xxx", "xxxx", "xxxxx");
    assertThat(RangeKey.decompose(anyDigits)).containsExactly(key("", 3, 4, 5));
  }

  /** A shared prefix followed by "any digit" tails becomes one key listing every length. */
  @Test
  public void testSimple() {
    RangeTree tree = ranges("123xxx", "123xxxx", "123xxxxx");
    assertThat(RangeKey.decompose(tree)).containsExactly(key("123", 6, 7, 8));
  }

  /** Shorter ranges embedded in the path of longer ones produce separate keys. */
  @Test
  public void testEmbeddedRanges() {
    RangeTree tree = ranges("1x", "1xx", "1xx23", "1xx23x", "1xx23xx");
    ImmutableList<RangeKey> decomposed = RangeKey.decompose(tree);
    assertThat(decomposed).containsExactly(key("1", 2, 3), key("1xx23", 5, 6, 7)).inOrder();
  }

  /** Overlapping prefixes of different lengths are factored into a key per prefix. */
  @Test
  public void testSplitFactors() {
    RangeTree tree = ranges("123xxxx", "1234x", "1234xx");
    ImmutableList<RangeKey> decomposed = RangeKey.decompose(tree);
    // If the input wasn't "factored" first, this would result in:
    // key("123[0-35-9]", 7), key("1234", 5, 6, 7)
    assertThat(decomposed).containsExactly(key("123", 7), key("1234", 5, 6)).inOrder();
  }

  /** Longer paths are kept whole and shorter paths are split around them. */
  @Test
  public void testMergeStrategy() {
    RangeTree tree = ranges("12[0-4]xxx", "12xxx", "12xx");
    ImmutableList<RangeKey> decomposed = RangeKey.decompose(tree);
    // The merge strategy for factorizing the tree will prefer to keep the longer paths intact
    // and split shorter paths around it. Using the other strategy we would get:
    // key("12", 4, 5), key("12[0-4]", 6)
    assertThat(decomposed)
        .containsExactly(key("12[0-4]", 4, 5, 6), key("12[5-9]", 4, 5))
        .inOrder();
  }

  /** A key expands to one range specification per length, padded with "any digit". */
  @Test
  public void testAsRangeSpecifications() {
    // Empty prefix: every specification is entirely "any digit".
    assertThat(key("", 3, 4, 5).asRangeSpecifications())
        .containsExactly(spec("xxx"), spec("xxxx"), spec("xxxxx"))
        .inOrder();
    // Prefix shorter than every length.
    assertThat(key("1[2-4]", 3, 4, 5).asRangeSpecifications())
        .containsExactly(spec("1[2-4]x"), spec("1[2-4]xx"), spec("1[2-4]xxx"))
        .inOrder();
    // Shortest length equals the prefix length, so the first specification has no padding.
    assertThat(key("1x[468]", 3, 5, 7).asRangeSpecifications())
        .containsExactly(spec("1x[468]"), spec("1x[468]xx"), spec("1x[468]xxxx"))
        .inOrder();
  }

  /** Decomposition of a small sample of real prefixes applied to several number lengths. */
  @Test
  public void testSimpleRealWorldData() {
    // From ITU German numbering plan, first few fixed line ranges.
    RangeTree prefixRanges = ranges("20[1-389]", "204[135]", "205[1-468]", "206[4-6]", "20[89]");
    PrefixTree prefixes = PrefixTree.from(prefixRanges);
    RangeTree allLengths =
        ranges("xxxxxx", "xxxxxxx", "xxxxxxxx", "xxxxxxxxx", "xxxxxxxxxx", "xxxxxxxxxxx");
    RangeTree retained = prefixes.retainFrom(allLengths);
    ImmutableList<RangeKey> decomposed = RangeKey.decompose(retained);
    assertThat(decomposed)
        .containsExactly(
            key("20[1-389]", 6, 7, 8, 9, 10, 11),
            key("204[135]", 6, 7, 8, 9, 10, 11),
            key("205[1-468]", 6, 7, 8, 9, 10, 11),
            key("206[4-6]", 6, 7, 8, 9, 10, 11))
        .inOrder();
  }

  /** {@code contains} requires both a matching prefix and one of the key's lengths. */
  @Test
  public void testContains() {
    RangeKey rangeKey = key("1[23]", 7, 8, 9);
    // Matching prefix and a listed length.
    assertThat(rangeKey.contains(digitSequence("12"), 8)).isTrue();
    // Matching prefix but a length that is not listed.
    assertThat(rangeKey.contains(digitSequence("12"), 10)).isFalse();
    // Listed length but a non-matching prefix.
    assertThat(rangeKey.contains(digitSequence("7"), 8)).isFalse();
  }

  /** Creates a range key from a prefix specification (may be empty) and a set of lengths. */
  private static RangeKey key(String spec, Integer... lengths) {
    ImmutableSet<Integer> lengthSet = ImmutableSet.copyOf(lengths);
    // The empty string stands for the empty specification rather than being parsed.
    if (spec.isEmpty()) {
      return RangeKey.create(RangeSpecification.empty(), lengthSet);
    }
    return RangeKey.create(RangeSpecification.parse(spec), lengthSet);
  }

  /** Builds a range tree from the given range specification strings. */
  private static RangeTree ranges(String... spec) {
    return RangeTree.from(Stream.of(spec).map(RangeSpecification::parse));
  }

  /** Parses a single range specification string. */
  private static RangeSpecification spec(String spec) {
    return RangeSpecification.parse(spec);
  }

  /** Creates a digit sequence from its string form. */
  private static DigitSequence digitSequence(String spec) {
    return DigitSequence.of(spec);
  }
}

+ 412
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/RangeTableTest.java View File

@ -0,0 +1,412 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.truth.Truth.assertThat;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.MOBILE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PREMIUM_RATE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.SHARED_COST;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.TOLL_FREE;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN;
import static com.google.i18n.phonenumbers.metadata.testing.RangeTableSubject.assertThat;
import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat;
import static java.util.stream.IntStream.rangeClosed;
import static org.junit.Assert.assertThrows;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Table;
import com.google.common.collect.Table.Cell;
import com.google.common.collect.Tables;
import com.google.i18n.phonenumbers.metadata.PrefixTree;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
import java.util.Arrays;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@RunWith(JUnit4.class)
public class RangeTableTest {
private static final Column<ValidNumberType> TYPE =
Column.of(ValidNumberType.class, "Type", UNKNOWN);
public static final Column<Integer> AREA_CODE_LENGTH = Column.ofUnsignedInteger("AreaCodeLength");
private static final ColumnGroup<PhoneRegion, Boolean> REGIONS =
ColumnGroup.byRegion(Column.ofBoolean("Region"));
private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US"));
private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA"));
private static final Schema SCHEMA =
Schema.builder().add(TYPE).add(AREA_CODE_LENGTH).add(REGIONS).build();
// This is essentially the most "extreme" simplification you can have. All detail is removed and
// lengths are merged into a contiguous range. It's basically like turning a range into "\d{n,m}"
// For example, { "123", "12345" } becomes { "xxx", "xxxx", "xxxxx" }.
private static final Function<Change, RangeTree> EXTREME_SIMPLIFICATION =
c -> RangeTree.from(
rangeClosed(c.getRanges().getLengths().first(), c.getRanges().getLengths().last())
.mapToObj(RangeSpecification::any));
@Test
public void testEmptyMap() {
RangeTable table = RangeTable.builder(SCHEMA).build();
assertThat(table).isEmpty();
}
@Test
public void testBasicAssign() {
RangeTable.Builder table = RangeTable.builder(SCHEMA);
table.assign(TYPE, MOBILE, ranges("1[234]xxxx"), OverwriteMode.ALWAYS);
assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("1[234]xxxx"));
table.assign(TYPE, null, ranges("13xxxx"), OverwriteMode.ALWAYS);
assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("1[24]xxxx"));
Assignment<ValidNumberType> fixedLine = Assignment.of(TYPE, FIXED_LINE);
// Overwrite an existing range.
table.assign(fixedLine, ranges("14xxxx"), OverwriteMode.ALWAYS);
assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx"));
assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("14xxxx"));
// Partially overwrite an existing range (same value).
table.assign(fixedLine, ranges("1[34]xxxx"), OverwriteMode.SAME);
assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx"));
assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("1[34]xxxx"));
// Fail to overwrite range with a different value in "SAME" mode.
assertThrows(IllegalArgumentException.class,
() -> table.assign(fixedLine, ranges("1[23]xxxx"), OverwriteMode.SAME));
// Add new ranges (but never overwriting).
table.assign(fixedLine, ranges("15xxxx"), OverwriteMode.NEVER);
assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx"));
assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("1[3-5]xxxx"));
// Fail to write ranges with the same value in "NEVER" mode.
assertThrows(IllegalArgumentException.class,
() -> table.assign(fixedLine, ranges("15xxxx"), OverwriteMode.NEVER));
// Unassignment (null value) makes no sense for modes other than "ALWAYS".
// TODO: This highlights the way this API is bad, make a separate "unassign" method.
assertThrows(IllegalArgumentException.class,
() -> table.assign(TYPE, null, ranges("123"), OverwriteMode.SAME));
assertThrows(IllegalArgumentException.class,
() -> table.assign(TYPE, null, ranges("123"), OverwriteMode.NEVER));
}
@Test
public void testApplyChanges() {
// Changes ordered top-to-bottom.
RangeTable table = RangeTable.builder(SCHEMA)
.apply(assign(
ranges("[18]2xxxxx"), ImmutableMap.of(TYPE, MOBILE, AREA_CODE_LENGTH, 3)))
.apply(assign(ranges("7xxxxxx"), TYPE, MOBILE))
.apply(assign(ranges("[1-3]xxxxxx"), TYPE, FIXED_LINE))
.build();
// The union of all the ranges.
assertThat(table).allRanges().containsExactly("[1-37]xxxxxx", "82xxxxx");
// The ranges assigned for various columns.
assertThat(table).assigned(TYPE).containsExactly("[1-37]xxxxxx", "82xxxxx");
assertThat(table).assigned(AREA_CODE_LENGTH).containsExactly("[18]2xxxxx");
// Note that the 12xxxxx range is replaced by the fixed line in the type map.
assertThat(table).assigned(TYPE, FIXED_LINE).containsExactly("[1-3]xxxxxx");
assertThat(table).assigned(TYPE, MOBILE).containsExactly("7xxxxxx", "82xxxxx");
// Area code length unaffected by update of the 12xxxxx range (only type was affected).
assertThat(table).assigned(AREA_CODE_LENGTH, 3).containsExactly("[18]2xxxxx");
}
@Test
public void testBareRangeAddition() {
RangeTable table = RangeTable.builder(SCHEMA)
.add(ranges("1xxxxx"))
.apply(assign(ranges("12xxxx"), TYPE, MOBILE))
.build();
assertThat(table).allRanges().containsExactly("1xxxxx");
// Note that there is not "getUnassignedRanges()" on RangeTable (yet), so we fudge it by
// checking that there's only one column and looking at all the assigned ranges in it.
assertThat(table).hasColumns(TYPE);
assertThat(table).assigned(TYPE).containsExactly("12xxxx");
// Also check that the re-built builder remembers the unassigned ranges.
RangeTable.Builder builder = table.toBuilder();
assertThat(builder.getAllRanges()).containsExactly("1xxxxx");
assertThat(builder.getAssignedRanges(TYPE)).containsExactly("12xxxx");
}
@Test
public void testAssignAndUnassign() {
RangeTable table = RangeTable.builder(SCHEMA)
.apply(assign(ranges("1xxxxx"), TYPE, MOBILE))
.apply(unassign(ranges("1[0-4]xxxx"), TYPE))
.build();
assertThat(table).allRanges().containsExactly("1xxxxx");
assertThat(table).hasColumns(TYPE);
assertThat(table).assigned(TYPE).containsExactly("1[5-9]xxxx");
// Also check that the re-built builder remembers the unassigned ranges.
RangeTable.Builder builder = table.toBuilder();
assertThat(builder.getAllRanges()).containsExactly("1xxxxx");
assertThat(builder.getAssignedRanges(TYPE)).containsExactly("1[5-9]xxxx");
}
@Test
public void testAssignAndRemove() {
RangeTable table = RangeTable.builder(SCHEMA)
.apply(assign(ranges("1xxxxx"), TYPE, MOBILE))
.remove(ranges("1[5-9]xxxx"))
.build();
assertThat(table).allRanges().containsExactly("1[0-4]xxxx");
assertThat(table).hasColumns(TYPE);
assertThat(table).assigned(TYPE).containsExactly("1[0-4]xxxx");
RangeTable.Builder builder = table.toBuilder();
assertThat(builder.getAllRanges()).containsExactly("1[0-4]xxxx");
assertThat(builder.getAssignedRanges(TYPE)).containsExactly("1[0-4]xxxx");
}
@Test
public void testTableImportExport() {
RangeTable original = RangeTable.builder(SCHEMA)
.apply(assign(ranges("[13]xxxxxx"), TYPE, MOBILE))
.apply(assign(ranges("[24]xxxxxx"), TYPE, FIXED_LINE))
.apply(assign(ranges("[14]xxxxxx"), AREA_CODE_LENGTH, 3))
.apply(assign(ranges("[23]xxxxxx"), AREA_CODE_LENGTH, 2))
.build();
Table<RangeSpecification, Column<?>, Optional<?>> exported = original.toImmutableTable();
assertThat(exported).hasSize(8);
assertThat(exported).containsCell(assigned("1xxxxxx", TYPE, MOBILE));
assertThat(exported).containsCell(assigned("1xxxxxx", AREA_CODE_LENGTH, 3));
assertThat(exported).containsCell(assigned("2xxxxxx", TYPE, FIXED_LINE));
assertThat(exported).containsCell(assigned("2xxxxxx", AREA_CODE_LENGTH, 2));
assertThat(exported).containsCell(assigned("3xxxxxx", TYPE, MOBILE));
assertThat(exported).containsCell(assigned("3xxxxxx", AREA_CODE_LENGTH, 2));
assertThat(exported).containsCell(assigned("4xxxxxx", TYPE, FIXED_LINE));
assertThat(exported).containsCell(assigned("4xxxxxx", AREA_CODE_LENGTH, 3));
RangeTable imported = RangeTable.from(SCHEMA, exported);
assertThat(imported).isEqualTo(original);
assertThat(imported.toImmutableTable()).isEqualTo(exported);
}
@Test
public void testColumnGroupMapping() {
// Changes ordered top-to-bottom.
RangeTable table = RangeTable.builder(SCHEMA)
.apply(assign(ranges("1xxxxx"), ImmutableMap.of(REGION_US, true)))
.apply(assign(ranges("2xxxxx"), ImmutableMap.of(REGION_CA, true)))
.apply(assign(ranges("3xxxxx"), ImmutableMap.of(REGION_US, true, REGION_CA, true)))
.build();
// The union of all the ranges.
assertThat(table).allRanges().containsExactly("[1-3]xxxxx");
Map<PhoneRegion, Column<Boolean>> regionMap = REGIONS.extractGroupColumns(table.getColumns());
assertThat(regionMap.keySet()).containsExactly(PhoneRegion.of("US"), PhoneRegion.of("CA"));
assertThat(table.getAssignedRanges(regionMap.get(PhoneRegion.of("US")))).containsExactly("[13]xxxxx");
assertThat(table.getAssignedRanges(regionMap.get(PhoneRegion.of("CA")))).containsExactly("[23]xxxxx");
// If a column in a group is not present, it counts as having no ranges, but if a plain column
// is not in the schema at all, it's an error.
assertThat(table.getAssignedRanges(REGIONS.getColumn(PhoneRegion.of("CH")))).isEmpty();
Column<String> bogus = Column.ofString("Bogus");
assertThrows(IllegalArgumentException.class, () -> table.getAssignedRanges(bogus));
Column<String> nope = ColumnGroup.byRegion(bogus).getColumn(PhoneRegion.of("US"));
assertThrows(IllegalArgumentException.class, () -> table.getAssignedRanges(nope));
}
@Test
public void testSubTable() {
  RangeTable original =
      RangeTable.builder(SCHEMA)
          .apply(assign(ranges("[13]xxxxxx"), TYPE, MOBILE))
          .apply(assign(ranges("[24]xxxxxx"), TYPE, FIXED_LINE))
          .apply(assign(ranges("[14]xxxxxx"), AREA_CODE_LENGTH, 3))
          .apply(assign(ranges("[23]xxxxxx"), AREA_CODE_LENGTH, 2))
          .build();

  // Bound the sub-table by the ranges whose area code length is 2, retaining just the type
  // column.
  RangeTree areaCodeLengthTwo = original.getRanges(AREA_CODE_LENGTH, 2);
  RangeTable subTable = original.subTable(areaCodeLengthTwo, TYPE);

  assertThat(subTable).hasColumns(TYPE);
  assertThat(subTable).hasRowCount(2);
  assertThat(subTable).hasRanges("2xxxxxx", FIXED_LINE);
  assertThat(subTable).hasRanges("3xxxxxx", MOBILE);
}
@Test
public void testGetPrefixMap() {
  RangeTable table =
      RangeTable.builder(SCHEMA)
          .apply(assign(ranges("1234xxxx", "1256xxxx"), TYPE, MOBILE))
          .apply(assign(ranges("1236xxx"), TYPE, FIXED_LINE))
          .apply(assign(ranges("4xxxx"), TYPE, TOLL_FREE))
          .apply(assign(ranges("49xxxx"), TYPE, PREMIUM_RATE))
          .build();

  ImmutableMap<ValidNumberType, PrefixTree> prefixesByType = table.getPrefixMap(TYPE, 0);

  PrefixTree mobilePrefixes = PrefixTree.from(ranges("1234", "125"));
  PrefixTree fixedLinePrefixes = PrefixTree.from(ranges("1236"));
  assertThat(prefixesByType).containsEntry(MOBILE, mobilePrefixes);
  assertThat(prefixesByType).containsEntry(FIXED_LINE, fixedLinePrefixes);

  // 4xxxx and 49xxxx overlap, because "49" is a prefix of numbers in both ranges. The prefix map
  // holds the shortest unique prefix for each range, so TOLL_FREE cannot be mapped from just
  // "4[0-8]" (that would fail to match "49123"). Having overlapping ranges of different lengths
  // with different types is therefore highly problematic, but the map still contains an entry
  // for each of them and callers must cope with that (e.g. by ordering the checks they make).
  PrefixTree tollFreePrefixes = PrefixTree.from(ranges("4"));
  PrefixTree premiumRatePrefixes = PrefixTree.from(ranges("49"));
  assertThat(prefixesByType).containsEntry(TOLL_FREE, tollFreePrefixes);
  assertThat(prefixesByType).containsEntry(PREMIUM_RATE, premiumRatePrefixes);
}
@Test
public void testGetPrefixMap_minLength() {
  RangeTable table =
      RangeTable.builder(SCHEMA)
          .apply(assign(ranges("123xxxxx", "1256xxxx"), TYPE, MOBILE))
          .apply(assign(ranges("124xxx"), TYPE, FIXED_LINE))
          .apply(assign(ranges("4xxxx"), TYPE, TOLL_FREE))
          .apply(assign(ranges("49xxxx"), TYPE, PREMIUM_RATE))
          .build();

  // Same kind of table as the plain prefix map test, but requesting a minimum length of 3.
  ImmutableMap<ValidNumberType, PrefixTree> prefixesByType = table.getPrefixMap(TYPE, 3);

  PrefixTree mobilePrefixes = PrefixTree.from(ranges("12[35]"));
  PrefixTree fixedLinePrefixes = PrefixTree.from(ranges("124"));
  PrefixTree tollFreePrefixes = PrefixTree.from(ranges("4"));
  PrefixTree premiumRatePrefixes = PrefixTree.from(ranges("49"));
  assertThat(prefixesByType).containsEntry(MOBILE, mobilePrefixes);
  assertThat(prefixesByType).containsEntry(FIXED_LINE, fixedLinePrefixes);
  assertThat(prefixesByType).containsEntry(TOLL_FREE, tollFreePrefixes);
  assertThat(prefixesByType).containsEntry(PREMIUM_RATE, premiumRatePrefixes);
}
@Test
public void testSimplify_multipleColumns() {
// Exercises simplify() over two columns (TYPE and AREA_CODE_LENGTH) at once, using a
// simplification function which keeps only the first 2 significant digits of each range. The
// REGION_CA column is assigned in the input but is not passed to simplify().
RangeTable table = RangeTable.builder(SCHEMA)
// This can't be simplified since expanding any of the area code length ranges will overlap
// (possibly with the unassigned area code length ranges).
.apply(assign(ranges("1[0-4]x_xxxx"), TYPE, FIXED_LINE))
.apply(assign(ranges("12x_xxxx"), AREA_CODE_LENGTH, 2))
.apply(assign(ranges("123_xxxx"), AREA_CODE_LENGTH, 3))
.apply(assign(ranges("123_4xxx"), AREA_CODE_LENGTH, 4))
// This can be simplified since it expands into "empty" ranges.
.apply(assign(ranges("156_xxxx"), TYPE, FIXED_LINE))
.apply(assign(ranges("156_xxxx"), AREA_CODE_LENGTH, 3))
.apply(assign(ranges("234_xxxx"), TYPE, MOBILE))
// This should be ignored since simplification happens only on the other columns.
.apply(assign(ranges("[12]23_xxxx"), REGION_CA, true))
.build();
RangeTable simplified =
table.simplify(c -> c.getRanges().significantDigits(2), 0, TYPE, AREA_CODE_LENGTH);
// Only the columns passed to simplify() are expected in the result.
assertThat(simplified).hasColumns(TYPE, AREA_CODE_LENGTH);
// The 156 range got pulled back to 2 digits (the other was already 2 digits).
assertThat(simplified).assigned(TYPE, FIXED_LINE).containsExactly("1[0-4]x_xxxx", "15x_xxxx");
// The 234 range got pulled back to 2 digits.
assertThat(simplified).assigned(TYPE, MOBILE).containsExactly("23x_xxxx");
assertThat(simplified).assigned(AREA_CODE_LENGTH, 2).containsExactly("12[0-24-9]_xxxx");
// The 123 ranges were preserved, but the 156 range was pulled back to 2 digits.
assertThat(simplified).assigned(AREA_CODE_LENGTH, 3)
.containsExactly("123_[0-35-9]xxx", "15x_xxxx");
assertThat(simplified).assigned(AREA_CODE_LENGTH, 4).containsExactly("123_4xxx");
}
@Test
public void testSimplify_chineseRanges() {
// This mimics real data found in the CN regular expression whereby a SHARED_COST range
// partially overlaps with the fixed line prefixes.
RangeTable table = RangeTable.builder(SCHEMA)
// The pattern is:
// abc | length=10 | FIXED_LINE
// abc100 | length=8 | FIXED_LINE
// abc95 | length=8,9 | FIXED_LINE
// abc96 | length=8,9 | SHARED_COST
.apply(assign(ranges("123_xxx_xxxx"), TYPE, FIXED_LINE))
.apply(assign(ranges("123_100xx"), TYPE, FIXED_LINE))
.apply(assign(ranges("123_95xxx", "123_95xxxx"), TYPE, FIXED_LINE))
.apply(assign(ranges("123_96xxx", "123_96xxxx"), TYPE, SHARED_COST))
// Just add a range that sits "either side" of what's being simplified to ensure it
// doesn't "leak".
.apply(assign(ranges("1[13]4_56xx_xxxx"), TYPE, MOBILE))
.build();
// Simplify only the TYPE column, keeping the first 3 significant digits of each range.
RangeTable simplified = table.simplify(c -> c.getRanges().significantDigits(3), 0, TYPE);
// The simplification function just takes the first 3 significant digits. If the "shared cost"
// ranges were not overlapping, this would result in a "fixed line" range of "123xxx..." with
// lengths 8,9,10. However to avoid corrupting the shared cost range, we end up with:
// abc | length=10 | FIXED_LINE
// abc[0-8] | length=8,9 | FIXED_LINE
// abc9[0-57-9] | length=8,9 | FIXED_LINE
// abc96 | length=8,9 | SHARED_COST
assertThat(simplified).hasColumns(TYPE);
assertThat(simplified).assigned(TYPE, FIXED_LINE).containsExactly(
"123_xxx_xxxx",
"123_[0-8]xx_xx",
"123_[0-8]xx_xxx",
"123_9[0-57-9]x_xx",
"123_9[0-57-9]x_xxx");
assertThat(simplified).assigned(TYPE, SHARED_COST).containsExactly(
"123_96x_xx",
"123_96x_xxx");
// The neighbouring mobile ranges are reduced to 3 significant digits without absorbing "123".
assertThat(simplified).assigned(TYPE, MOBILE).containsExactly(
"1[13]4_xxxx_xxxx");
}
@Test
public void testSimplify_overlappingCheck() {
  // A minimal schema containing only the type column.
  Schema typeOnlySchema = Schema.builder().add(TYPE).build();
  RangeTable overlapping =
      RangeTable.builder(typeOnlySchema)
          .apply(assign(ranges("123x"), TYPE, FIXED_LINE))
          .apply(assign(ranges("12x", "12xxx"), TYPE, MOBILE))
          .build();

  // EXTREME_SIMPLIFICATION is about as aggressive as a simplification function can be, which
  // makes it a good choice for probing edge case behaviour.
  RangeTable result = overlapping.simplify(EXTREME_SIMPLIFICATION, 0, TYPE);

  assertThat(result).hasColumns(TYPE);
  assertThat(result).assigned(TYPE, FIXED_LINE).containsExactly("123x");
  assertThat(result).assigned(TYPE, MOBILE).containsExactly("12x", "12[0-24-9]x", "12xxx");
}
/** Parses each of the given range specification strings and combines them into one range tree. */
private static RangeTree ranges(String... rangeSpecs) {
  return RangeTree.from(
      Arrays.asList(rangeSpecs).stream().map(RangeSpecification::parse));
}
/** Returns a change which assigns {@code value} in {@code column} for the given ranges. */
private static <T extends Comparable<T>> Change assign(
RangeTree ranges, Column<T> column, T value) {
return Change.builder(ranges).assign(column, value).build();
}
/** Returns a change which unassigns {@code column} for the given ranges. */
private static <T extends Comparable<T>> Change unassign(RangeTree ranges, Column<T> column) {
return Change.builder(ranges).unassign(column).build();
}
/**
 * Returns a change which, for the given ranges, makes one assignment per entry of the given map
 * (column to value), in the map's iteration order.
 *
 * <p>This helper reads no instance state, so it is static like the other change helpers in this
 * test.
 */
private static Change assign(RangeTree ranges, Map<Column<?>, ?> map) {
  return Change.of(ranges,
      map.entrySet().stream()
          .map(e -> Assignment.of(e.getKey(), e.getValue()))
          .collect(toImmutableList()));
}
/**
 * Returns an immutable table cell keyed by the parsed range specification and the given column,
 * holding the given (non-null) value wrapped in an {@link Optional}.
 */
private static Cell<RangeSpecification, Column<?>, Optional<?>> assigned(
    String range, Column<?> column, Object value) {
  RangeSpecification rowKey = RangeSpecification.parse(range);
  return Tables.immutableCell(rowKey, column, Optional.of(value));
}
}

+ 71
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/table/SchemaTest.java View File

@ -0,0 +1,71 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.table;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN;
import static org.junit.Assert.assertThrows;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import java.util.stream.Stream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/**
 * Tests for {@link Schema}, using a schema made of two plain columns ("Type" and "Operators") and
 * one region-keyed column group ("Region").
 */
@RunWith(JUnit4.class)
public class SchemaTest {
// Plain columns which are added to the schema under test.
private static final Column<ValidNumberType> TYPE =
Column.of(ValidNumberType.class, "Type", UNKNOWN);
private static final Column<String> OPERATORS = Column.ofString("Operators");
// A column group keyed by region, plus two columns obtained from that group.
private static final ColumnGroup<PhoneRegion, Boolean> REGIONS =
ColumnGroup.byRegion(Column.ofBoolean("Region"));
private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US"));
private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA"));
// A column which is deliberately NOT added to the schema.
private static final Column<Boolean> BOGUS = Column.ofBoolean("Bogus");
private static final Schema SCHEMA =
Schema.builder().add(TYPE).add(OPERATORS).add(REGIONS).build();
/** Checks the ordering imposed by the schema on its columns, and the names it reports. */
@Test
public void testColumnOrdering() {
assertThat(Stream.of(OPERATORS, REGION_US, TYPE, REGION_CA).sorted(SCHEMA.ordering()))
.containsExactly(TYPE, OPERATORS, REGION_CA, REGION_US)
.inOrder();
// The names are the columns/groups (but not the names of columns in groups, such as
// "Region:US", since those are functionally generated and aren't known by the schema).
assertThat(SCHEMA.names()).containsExactly("Type", "Operators", "Region").inOrder();
}
/** Checks column lookup by name: plain and group-qualified names work, other names throw. */
@Test
public void test() {
assertThat(SCHEMA.getColumn("Type")).isEqualTo(TYPE);
assertThat(SCHEMA.getColumn("Region:US")).isEqualTo(REGION_US);
// A bare group name is not a column name, and neither is a name the schema has never seen.
assertThrows(IllegalArgumentException.class, () -> SCHEMA.getColumn("Region"));
assertThrows(IllegalArgumentException.class, () -> SCHEMA.getColumn("Bogus"));
}
/** Checks that checkColumn() returns columns belonging to the schema and rejects others. */
@Test
public void testCheckColumn() {
assertThat(SCHEMA.checkColumn(TYPE)).isEqualTo(TYPE);
assertThat(SCHEMA.checkColumn(REGION_US)).isEqualTo(REGION_US);
assertThrows(IllegalArgumentException.class, () -> SCHEMA.checkColumn(BOGUS));
}
}

+ 132
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/testing/RangeTableSubject.java View File

@ -0,0 +1,132 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.testing;
import static com.google.common.base.Strings.lenientFormat;
import static com.google.common.truth.Fact.simpleFact;
import static com.google.common.truth.Truth.assertAbout;
import static java.util.Arrays.asList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableTable;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.RangeTable;
import java.util.Optional;
import javax.annotation.Nullable;
/** Truth subject offering assertions about {@link RangeTable} instances. */
public class RangeTableSubject extends Subject {
  private static final Factory<RangeTableSubject, RangeTable> SUBJECT_FACTORY =
      RangeTableSubject::new;

  /** Starts an assertion chain about the given {@link RangeTable}. */
  public static RangeTableSubject assertThat(@Nullable RangeTable table) {
    return assertAbout(SUBJECT_FACTORY).that(table);
  }

  private final RangeTable actual;

  private RangeTableSubject(FailureMetadata failureMetadata, @Nullable RangeTable subject) {
    super(failureMetadata, subject);
    this.actual = subject;
  }

  // Add more methods below as needed.

  /** Fails unless the table under test is empty. */
  public void isEmpty() {
    if (actual.isEmpty()) {
      return;
    }
    failWithActual(simpleFact("expected to be empty"));
  }

  /**
   * Asserts that the table has exactly the given columns and no others. Note that the underlying
   * check does not request an ordered comparison.
   */
  public void hasColumns(Column<?>... columns) {
    check("getColumns()").that(actual.getColumns()).containsExactlyElementsIn(asList(columns));
  }

  /** Asserts that the table, viewed as an immutable table, has the given number of row keys. */
  public void hasRowCount(int count) {
    int rowCount = actual.toImmutableTable().rowKeySet().size();
    check("toImmutableTable().rowKeySet().size()").that(rowCount).isEqualTo(count);
  }

  /**
   * Asserts that the row for the given range specification holds the given values, column by
   * column. One value must be supplied for every column in the row, using {@code null} to mean
   * "no value present". Other rows are not examined, so this does not show that no other range
   * has the same values; combine it with something like {@link #allRanges()} for full coverage.
   */
  public void hasRanges(String spec, Object... values) {
    ImmutableTable<RangeSpecification, Column<?>, Optional<?>> cells = actual.toImmutableTable();
    RangeSpecification rowKey = RangeSpecification.parse(spec);
    if (!cells.containsRow(rowKey)) {
      failFormatted(
          "specified row %s does not exist in the table: rows=%s", rowKey, cells.rowKeySet());
    }
    ImmutableMap<Column<?>, Optional<?>> row = cells.row(rowKey);
    if (values.length != row.size()) {
      failFormatted(
          "incorrect number of columns: expected %s, got %s", row.size(), values.length);
    }
    // Walk the row's cells and the supplied values in step with each other.
    int index = 0;
    for (Optional<?> cell : row.values()) {
      Object expected = values[index];
      index++;
      if (!cell.isPresent()) {
        if (expected != null) {
          failFormatted("missing value in row: expected %s", expected);
        }
      } else if (!cell.get().equals(expected)) {
        failFormatted("unexpected value in row: expected %s, got %s", expected, cell);
      }
    }
  }

  /** Reports a failure (without the actual value) whose message is leniently formatted. */
  private void failFormatted(String template, Object... args) {
    failWithoutActual(simpleFact(lenientFormat(template, args)));
  }

  /**
   * Returns a {@link RangeTreeSubject} about the ranges which have the given value in the given
   * column.
   */
  public RangeTreeSubject assigned(Column<?> column, Object value) {
    return RangeTreeSubject.assertWithMessageThat(
        actual.getRanges(column, value), "%s in column %s", value, column);
  }

  /** Returns a {@link RangeTreeSubject} about every range assigned in the given column. */
  public RangeTreeSubject assigned(Column<?> column) {
    return RangeTreeSubject.assertWithMessageThat(
        actual.getAssignedRanges(column), "column %s", column);
  }

  /** Returns a {@link RangeTreeSubject} about every range present in the table. */
  public RangeTreeSubject allRanges() {
    return RangeTreeSubject.assertWithMessageThat(actual.getAllRanges(), "all ranges");
  }
}

+ 118
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/testing/RangeTreeSubject.java View File

@ -0,0 +1,118 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.testing;
import static com.google.common.truth.Fact.simpleFact;
import static com.google.common.truth.Truth.assertAbout;
import static com.google.common.truth.Truth.assertWithMessage;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableSet;
import com.google.common.truth.FailureMetadata;
import com.google.common.truth.Subject;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.PrefixTree;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import javax.annotation.Nullable;
/** A Truth subject for asserting on {@link RangeTree} instances. */
public class RangeTreeSubject extends Subject {
  /** Returns a Truth subject for asserting on a {@link RangeTree}. */
  public static RangeTreeSubject assertThat(@Nullable RangeTree tree) {
    return assertAbout(RangeTreeSubject.SUBJECT_FACTORY).that(tree);
  }

  /**
   * Returns a Truth subject for asserting on the range tree form of a {@link PrefixTree}. A null
   * prefix tree produces a subject whose actual value is null (rather than throwing here), which
   * matches the {@code @Nullable} contract of the parameter.
   */
  public static RangeTreeSubject assertThat(@Nullable PrefixTree tree) {
    RangeTree ranges = (tree == null) ? null : tree.asRangeTree();
    return assertAbout(RangeTreeSubject.SUBJECT_FACTORY).that(ranges);
  }

  /**
   * Returns a Truth subject for asserting on a {@link RangeTree}, with the given (formatted)
   * message attached to any failure.
   */
  public static RangeTreeSubject assertWithMessageThat(
      @Nullable RangeTree tree, String message, Object... args) {
    return assertWithMessage(message, args).about(
        RangeTreeSubject.SUBJECT_FACTORY).that(tree);
  }

  private static final Factory<RangeTreeSubject, RangeTree> SUBJECT_FACTORY =
      RangeTreeSubject::new;

  private final RangeTree actual;

  private RangeTreeSubject(FailureMetadata failureMetadata, @Nullable RangeTree subject) {
    super(failureMetadata, subject);
    this.actual = subject;
  }

  // Add more methods below as needed.

  /** Asserts that the range tree is empty. */
  public void isEmpty() {
    if (!actual.isEmpty()) {
      failWithActual(simpleFact("expected to be empty"));
    }
  }

  /** Asserts that the range tree is not empty. */
  public void isNotEmpty() {
    if (actual.isEmpty()) {
      failWithActual(simpleFact("expected not to be empty"));
    }
  }

  /** Asserts that the range tree reports the given size. */
  public void hasSize(long size) {
    check("size()").withMessage("size").that(actual.size()).isEqualTo(size);
  }

  /** Asserts that the range tree contains the given digits (which may be the empty string). */
  public void contains(String digits) {
    if (!actual.contains(toSequence(digits))) {
      // The key must not carry trailing whitespace, since it is rendered as "key: value".
      failWithActual("expected to contain", digits);
    }
  }

  /**
   * Asserts that the range tree does not contain the given digits (which may be the empty
   * string).
   */
  public void doesNotContain(String digits) {
    if (actual.contains(toSequence(digits))) {
      failWithActual("expected not to contain", digits);
    }
  }

  /** Asserts that the range tree equals the tree built from the given range specification. */
  public void containsExactly(RangeSpecification spec) {
    RangeTree tree = RangeTree.from(spec);
    if (!actual.equals(tree)) {
      failWithActual("expected to be equal to", spec);
    }
  }

  /** Asserts that the range tree equals the tree built from the given range specifications. */
  public void containsExactly(Iterable<RangeSpecification> specs) {
    RangeTree tree = RangeTree.from(specs);
    if (!actual.equals(tree)) {
      failWithActual("expected to be equal to", specs);
    }
  }

  /** As {@link #containsExactly(RangeSpecification)}, after parsing the given string. */
  public void containsExactly(String spec) {
    containsExactly(RangeSpecification.parse(spec));
  }

  /** As {@link #containsExactly(Iterable)}, after parsing each of the given strings. */
  public void containsExactly(String... specs) {
    containsExactly(FluentIterable.from(specs).transform(RangeSpecification::parse));
  }

  /** Asserts that the lengths reported by the range tree are exactly the given lengths. */
  public void hasLengths(Integer... lengths) {
    check("getLengths()")
        .that(actual.getLengths())
        .containsExactlyElementsIn(ImmutableSet.copyOf(lengths));
  }

  /** Converts a possibly empty string of digits to a digit sequence. */
  private static DigitSequence toSequence(String digits) {
    return digits.isEmpty() ? DigitSequence.empty() : DigitSequence.of(digits);
  }
}

+ 477
- 0
metadata/src/test/java/com/google/i18n/phonenumbers/metadata/testing/TestNumberingScheme.java View File

@ -0,0 +1,477 @@
/*
* Copyright (C) 2017 The Libphonenumber Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.i18n.phonenumbers.metadata.testing;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.lang.Boolean.TRUE;
import static java.util.function.Function.identity;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Table;
import com.google.i18n.phonenumbers.metadata.DigitSequence;
import com.google.i18n.phonenumbers.metadata.RangeSpecification;
import com.google.i18n.phonenumbers.metadata.RangeTree;
import com.google.i18n.phonenumbers.metadata.Types;
import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag;
import com.google.i18n.phonenumbers.metadata.model.AltFormatSpec;
import com.google.i18n.phonenumbers.metadata.model.FormatSpec;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Attributes;
import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment;
import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema;
import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff;
import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType;
import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema;
import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType;
import com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema;
import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType;
import com.google.i18n.phonenumbers.metadata.table.Column;
import com.google.i18n.phonenumbers.metadata.table.RangeTable;
import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.Stream;
/**
* Reusable test-only builder for numbering schemes. More methods can be added as necessary to
* support whatever is needed for testing.
*
* <p>Note that the various "modifier" classes returned by methods such as "addRanges()" are
* designed only as fluent APIs and instances of modifiers should never be assigned to variables
* and especially not interleaved with other mutations of the range tables.
*/
public final class TestNumberingScheme {
/**
 * Creates a mutable numbering scheme builder for use in tests. For any main region other than
 * the "world" region, an international prefix (IDD) of "00" is set by default; it can be
 * replaced or removed via {@code setInternationalPrefix()} and
 * {@code clearInternationalPrefix()}.
 */
public static TestNumberingScheme forCallingCode(
    String cc, PhoneRegion main, PhoneRegion... others) {
  DigitSequence callingCode = DigitSequence.of(cc);
  ImmutableSet<PhoneRegion> otherRegions = ImmutableSet.copyOf(others);
  return new TestNumberingScheme(callingCode, main, otherRegions);
}
// Calling code and regions of the scheme, fixed at construction time.
private final DigitSequence callingCode;
private final PhoneRegion mainRegion;
private final ImmutableSet<PhoneRegion> otherRegions;
// Maps the main region and each of the other regions to its boolean "region" column.
private final ImmutableMap<PhoneRegion, Column<Boolean>> regionMap;
// See setNationalPrefix() / clearNationalPrefix()
private final List<DigitSequence> nationalPrefix = new ArrayList<>();
// See setInternationalPrefix() / clearInternationalPrefix()
private Optional<DigitSequence> internationalPrefix = Optional.empty();
// See setCarrierPrefixes()
private RangeTree carrierPrefixes = RangeTree.empty();
// Uses the CSV schema (rather than XML) since that handles type/tariff better.
private final RangeTable.Builder csvRanges = RangeTable.builder(RangesTableSchema.TABLE_COLUMNS);
// Per-region shortcode tables, created on demand by addShortcodes().
private final Map<PhoneRegion, RangeTable.Builder> shortcodes = new HashMap<>();
// Format specifications mapped to the ID under which they are assigned to ranges (the map keeps
// insertion order).
private final Map<FormatSpec, String> formats = new LinkedHashMap<>();
// Alternate formats are largely separate from everything else.
private ImmutableList<AltFormatSpec> altFormats = ImmutableList.of();
// Explicit example numbers.
private final Table<PhoneRegion, ValidNumberType, DigitSequence> examples =
HashBasedTable.create();
// Comments passed through to the numbering scheme when it is built.
private final List<Comment> comments = new ArrayList<>();
private TestNumberingScheme(
DigitSequence cc, PhoneRegion main, ImmutableSet<PhoneRegion> others) {
checkArgument(!others.contains(main), "duplicate regions");
this.callingCode = checkNotNull(cc);
this.mainRegion = checkNotNull(main);
this.otherRegions = others;
this.regionMap = Stream.concat(Stream.of(main), others.stream())
.collect(toImmutableMap(identity(), RangesTableSchema.REGIONS::getColumn));
// Set a reasonable IDD default for geographic regions.
if (!main.equals(PhoneRegion.getWorld())) {
setInternationalPrefix("00");
}
}
/**
 * Makes the given (non-empty) prefix the only national prefix of this scheme, discarding any
 * prefixes which were set before.
 */
public TestNumberingScheme setNationalPrefix(String prefix) {
  boolean isEmpty = prefix.isEmpty();
  checkArgument(!isEmpty, "national prefix must not be empty");
  nationalPrefix.clear();
  DigitSequence digits = DigitSequence.of(prefix);
  nationalPrefix.add(digits);
  return this;
}
/**
 * Sets the national prefixes of this scheme (in the given order), replacing any previous values.
 * Every prefix must be non-empty. All prefixes are validated and converted before the existing
 * prefixes are touched, so a rejected argument leaves this builder unchanged.
 *
 * @throws IllegalArgumentException if any of the given prefixes is empty
 */
public TestNumberingScheme setNationalPrefixes(String... prefix) {
  List<DigitSequence> parsed = new ArrayList<>(prefix.length);
  for (String p : prefix) {
    checkArgument(!p.isEmpty(), "national prefix must not be empty");
    parsed.add(DigitSequence.of(p));
  }
  this.nationalPrefix.clear();
  this.nationalPrefix.addAll(parsed);
  return this;
}
/** Removes every national prefix from this scheme. */
public TestNumberingScheme clearNationalPrefix() {
  nationalPrefix.clear();
  return this;
}
/**
 * Sets the international prefix (IDD) of this scheme, replacing any previous value. This is
 * rejected with an {@link IllegalStateException} when the main region is the "world" region.
 */
public TestNumberingScheme setInternationalPrefix(String prefix) {
  boolean isGeographic = !mainRegion.equals(PhoneRegion.getWorld());
  checkState(
      isGeographic, "[%s] cannot set IDD for non-geographic calling code", callingCode);
  DigitSequence digits = DigitSequence.of(prefix);
  internationalPrefix = Optional.of(digits);
  return this;
}
/** Removes the international prefix, leaving this scheme without one. */
public TestNumberingScheme clearInternationalPrefix() {
  internationalPrefix = Optional.empty();
  return this;
}
/**
 * Sets the carrier prefixes of this scheme from the given range specification strings,
 * replacing any previous value.
 */
public TestNumberingScheme setCarrierPrefixes(String... prefix) {
  Stream<RangeSpecification> specs = Arrays.stream(prefix).map(RangeSpecification::parse);
  carrierPrefixes = RangeTree.from(specs);
  return this;
}
/**
 * Adds the ranges described by the given specifications, none of which may already be present
 * in the scheme, and returns a fluent API through which the new ranges can be modified.
 */
public RangeModifier addRanges(ExtType type, ExtTariff tariff, String... specs) {
  RangeTree ranges = rangesOf(specs);
  return addRanges(type, tariff, ranges);
}
/**
 * Adds the given ranges, none of which may already be present in the scheme, and returns a
 * fluent API through which the new ranges can be modified.
 */
public RangeModifier addRanges(ExtType type, ExtTariff tariff, RangeTree ranges) {
  RangeTree alreadyPresent = csvRanges.getAllRanges().intersect(ranges);
  checkArgument(alreadyPresent.isEmpty(), "ranges already added: %s", alreadyPresent);
  csvRanges.assign(RangesTableSchema.TYPE, checkNotNull(type), ranges, OverwriteMode.NEVER);
  csvRanges.assign(RangesTableSchema.TARIFF, checkNotNull(tariff), ranges, OverwriteMode.NEVER);
  // New ranges are marked as valid in every region, which yields "legal" schemes by default.
  for (Column<Boolean> regionColumn : regionMap.values()) {
    csvRanges.assign(regionColumn, true, ranges, OverwriteMode.NEVER);
  }
  return new RangeModifier(ranges);
}
/**
 * Removes the ranges described by the given specifications from the scheme; the ranges do not
 * have to be present beforehand.
 */
public void removeRanges(String... specs) {
  RangeTree ranges = rangesOf(specs);
  removeRanges(ranges);
}
/** Removes the given ranges from the scheme; the ranges do not have to be present beforehand. */
public void removeRanges(RangeTree ranges) {
  this.csvRanges.remove(ranges);
}
/**
 * Returns a fluent API for modifying those existing ranges which lie within the bounds described
 * by the given specifications.
 */
public RangeModifier forRangesIn(String... specs) {
  RangeTree bounds = rangesOf(specs);
  return forRangesIn(bounds);
}
/**
 * Returns a fluent API for modifying those existing ranges which lie within the given bounds
 * (i.e. the intersection of the bounds with all ranges currently in the scheme).
 */
public RangeModifier forRangesIn(RangeTree ranges) {
  RangeTree existing = csvRanges.getAllRanges();
  RangeTree bounded = ranges.intersect(existing);
  return new RangeModifier(bounded);
}
/**
 * Adds the shortcodes described by the given specifications to the given region. The shortcodes
 * must not already be present for that region. Returns a fluent API through which the new
 * shortcodes can be modified.
 */
public ShortcodeModifier addShortcodes(
    PhoneRegion region, ShortcodeType type, ExtTariff tariff, String... specs) {
  RangeTree ranges = rangesOf(specs);
  return addShortcodes(region, type, tariff, ranges);
}
/**
 * Adds the given shortcode ranges to the given region. The ranges must not already be present
 * for that region. Returns a fluent API through which the new shortcodes can be modified.
 */
public ShortcodeModifier addShortcodes(
    PhoneRegion region, ShortcodeType type, ExtTariff tariff, RangeTree ranges) {
  // The shortcode table of a region is created the first time shortcodes are added to it.
  RangeTable.Builder regionTable = shortcodes.get(region);
  if (regionTable == null) {
    regionTable = RangeTable.builder(ShortcodesTableSchema.SCHEMA.columns());
    shortcodes.put(region, regionTable);
  }
  RangeTree alreadyPresent = regionTable.getAllRanges().intersect(ranges);
  checkArgument(alreadyPresent.isEmpty(), "ranges already added: %s", alreadyPresent);
  regionTable.assign(
      ShortcodesTableSchema.TYPE, checkNotNull(type), ranges, OverwriteMode.NEVER);
  regionTable.assign(
      ShortcodesTableSchema.TARIFF, checkNotNull(tariff), ranges, OverwriteMode.NEVER);
  return new ShortcodeModifier(region, ranges);
}
/**
 * Returns a fluent API for modifying those existing shortcodes of the given region which lie
 * within the bounds described by the given specifications.
 */
public ShortcodeModifier forShortcodesIn(PhoneRegion region, String... specs) {
  RangeTree bounds = rangesOf(specs);
  return forShortcodesIn(region, bounds);
}
/**
 * Returns a fluent API for modifying those existing shortcodes of the given region which lie
 * within the given bounds. Fails with a {@link NullPointerException} if no shortcodes were ever
 * added for the region.
 */
public ShortcodeModifier forShortcodesIn(PhoneRegion region, RangeTree ranges) {
  RangeTable.Builder regionTable =
      checkNotNull(shortcodes.get(region), "no shortcodes in region %s", region);
  RangeTree existing = regionTable.getAllRanges();
  RangeTree bounded = ranges.intersect(existing);
  return new ShortcodeModifier(region, bounded);
}
/** Returns a new {@code TypeModifier} for the given region, type and tariff. */
public TypeModifier forRangeTypes(PhoneRegion region, ExtType type, ExtTariff tariff) {
  TypeModifier modifier = new TypeModifier(region, type, tariff);
  return modifier;
}
/**
 * Replaces the alternate formats of this scheme with an immutable copy of the given list.
 */
public TestNumberingScheme setAlternateFormats(List<AltFormatSpec> altFormats) {
  ImmutableList<AltFormatSpec> snapshot = ImmutableList.copyOf(altFormats);
  this.altFormats = snapshot;
  return this;
}
/**
 * Builds a numbering scheme from the current state of this builder.
 *
 * <p>The range table and the per-region shortcode tables are each built exactly once, and those
 * immutable snapshots are used both to determine which formats are still assigned and as the
 * tables passed to the numbering scheme.
 */
public NumberingScheme build() {
  Attributes attributes = Attributes.create(
      callingCode,
      mainRegion,
      otherRegions,
      ImmutableSet.copyOf(nationalPrefix),
      carrierPrefixes,
      // This is currently simplistic (only 1 value) and could be extended for tests if needed.
      internationalPrefix.map(Object::toString).orElse(""),
      internationalPrefix.map(p -> RangeTree.from(RangeSpecification.from(p)))
          .orElse(RangeTree.empty()),
      "",
      ImmutableSet.of());
  RangeTable xmlTable = XmlRangesSchema.fromExternalTable(csvRanges.build());
  // Snapshot the shortcode tables once. Passing a lazily transformed view of the mutable
  // builders instead would rebuild a table on every access and would track later mutations.
  ImmutableMap<PhoneRegion, RangeTable> shortcodeMap =
      shortcodes.entrySet().stream()
          .collect(toImmutableMap(Entry::getKey, e -> e.getValue().build()));
  // Some formats may have been unassigned by modifications to the test scheme. Only copy the
  // formats with keys that exist in the range tables at the time the scheme is built.
  ImmutableSet<String> assignedFormats = Stream.concat(
      xmlTable.getAssignedValues(XmlRangesSchema.FORMAT).stream(),
      shortcodeMap.values().stream()
          .flatMap(t -> t.getAssignedValues(ShortcodesTableSchema.FORMAT).stream()))
      .collect(toImmutableSet());
  // The builder maps format specification to ID; the scheme wants ID to specification.
  ImmutableMap<String, FormatSpec> formatMap = formats.entrySet().stream()
      .filter(e -> assignedFormats.contains(e.getValue()))
      .collect(toImmutableMap(Entry::getValue, Entry::getKey));
  return NumberingScheme.from(
      attributes,
      xmlTable,
      shortcodeMap,
      formatMap,
      altFormats,
      fillInMissingExampleNumbersFrom(xmlTable, examples),
      comments);
}
/** Returns a new {@code TerritoryModifier} for the given region. */
public TerritoryModifier forTerritory(PhoneRegion region) {
  TerritoryModifier modifier = new TerritoryModifier(region);
  return modifier;
}
/**
 * Fluent API through which a fixed, non-empty set of ranges in the enclosing scheme can be
 * modified. Every setter returns this modifier so that calls can be chained.
 */
public final class RangeModifier {
  private final RangeTree ranges;

  private RangeModifier(RangeTree ranges) {
    boolean hasRanges = !ranges.isEmpty();
    checkArgument(hasRanges, "cannot modify empty ranges");
    this.ranges = ranges;
  }

  /**
   * Sets the regions in which the ranges are valid: the region column is set to true for each
   * given region and to false for every other region of the scheme. All given regions must
   * belong to the scheme.
   */
  public RangeModifier setRegions(PhoneRegion... regions) {
    ImmutableSet<PhoneRegion> validRegions = ImmutableSet.copyOf(regions);
    checkArgument(regionMap.keySet().containsAll(validRegions));
    for (Entry<PhoneRegion, Column<Boolean>> entry : regionMap.entrySet()) {
      boolean isValid = validRegions.contains(entry.getKey());
      csvRanges.assign(entry.getValue(), isValid, ranges, OverwriteMode.ALWAYS);
    }
    return this;
  }

  /** Sets whether the ranges are "national only" for dialing. */
  public RangeModifier setNationalOnly(boolean nationalOnly) {
    csvRanges.assign(
        RangesTableSchema.NATIONAL_ONLY, nationalOnly, ranges, OverwriteMode.ALWAYS);
    return this;
  }

  /** Sets the area code length of the ranges. */
  public RangeModifier setAreaCodeLength(int n) {
    csvRanges.assign(
        RangesTableSchema.AREA_CODE_LENGTH, n, ranges, OverwriteMode.ALWAYS);
    return this;
  }

  /**
   * Assigns the given format to the ranges. A format which has not been seen before is given a
   * generated ID based on the number of formats registered so far; otherwise the ID it already
   * has is reused.
   */
  public RangeModifier setFormat(FormatSpec format) {
    String id = formats.get(format);
    if (id == null) {
      id = String.format("__fmt_%02d", formats.size() + 1);
      formats.put(format, id);
    }
    csvRanges.assign(RangesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS);
    return this;
  }

  /** Registers the given format under the given ID and assigns that ID to the ranges. */
  public RangeModifier setFormat(String id, FormatSpec format) {
    formats.put(format, id);
    csvRanges.assign(RangesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS);
    return this;
  }

  /** Clears any format assigned to the ranges. */
  public RangeModifier clearFormat() {
    csvRanges.assign(RangesTableSchema.FORMAT, null, ranges, OverwriteMode.ALWAYS);
    return this;
  }

  /** Sets the geocode name of the ranges in the column for the given language. */
  public RangeModifier setGeocode(SimpleLanguageTag lang, String name) {
    csvRanges.assign(
        RangesTableSchema.GEOCODES.getColumn(lang), name, ranges, OverwriteMode.ALWAYS);
    return this;
  }
}
/** Fluent API for modifying a set of shortcodes in a region. */
public final class ShortcodeModifier {
  private final PhoneRegion region;
  private final RangeTree ranges;

  /**
   * Creates a modifier for the given shortcode ranges in the given region.
   *
   * @throws IllegalArgumentException if {@code ranges} is empty
   * @throws NullPointerException if {@code region} is null
   */
  private ShortcodeModifier(PhoneRegion region, RangeTree ranges) {
    checkArgument(!ranges.isEmpty(), "cannot modify empty ranges");
    this.region = checkNotNull(region);
    this.ranges = ranges;
  }

  /**
   * Returns the shortcode table builder for this modifier's region.
   *
   * @throws NullPointerException if no shortcode table exists for the region
   */
  private RangeTable.Builder shortcode() {
    // Fail with a descriptive message here rather than with a bare NullPointerException at the
    // call site when the region has no shortcode table.
    return checkNotNull(shortcodes.get(region), "no shortcode table for region: %s", region);
  }

  /**
   * Sets the format assigned to the shortcodes. If the format is not yet known to this scheme,
   * an ID of the form {@code __fmt_NN} is generated for it; otherwise its existing ID is reused.
   */
  public ShortcodeModifier setFormat(FormatSpec format) {
    String id =
        formats.computeIfAbsent(format, f -> String.format("__fmt_%02d", formats.size() + 1));
    shortcode().assign(ShortcodesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS);
    return this;
  }

  /**
   * Sets the format assigned to the shortcodes, registering the format under the given explicit
   * ID (replacing any ID previously associated with the same format).
   */
  public ShortcodeModifier setFormat(String id, FormatSpec format) {
    formats.put(format, id);
    shortcode().assign(ShortcodesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS);
    return this;
  }

  /** Clears the format assigned to the shortcodes. */
  public ShortcodeModifier clearFormat() {
    shortcode().assign(ShortcodesTableSchema.FORMAT, null, ranges, OverwriteMode.ALWAYS);
    return this;
  }
}
/** Fluent API for modifying attributes of range types. */
public final class TypeModifier {
  private final PhoneRegion region;
  private final ExtType type;
  private final ExtTariff tariff;

  /**
   * Creates a modifier for the number type described by {@code type} and {@code tariff} in the
   * given region.
   *
   * @throws NullPointerException if any argument is null
   * @throws IllegalArgumentException if {@code region} is not a region of this scheme
   */
  public TypeModifier(PhoneRegion region, ExtType type, ExtTariff tariff) {
    this.region = checkNotNull(region);
    this.type = checkNotNull(type);
    this.tariff = checkNotNull(tariff);
    boolean knownRegion = regionMap.containsKey(region);
    checkArgument(
        knownRegion, "invalid test region '%s' not in: %s", region, regionMap.keySet());
  }

  /**
   * Sets the example number for this type in this region. Nothing is recorded if no valid
   * number type can be inferred from the type and tariff.
   */
  public TypeModifier setExampleNumber(String ex) {
    Optional<ValidNumberType> numberType = inferValidNumberType(type, tariff);
    if (numberType.isPresent()) {
      examples.put(region, numberType.get(), DigitSequence.of(ex));
    }
    return this;
  }

  /**
   * Adds a comment, made up of the given lines, anchored to this type in this region. Nothing
   * is added if the type and tariff do not map to an XML type.
   */
  public TypeModifier addComment(String... lines) {
    inferValidNumberType(type, tariff)
        .flatMap(Types::toXmlType)
        .ifPresent(xmlType -> {
          comments.add(Comment.create(Comment.anchor(region, xmlType), Arrays.asList(lines)));
        });
    return this;
  }
}
/** Fluent API for modifying territory-level attributes. */
public final class TerritoryModifier {
  private final PhoneRegion region;

  /**
   * Creates a modifier for the given region.
   *
   * @throws NullPointerException if {@code region} is null
   */
  public TerritoryModifier(PhoneRegion region) {
    this.region = checkNotNull(region);
  }

  /** Adds a comment, made up of the given lines, anchored to this territory. */
  public TerritoryModifier addComment(String... lines) {
    List<String> text = Arrays.asList(lines);
    comments.add(Comment.create(Comment.anchor(region), text));
    return this;
  }
}
/**
 * Returns a copy of the given example numbers in which the main region and every other region
 * of this scheme has an example for each number type assigned to it in the given table.
 * Existing examples are kept as they are.
 */
private Table<PhoneRegion, ValidNumberType, DigitSequence> fillInMissingExampleNumbersFrom(
    RangeTable xmlTable, Table<PhoneRegion, ValidNumberType, DigitSequence> examples) {
  // Work on a copy, because build() must not modify the builder's own example table.
  HashBasedTable<PhoneRegion, ValidNumberType, DigitSequence> completed =
      HashBasedTable.create(examples);
  addMissingExampleNumbersFor(mainRegion, xmlTable, completed);
  for (PhoneRegion other : otherRegions) {
    addMissingExampleNumbersFor(other, xmlTable, completed);
  }
  return completed;
}
/**
 * Adds, for each number type assigned to the given region in the given table, an example number
 * to {@code examples} unless the region already has an example for that type. The example used
 * is the first number of the ranges assigned to the type.
 */
private static void addMissingExampleNumbersFor(
    PhoneRegion region,
    RangeTable xmlTable,
    Table<PhoneRegion, ValidNumberType, DigitSequence> examples) {
  Column<Boolean> inRegion = XmlRangesSchema.REGIONS.getColumn(region);
  RangeTree rangesInRegion = xmlTable.getRanges(inRegion, TRUE);
  // Restrict the table to the ranges of this region, keeping only the type column.
  RangeTable typesInRegion = xmlTable.subTable(rangesInRegion, XmlRangesSchema.TYPE);
  for (ValidNumberType assigned : typesInRegion.getAssignedValues(XmlRangesSchema.TYPE)) {
    if (!examples.contains(region, assigned)) {
      RangeTree typeRanges = typesInRegion.getRanges(XmlRangesSchema.TYPE, assigned);
      // Assigned types must be assigned via non empty ranges (so first() cannot fail).
      examples.put(region, assigned, typeRanges.first());
    }
  }
}
/**
 * Parses the given range specifications and combines them into a single range tree.
 *
 * @throws IllegalArgumentException if no specifications are given, or if the resulting ranges
 *     would contain the empty digit sequence
 */
private static RangeTree rangesOf(String... specs) {
  checkArgument(specs.length > 0, "must provide at least one range specifier");
  Stream<RangeSpecification> parsed = Arrays.stream(specs).map(RangeSpecification::parse);
  RangeTree tree = RangeTree.from(parsed);
  // A tree whose initial node can terminate would match the empty digit sequence.
  boolean matchesEmpty = tree.getInitial().canTerminate();
  checkArgument(!matchesEmpty, "cannot add the empty digit sequence");
  return tree;
}
/**
 * Infers the valid number type for the given type and tariff. The tariff takes precedence: the
 * type is only consulted when the tariff does not map to a valid number type.
 *
 * @return the inferred number type, or empty if neither the tariff nor the type maps to one
 */
private static Optional<ValidNumberType> inferValidNumberType(ExtType type, ExtTariff tariff) {
  Optional<ValidNumberType> fromTariff = tariff.toValidNumberType();
  return fromTariff.isPresent() ? fromTariff : type.toValidNumberType();
}
}

Loading…
Cancel
Save