PiperOrigin-RevId: 319856719 Co-authored-by: The libphonenumber Team <noreply@google.com>pull/3882/head
| @ -0,0 +1,311 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import com.google.common.base.CharMatcher; | |||
| import com.google.common.base.Preconditions; | |||
| import com.google.common.collect.DiscreteDomain; | |||
| import com.google.errorprone.annotations.Immutable; | |||
| import com.google.errorprone.annotations.concurrent.LazyInit; | |||
| /** | |||
| * A small, fast, immutable representation of a phone number digit sequence. This class represents | |||
| * contiguous sequences of digits in phone numbers, such as "123" or "000". It does not encode | |||
| * semantic information such as the region code to which a number belongs or perform any semantic | |||
| * validation. It can be thought of as equivalent to a String containing only the ASCII digits | |||
| * {@code '0'} to {@code '9'}. | |||
| */ | |||
| @Immutable | |||
| public final class DigitSequence implements Comparable<DigitSequence> { | |||
| private static final CharMatcher ASCII_DIGITS = CharMatcher.inRange('0', '9'); | |||
| // IMPORTANT | |||
| // This cannot be more than 18 to avoid overflowing a signed long (it must be signed due to the | |||
| // calculation of the "distance" metric which can be +ve or -ve). | |||
| // | |||
| // If it does need to be raised, this whole class probably needs to be rethought. ITU recommends | |||
| // a limit of 15 digits (not including country calling code) but there are currently 2 examples | |||
| // in the metadata XML file which exceed this (Japan) where some non-international toll free | |||
| // numbers (those starting with 0037 and 0036) can be up to 17 digits (still okay) in the current | |||
| // metadata but there's a note saying that they may even extend to 21 digits!! | |||
| // | |||
| // An appropriate way to split this class would be to make a closed type hierarchy with 2 | |||
| // separate implementations, one using a long to encode the numbers and one using BigInteger (or | |||
| // maybe just encoding digits in a string directly). | |||
| // The good thing about this approach is that instances of the different implementations could | |||
| // never be equal to each other. This is likely not a difficult refactoring, although the Domain | |||
| // class will also need to be considered carefully and details like the "index()" value will have | |||
| // to change completely between the classes. | |||
| // | |||
| /** The maximum number of digits which can be held in a digit sequence. */ | |||
| public static final int MAX_DIGITS = 18; | |||
| // Simple lookup of powers-of-10 for all valid sequence lengths (0 - MAX_DIGITS). | |||
| private static final long[] POWERS_OF_TEN = new long[MAX_DIGITS + 1]; | |||
| static { | |||
| // 1, 10, 100, 1000, 10000 ... | |||
| POWERS_OF_TEN[0] = 1; | |||
| for (int n = 1; n < POWERS_OF_TEN.length; n++) { | |||
| POWERS_OF_TEN[n] = 10 * POWERS_OF_TEN[n - 1]; | |||
| } | |||
| } | |||
| // A table of adjustment values to convert a digit sequence into an absolute index in the | |||
| // integer domain, to impose a true lexicographical ordering. The value of a digit sequence is | |||
| // adjusted by the number of additional elements in the phone number domain which cannot be | |||
| // represented as integers (the empty sequence or anything with leading zeros). This results in | |||
| // an absolute ordering of all digit sequences. For example the digit sequence "0123" is length | |||
| // 4, and there are 111 additional additional elements that come before 4-length sequences | |||
| // ("", "00"-"09", "000"-"099"), so its index is {@code 123 + 111 = 234}. | |||
| // To calculate this value dynamically for any length N, offset=floor(10^N / 9). | |||
| private static final long[] DOMAIN_OFFSET = new long[MAX_DIGITS + 1]; | |||
| static { | |||
| // 0, 1, 11, 111, 1111 ... | |||
| for (int n = 1; n < DOMAIN_OFFSET.length; n++) { | |||
| DOMAIN_OFFSET[n] = 10 * DOMAIN_OFFSET[n - 1] + 1; | |||
| } | |||
| } | |||
| private static final DigitSequence EMPTY = new DigitSequence(0, 0L); | |||
| private static final DigitSequence[] SINGLETON_DIGITS = new DigitSequence[] { | |||
| new DigitSequence(1, 0L), | |||
| new DigitSequence(1, 1L), | |||
| new DigitSequence(1, 2L), | |||
| new DigitSequence(1, 3L), | |||
| new DigitSequence(1, 4L), | |||
| new DigitSequence(1, 5L), | |||
| new DigitSequence(1, 6L), | |||
| new DigitSequence(1, 7L), | |||
| new DigitSequence(1, 8L), | |||
| new DigitSequence(1, 9L), | |||
| }; | |||
| // Simple helper to return {@code 10^n} for all valid sequence lengths. | |||
| private static long pow10(int n) { | |||
| return POWERS_OF_TEN[n]; | |||
| } | |||
| /** | |||
| * Returns the domain in which phone number digit sequences exist. This is needed when creating | |||
| * canonical {@link com.google.common.collect.Range Ranges} of digit-sequences. | |||
| */ | |||
| public static DiscreteDomain<DigitSequence> domain() { | |||
| return Domain.INSTANCE; | |||
| } | |||
| private static final class Domain extends DiscreteDomain<DigitSequence> { | |||
| private static final Domain INSTANCE = new Domain(); | |||
| private static final DigitSequence MIN = EMPTY; | |||
| private static final DigitSequence MAX = DigitSequence.of("999999999999999999"); | |||
| @Override | |||
| public DigitSequence next(DigitSequence num) { | |||
| long next = num.value + 1; | |||
| if (next < pow10(num.length)) { | |||
| return new DigitSequence(num.length, next); | |||
| } else { | |||
| int len = num.length + 1; | |||
| return (len <= MAX_DIGITS) ? new DigitSequence(len, 0) : null; | |||
| } | |||
| } | |||
| @Override | |||
| public DigitSequence previous(DigitSequence num) { | |||
| long prev = num.value - 1; | |||
| if (prev >= 0) { | |||
| return new DigitSequence(num.length, prev); | |||
| } else { | |||
| int len = num.length - 1; | |||
| return (len >= 0) ? new DigitSequence(len, pow10(len) - 1) : null; | |||
| } | |||
| } | |||
| @Override | |||
| public long distance(DigitSequence start, DigitSequence end) { | |||
| // The indices get up to 19 digits but can't overflow Long.MAX_VALUE, so they can be safely | |||
| // subtracted to get a signed long "distance" without risk of over-/under- flow. | |||
| return end.index() - start.index(); | |||
| } | |||
| @Override | |||
| public DigitSequence minValue() { | |||
| return MIN; | |||
| } | |||
| @Override | |||
| public DigitSequence maxValue() { | |||
| return MAX; | |||
| } | |||
| } | |||
| /** Returns the digit sequence of length one representing the given digit value. */ | |||
| public static DigitSequence singleton(int digit) { | |||
| Preconditions.checkArgument(0 <= digit && digit <= 9, "invalid digit value: %s", digit); | |||
| return SINGLETON_DIGITS[digit]; | |||
| } | |||
| /** | |||
| * Returns the empty digit sequence. This is useful in special cases where you need to build up | |||
| * a digit sequence starting from nothing). | |||
| */ | |||
| public static DigitSequence empty() { | |||
| return EMPTY; | |||
| } | |||
| /** Returns a digit sequence for the given string (e.g. "012345"). */ | |||
| public static DigitSequence of(String digits) { | |||
| Preconditions.checkArgument(digits.length() <= MAX_DIGITS, | |||
| "Digit string too long: '%s'", digits); | |||
| Preconditions.checkArgument(ASCII_DIGITS.matchesAllOf(digits), | |||
| "Digit string contains non-digit characters: '%s'", digits); | |||
| return digits.isEmpty() ? empty() : new DigitSequence(digits.length(), Long.parseLong(digits)); | |||
| } | |||
| /** | |||
| * Returns a digit sequence of {@code length} containing only the digit '0'. This is useful when | |||
| * performing range calculations to determine the smallest digit sequence in a block. | |||
| */ | |||
| public static DigitSequence zeros(int length) { | |||
| return new DigitSequence(length, 0L); | |||
| } | |||
| /** | |||
| * Returns a digit sequence of {@code length} containing only the digit '9'. This is useful when | |||
| * performing range calculations to determine the largest digit sequence in a block. | |||
| */ | |||
| public static DigitSequence nines(int length) { | |||
| return new DigitSequence(length, pow10(length) - 1); | |||
| } | |||
| // The overall length of the digit sequence, including any leading zeros. | |||
| private final int length; | |||
| // The decimal value of the digit sequence (excluding leading zeros, obviously). | |||
| private final long value; | |||
| // Cached toString() representation (toString() of DigitSequence is used in comparisons for | |||
| // sorting to achieve lexicographical ordering, which means it gets churned a lot). | |||
| @LazyInit | |||
| private String toString; | |||
| // Called directly from RangeSpecification. | |||
| DigitSequence(int length, long value) { | |||
| // Don't check for -ve length as this should never happen and will blow up in pow10() anyway. | |||
| Preconditions.checkArgument(length <= MAX_DIGITS, | |||
| "Digit sequence too long [%s digits]", length); | |||
| // This should not happen unless there's a code error, so nice user messages aren't needed. | |||
| Preconditions.checkArgument(value >= 0 && value < pow10(length)); | |||
| this.length = length; | |||
| this.value = value; | |||
| } | |||
| /** Returns if this sequence is empty (i.e. length == 0). */ | |||
| public boolean isEmpty() { | |||
| return length == 0; | |||
| } | |||
| /** Returns the length of this digit sequence. */ | |||
| public int length() { | |||
| return length; | |||
| } | |||
| /** | |||
| * Returns the digit at index {@code n} in this digit sequence, starting from the most | |||
| * significant digit. | |||
| */ | |||
| public int getDigit(int n) { | |||
| Preconditions.checkElementIndex(n, length); | |||
| return (int) (value / pow10(((length - 1) - n)) % 10); | |||
| } | |||
| /** | |||
| * Returns the sub-sequence representing only the first {@code n} digits in this sequence. For | |||
| * example, {@code "01234".first(3) == "012"}. | |||
| */ | |||
| public DigitSequence first(int n) { | |||
| Preconditions.checkElementIndex(n, length); | |||
| return new DigitSequence(n, value / pow10(length - n)); | |||
| } | |||
| /** | |||
| * Returns the sub-sequence representing only the last {@code n} digits in this sequence. For | |||
| * example, {@code "01234".last(3) == "234"}. | |||
| */ | |||
| public DigitSequence last(int n) { | |||
| Preconditions.checkElementIndex(n, length); | |||
| return new DigitSequence(n, value % pow10(n)); | |||
| } | |||
| /** | |||
| * Returns a new sequence which extends this sequence by a single digit ({@code 0 <= digit <= 9}). | |||
| */ | |||
| public DigitSequence extendBy(int digit) { | |||
| Preconditions.checkArgument(0 <= digit && digit <= 9); | |||
| return new DigitSequence(length + 1, (10 * value) + digit); | |||
| } | |||
| /** Returns a new sequence which extends this sequence by the given value. */ | |||
| public DigitSequence extendBy(DigitSequence n) { | |||
| Preconditions.checkNotNull(n); | |||
| return new DigitSequence(length + n.length, (pow10(n.length) * value) + n.value); | |||
| } | |||
| /** | |||
| * Returns the digit sequence immediately after this one, or {@code null} if this is the | |||
| * maximum value. | |||
| */ | |||
| public DigitSequence next() { | |||
| return domain().next(this); | |||
| } | |||
| /** | |||
| * Returns the digit sequence immediately before this one, or {@code null} if this is the | |||
| * minimum value. | |||
| */ | |||
| public DigitSequence previous() { | |||
| return domain().previous(this); | |||
| } | |||
| /** Returns the absolute index of this digit sequence within the integer domain. */ | |||
| private long index() { | |||
| return value + DOMAIN_OFFSET[length]; | |||
| } | |||
| @Override | |||
| public int compareTo(DigitSequence other) { | |||
| return Long.signum(index() - other.index()); | |||
| } | |||
| @Override | |||
| public boolean equals(Object o) { | |||
| return (o instanceof DigitSequence) && index() == ((DigitSequence) o).index(); | |||
| } | |||
| @Override | |||
| public int hashCode() { | |||
| return Long.hashCode(index()); | |||
| } | |||
| @Override | |||
| public String toString() { | |||
| // This little dance is required (according to the docs for the LazyInit annotation) for lazy | |||
| // initialization of non-volatile fields (yes, that's a double init in a single statement). | |||
| String localVar = toString; | |||
| if (localVar == null) { | |||
| toString = localVar = (length > 0 ? String.format("%0" + length + "d", value) : ""); | |||
| } | |||
| return localVar; | |||
| } | |||
| } | |||
| @ -0,0 +1,65 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.base.Preconditions; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import java.util.Comparator; | |||
| /** | |||
| * A key for uniquely identifying number metadata for a region. For "geographical" regions, the | |||
| * region code suffices to identify the range information, but for "non geographical" regions, the | |||
| * calling code is required and the region is set to "UN001" (world). | |||
| */ | |||
| @AutoValue | |||
| public abstract class MetadataKey implements Comparable<MetadataKey> { | |||
| private static final Comparator<MetadataKey> ORDERING = | |||
| Comparator.comparing(MetadataKey::region).thenComparing(MetadataKey::callingCode); | |||
| /** | |||
| * Returns a key to identify phone number data in the given region with the specified calling | |||
| * code. Care must be taken when creating keys because it is possible to create invalid keys that | |||
| * would not match any data (e.g. region="US", calling code="44"). | |||
| */ | |||
| public static MetadataKey create(PhoneRegion region, DigitSequence callingCode) { | |||
| // Null checks and semantic checks. | |||
| Preconditions.checkArgument(region.equals(PhoneRegion.getWorld()) | |||
| || (region.toString().length() == 2 && !region.equals(PhoneRegion.getUnknown()))); | |||
| Preconditions.checkArgument(!callingCode.isEmpty()); | |||
| return new AutoValue_MetadataKey(region, callingCode); | |||
| } | |||
| /** | |||
| * Returns the region for this key (this is {@link PhoneRegion#getWorld()} for non-geographical | |||
| * regions). | |||
| */ | |||
| public abstract PhoneRegion region(); | |||
| /** Returns the calling code for this key. */ | |||
| public abstract DigitSequence callingCode(); | |||
| @Override | |||
| public int compareTo(MetadataKey other) { | |||
| return ORDERING.compare(this, other); | |||
| } | |||
| // Used in human readable formatting during presubmit checks; be careful if you change it. | |||
| @Override | |||
| public final String toString() { | |||
| return String.format("region=%s, calling code=+%s", region(), callingCode()); | |||
| } | |||
| } | |||
| @ -0,0 +1,351 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkState; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.SetOperations; | |||
| import java.util.ArrayList; | |||
| import java.util.List; | |||
| /** | |||
| * A variation of a {@link RangeTree} which represents a set of prefixes (as opposed to a set of | |||
| * ranges). While this implementation is backed by a {@code RangeTree} and has a similar serialized | |||
| * representation, it is a deliberately distinct type and should not be thought of as a subset of | |||
| * {@code RangeTree}. In particular, set operations are defined to work differently for | |||
| * {@code PrefixTree} due to its differing semantics and some set operations (e.g. subtraction) are | |||
| * not even well defined. | |||
| */ | |||
| public final class PrefixTree { | |||
| private static final PrefixTree EMPTY = new PrefixTree(RangeTree.empty()); | |||
| /** Returns the "empty" prefix tree, which matches no ranges. */ | |||
| public static PrefixTree empty() { | |||
| return EMPTY; | |||
| } | |||
| /** | |||
| * Returns a prefix tree with the paths of the given ranges, trimmed to the earliest point of | |||
| * termination. For example, the ranges {@code {"1[0-3]", "1234", "56x"}} will result in the | |||
| * prefixes {@code {"1[0-3]", "56x"}}, since {@code "1[0-3]"} contains {@code "12"}, which is a | |||
| * prefix of {@code "1234"}. | |||
| */ | |||
| public static PrefixTree from(RangeTree ranges) { | |||
| return !ranges.isEmpty() | |||
| ? new PrefixTree(removeTrailingAnyDigitPaths(TrimmingVisitor.trim(ranges))) | |||
| : empty(); | |||
| } | |||
| /** | |||
| * Returns a prefix tree containing all digit sequences in the given range specification. A | |||
| * single range specification cannot overlap in the way that general range trees can, so unlike | |||
| * {@link #from(RangeTree)}, this method will never throw {@code IllegalArgumentException}. | |||
| */ | |||
| public static PrefixTree from(RangeSpecification spec) { | |||
| // Range specifications define ranges of a single length, so must always be a valid prefix. | |||
| return from(RangeTree.from(spec)); | |||
| } | |||
| /** | |||
| * Returns the minimal prefix tree which includes all the paths in "include", and none of the | |||
| * paths in "exclude". For example: | |||
| * <pre> {@code | |||
| * minimal({ "123x", "456x" }, { "13xx", "459x" }, 0) == { "12", "456" } | |||
| * minimal({ "123x", "456x" }, {}, 0) == { "" } | |||
| * minimal({ "123x", "456x" }, {}, 1) == { "[14]" } | |||
| * }</pre> | |||
| * | |||
| * <p>A minimal length can be specified to avoid creating prefixes that are "too short" for some | |||
| * circumstances. | |||
| * | |||
| * <p>Caveat: In cases where the {@code include} and {@code exclude} ranges overlap, the shortest | |||
| * possible prefix is chosen. For example: | |||
| * <pre> {@code | |||
| * minimal({ "12", "1234", "56" }, { "123", "5678" }) == { "12", "56" } | |||
| * }</pre> | |||
| * This means that it may not always be true that {@code minimal(A, B).intersect(minimal(B, A))} | |||
| * is empty. | |||
| */ | |||
| public static PrefixTree minimal(RangeTree include, RangeTree exclude, int minLength) { | |||
| checkArgument(include.intersect(exclude).isEmpty(), "ranges must be disjoint"); | |||
| checkArgument(minLength >= 0, "invalid minimum prefix length: %s", minLength); | |||
| PrefixTree prefix = PrefixTree.from(include); | |||
| if (prefix.isEmpty()) { | |||
| // This matches no input, not all input. | |||
| return prefix; | |||
| } | |||
| // Ignore anything that the prefix already captures, since there's no point avoiding shortening | |||
| // the prefix to avoid what's already overlapping. | |||
| exclude = exclude.subtract(prefix.retainFrom(exclude)); | |||
| // This can contain only the empty sequence (i.e. match all input) if the original include set | |||
| // was something like "xxxxx". In that case the initial node is just the terminal. | |||
| RangeTree minimal; | |||
| DfaNode root = prefix.asRangeTree().getInitial(); | |||
| if (prefix.isIdentity() || exclude.isEmpty()) { | |||
| // Either we already accept anything, or there is nothing to exclude. | |||
| minimal = emit(root, RangeSpecification.empty(), RangeTree.empty(), minLength); | |||
| } else { | |||
| minimal = recursivelyMinimize( | |||
| root, RangeSpecification.empty(), exclude.getInitial(), RangeTree.empty(), minLength); | |||
| } | |||
| // No need to go via the static factory here, since that does a bunch of work we know cannot | |||
| // be necessary. The range tree here is a subset of an already valid prefix tree, so cannot | |||
| // contain "early terminating nodes" or "trailing any digit sequences". | |||
| return new PrefixTree(minimal); | |||
| } | |||
| private final RangeTree ranges; | |||
| private PrefixTree(RangeTree ranges) { | |||
| // Caller is responsible for ensuring that the ranges conform to expectations of a prefix tree. | |||
| this.ranges = ranges; | |||
| } | |||
| /** | |||
| * Returns a {@link RangeTree} containing the same digit sequences as this prefix tree. Prefix | |||
| * trees and range trees do not have the same semantics, but they do have the same serialized | |||
| * form (i.e. to serialize a prefix tree, you can just serialize the corresponding range tree). | |||
| */ | |||
| public RangeTree asRangeTree() { | |||
| return ranges; | |||
| } | |||
| /** | |||
| * Returns whether this prefix tree is empty. Filtering a {@link RangeTree} by the empty prefix | |||
| * tree always returns the empty range tree. The result of filtering a range tree is defined as | |||
| * containing only digit sequences which are prefixed by some digit sequence in the prefix tree. | |||
| * If the prefix tree is empty, no digit sequence can ever satisfy that requirement. | |||
| */ | |||
| public boolean isEmpty() { | |||
| return ranges.isEmpty(); | |||
| } | |||
| /** | |||
| * Returns whether this prefix tree matches any digit sequence. Filtering a {@link RangeTree} by | |||
| * the identity prefix returns the original range tree. The result of filtering a range tree is | |||
| * defined as containing only digit sequences which are prefixed by some digit sequence in the | |||
| * prefix tree. The identity prefix tree contains the empty digit sequence, which is a prefix of | |||
| * every digit sequence. | |||
| */ | |||
| public boolean isIdentity() { | |||
| return !ranges.isEmpty() && ranges.getInitial().equals(RangeTree.getTerminal()); | |||
| } | |||
| /** Returns whether the given sequence would be retained by this prefix tree. */ | |||
| public boolean prefixes(DigitSequence digits) { | |||
| DfaNode node = ranges.getInitial(); | |||
| for (int n = 0; n < digits.length(); n++) { | |||
| DfaEdge e = node.find(digits.getDigit(n)); | |||
| if (e == null) { | |||
| break; | |||
| } | |||
| node = e.getTarget(); | |||
| } | |||
| return node.equals(RangeTree.getTerminal()); | |||
| } | |||
| /** | |||
| * Returns a subset of the given ranges, containing only ranges which are prefixed by an | |||
| * element in this prefix tree. For example: | |||
| * <pre> {@code | |||
| * RangeTree r = { "12xx", "1234x" } | |||
| * PrefixTree p = { "12[0-5]" } | |||
| * p.retainFrom(r) = { "12[0-5]x", "1234x"} | |||
| * }</pre> | |||
| * Note that if the prefix tree is empty, this method returns the empty range tree. | |||
| */ | |||
| public RangeTree retainFrom(RangeTree ranges) { | |||
| return SetOperations.INSTANCE.retainFrom(this, ranges); | |||
| } | |||
| /** | |||
| * Returns the union of two prefix trees. For prefix trees {@code p1}, {@code p2} and any range | |||
| * tree {@code R}, the union {@code P = p1.union(p2)} is defined such that: | |||
| * <pre> {@code | |||
| * P.retainFrom(R) = p1.retainFrom(R).union(p2.retainFrom(R)) | |||
| * }</pre> | |||
| * If prefixes are the same length this is equivalent to {@link RangeTree#union(RangeTree)}, | |||
| * but when prefixes overlap, only the more general (shorter) prefix is retained. | |||
| */ | |||
| public PrefixTree union(PrefixTree other) { | |||
| return SetOperations.INSTANCE.union(this, other); | |||
| } | |||
| /** | |||
| * Returns the intersection of two prefix trees. For prefix trees {@code p1}, {@code p2} and any | |||
| * range tree {@code R}, the intersection {@code P = p1.intersect(p2)} is defined such that: | |||
| * <pre> {@code | |||
| * P.retainFrom(R) = p1.retainFrom(R).intersect(p2.retainFrom(R)) | |||
| * }</pre> | |||
| * If prefixes are the same length this is equivalent to {@link RangeTree#intersect(RangeTree)}, | |||
| * but when prefixes overlap, only the more specific (longer) prefix is retained. | |||
| */ | |||
| public PrefixTree intersect(PrefixTree other) { | |||
| return SetOperations.INSTANCE.intersect(this, other); | |||
| } | |||
| /** | |||
| * Returns a prefix tree trimmed to at most {@code maxLength} digits. The returned value may be | |||
| * shorter if, in the process of trimming, trailing edges are collapsed to "any digit" sequences. | |||
| * For example: | |||
| * <pre> {@code | |||
| * { "12[0-4]5", "12[5-9]" }.trim(3) == "12" | |||
| * { "7001", "70[1-9]", "7[1-9]" }.trim(3) == "7" | |||
| * }</pre> | |||
| */ | |||
| public PrefixTree trim(int maxLength) { | |||
| return PrefixTree.from( | |||
| RangeTree.from( | |||
| ranges.asRangeSpecifications().stream() | |||
| .map(s -> s.first(maxLength)) | |||
| .collect(toImmutableList()))); | |||
| } | |||
| @Override | |||
| public int hashCode() { | |||
| return ranges.hashCode(); | |||
| } | |||
| @Override | |||
| public boolean equals(Object o) { | |||
| return (o instanceof PrefixTree) && ranges.equals(((PrefixTree) o).ranges); | |||
| } | |||
| @Override | |||
| public String toString() { | |||
| return ranges.toString(); | |||
| } | |||
| private static final class TrimmingVisitor implements DfaVisitor { | |||
| static RangeTree trim(RangeTree ranges) { | |||
| if (ranges.isEmpty()) { | |||
| return ranges; | |||
| } | |||
| if (ranges.getInitial().canTerminate()) { | |||
| // Not the "empty range tree" (which matches no input), but the range tree containing the | |||
| // empty range specification (which matches only the empty digit sequence). | |||
| return RangeTree.from(RangeSpecification.empty()); | |||
| } | |||
| TrimmingVisitor v = new TrimmingVisitor(); | |||
| ranges.accept(v); | |||
| return RangeTree.from(v.paths); | |||
| } | |||
| private final List<RangeSpecification> paths = new ArrayList<>(); | |||
| private RangeSpecification path = RangeSpecification.empty(); | |||
| @Override | |||
| public void visit(DfaNode source, DfaEdge edge, DfaNode target) { | |||
| RangeSpecification oldPath = path; | |||
| path = path.extendByMask(edge.getDigitMask()); | |||
| if (target.canTerminate()) { | |||
| paths.add(path); | |||
| } else { | |||
| target.accept(this); | |||
| } | |||
| path = oldPath; | |||
| } | |||
| } | |||
| // Note: This is NOT as simple as just calling "getPrefix()" on each range specification because | |||
| // ranges that are too short become problematic. Consider { "7[1-9]", "70x" } which should result | |||
| // in "7". If we just call "getPrefix()" and merge, we end up with "7x". | |||
| // | |||
| // One way to fix this is by repeatedly creating prefix trees (removing trailing "any digit" | |||
| // sequences) until it becomes stable. | |||
| // | |||
| // The other way (simpler) is to extend the length of any shorter range specifications to bring | |||
| // them up to the max length before merging them. In the above example, we extend the length of | |||
| // "7[1-9]" to "7[1-9]x" and merge it with "70x" to get "7xx", which can then have its prefix | |||
| // extracted. | |||
| private static RangeTree removeTrailingAnyDigitPaths(RangeTree ranges) { | |||
| if (ranges.isEmpty()) { | |||
| return ranges; | |||
| } | |||
| // Skip this if "ranges" matches only one length (since it would be a no-op). | |||
| if (ranges.getLengths().size() > 1) { | |||
| int length = ranges.getLengths().last(); | |||
| ranges = ranges.map(s -> s.length() < length ? s.extendByLength(length - s.length()) : s); | |||
| } | |||
| // Having merged everything, we can now extract the correct prefixes as the final step. | |||
| return ranges.map(RangeSpecification::getPrefix); | |||
| } | |||
| /** | |||
| * Recursively determines the next level of prefix minimization. The algorithm follows as much | |||
| * of the "included" path as possible (node), potentially splitting into several sub-recursive | |||
| * steps if the current included edge overlaps with multiple "excluded" paths. Once a path no | |||
| * longer overlaps with the exclude paths, it is added to the result. Paths are also added to | |||
| * the result if they terminate while still overlapping the excluded paths. | |||
| */ | |||
| private static RangeTree recursivelyMinimize( | |||
| DfaNode node, RangeSpecification path, DfaNode exclude, RangeTree minimal, int minLength) { | |||
| RangeTree result = minimal; | |||
| for (DfaEdge included : node.getEdges()) { | |||
| // Digits on the included edge which have not (yet) been matched by an excluded edge. | |||
| int remaining = included.getDigitMask(); | |||
| DfaNode next = included.getTarget(); | |||
| if (next.equals(RangeTree.getTerminal())) { | |||
| // The DFA of a prefix tree (unlike a general range tree) can only terminate at the | |||
| // terminal node itself, so an edge which reaches it is emitted directly. | |||
| result = result.union(RangeTree.from(path.extendByMask(remaining))); | |||
| } else { | |||
| checkState(!next.canTerminate(), "invalid DFA state for prefix tree at: %s", path); | |||
| // Recurse into every excluded edge which shares digits with the included edge, removing | |||
| // the shared digits from the remaining mask as we go. When the exclude node has no | |||
| // outgoing edges nothing is removed and the whole included edge is emitted below. | |||
| for (DfaEdge excluded : exclude.getEdges()) { | |||
| int overlap = excluded.getDigitMask() & remaining; | |||
| if (overlap == 0) { | |||
| continue; | |||
| } | |||
| remaining &= ~overlap; | |||
| result = recursivelyMinimize( | |||
| next, path.extendByMask(overlap), excluded.getTarget(), result, minLength); | |||
| } | |||
| // Whatever is left lies outside the exclude tree and is safe to emit. Emitting may pull in | |||
| // part of the sub-tree below in order to make up the minimum length. | |||
| if (remaining != 0) { | |||
| result = emit(next, path.extendByMask(remaining), result, minLength); | |||
| } | |||
| } | |||
| } | |||
| return result; | |||
| } | |||
| /** | |||
| * Recursively visits the sub-tree under the given node, extending the path until it reaches the | |||
| * minimum length (or the terminal node) before adding it to the given tree. | |||
| * | |||
| * @param node the DFA node reached by following {@code path} | |||
| * @param path the range specification accumulated so far | |||
| * @param minimal the tree of paths emitted so far | |||
| * @param minLength the length at which a path is emitted without descending further | |||
| * @return {@code minimal} with every path emitted from this sub-tree added to it | |||
| */ | |||
| private static RangeTree emit( | |||
| DfaNode node, RangeSpecification path, RangeTree minimal, int minLength) { | |||
| if (path.length() >= minLength || node.equals(RangeTree.getTerminal())) { | |||
| return minimal.union(RangeTree.from(path)); | |||
| } | |||
| for (DfaEdge e : node.getEdges()) { | |||
| // The recursive call returns a tree which already contains everything in 'minimal', so the | |||
| // result can be used directly without unioning it with 'minimal' a second time. | |||
| minimal = emit(e.getTarget(), path.extendByMask(e.getDigitMask()), minimal, minLength); | |||
| } | |||
| return minimal; | |||
| } | |||
| } | |||
| @ -0,0 +1,752 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain; | |||
| import static java.lang.Integer.numberOfLeadingZeros; | |||
| import static java.lang.Integer.numberOfTrailingZeros; | |||
| import com.google.common.collect.ContiguousSet; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.Iterables; | |||
| import com.google.common.collect.Range; | |||
| import com.google.common.collect.RangeSet; | |||
| import java.util.ArrayList; | |||
| import java.util.Arrays; | |||
| import java.util.Comparator; | |||
| import java.util.Iterator; | |||
| import java.util.List; | |||
| import java.util.Set; | |||
| /** | |||
| * A compact representation of a disjoint set of ranges of digit sequences. This is a compact way | |||
| * to represent one or many ranges of digit sequences which share the same length. Examples include: | |||
| * <pre>{@code | |||
| * "01234" --> the singleton range containing only the digit sequence "01234" | |||
| * "012xx" --> the contiguous digit sequence range ["01200".."01299"] | |||
| * "012[3-5]6xx" --> the disjoint set of contiguous digit sequence ranges | |||
| * ["0123600".."0123699"], ["0124600".."0124699"], ["0125600".."0125699"] | |||
| * }</pre> | |||
| * Note that the sets of contiguous ranges defined by a {@code RangeSpecification} are always | |||
| * mutually disjoint. | |||
| * | |||
| * <p>Range specifications have a natural prefix based lexicographical ordering (based on the | |||
| * most-significant point at which a difference appears), but if you are comparing a disjoint set | |||
| * of range specifications (e.g. from a {@link RangeTree}) then it can be more intuitive to use an | |||
| * ordering based on the minimum digit sequence, but note this approach fails if the range | |||
| * specifications can overlap (e.g. comparing "1xx" and "100"). | |||
| */ | |||
| public final class RangeSpecification implements Comparable<RangeSpecification> { | |||
| /** The mask of all possible digits (bits 0 to 9 set, one bit per digit value). */ | |||
| public static final char ALL_DIGITS_MASK = (1 << 10) - 1; | |||
| // Shared zero-length instance returned by empty(). | |||
| private static final RangeSpecification EMPTY = new RangeSpecification(""); | |||
| /** | |||
| * Returns the empty range specification, which matches only the empty digit sequence. The same | |||
| * shared instance is returned on every call. | |||
| */ | |||
| public static RangeSpecification empty() { | |||
| return EMPTY; | |||
| } | |||
| /** | |||
| * Returns the range specification of length one which matches any of the given digits. | |||
| * | |||
| * @param digits the digit values (each in the range 0 to 9) to be matched; must not be empty | |||
| * @throws IllegalArgumentException if no digits are given or any digit is outside 0 to 9 | |||
| */ | |||
| public static RangeSpecification singleton(Iterable<Integer> digits) { | |||
| int mask = 0; | |||
| for (int digit : digits) { | |||
| checkArgument(0 <= digit && digit <= 9, "bad digit value '%s'", digit); | |||
| mask |= (1 << digit); | |||
| } | |||
| // A specification element must match at least one digit. Fail here with a clear message rather | |||
| // than letting the constructor reject the zero mask as an "invalid bitmask". | |||
| checkArgument(mask != 0, "at least one digit must be specified"); | |||
| return new RangeSpecification(String.valueOf((char) mask)); | |||
| } | |||
| /** Returns a new range specification which matches only the given non-empty digit sequence. */ | |||
| public static RangeSpecification from(DigitSequence s) { | |||
| if (s.length() == 0) { | |||
| return RangeSpecification.empty(); | |||
| } | |||
| char[] masks = new char[s.length()]; | |||
| for (int n = 0; n < masks.length; n++) { | |||
| masks[n] = (char) (1 << s.getDigit(n)); | |||
| } | |||
| return new RangeSpecification(new String(masks)); | |||
| } | |||
| /** Returns a new range specification which matches any digit sequence of the specified length. */ | |||
| public static RangeSpecification any(int length) { | |||
| checkArgument(length >= 0); | |||
| if (length == 0) { | |||
| return RangeSpecification.empty(); | |||
| } | |||
| char[] masks = new char[length]; | |||
| Arrays.fill(masks, ALL_DIGITS_MASK); | |||
| return new RangeSpecification(new String(masks)); | |||
| } | |||
| /** | |||
| * Parses the string form of a range specification (e.g. "1234[57-9]xxx"). The string must be | |||
| * correctly formed, and every bracketed range must be well formed (e.g. not "[33]", "[3-3]" or | |||
| * "[6-4]"). | |||
| * | |||
| * <p>Non-canonical ranges are accepted provided their digits are in order (e.g. "[1234]", | |||
| * "[4-5]" or "[0-9]" but not "[4321]"). The returned range specification is canonical (e.g. | |||
| * {@code parse("12[34569]").toString() == "12[3-69]"}). | |||
| * | |||
| * <p>The empty string is parsed as the empty range specification. | |||
| * | |||
| * <p>Single ASCII underscores ("_") may be used to group digits and aid readability. They are | |||
| * accepted during parsing but not retained in the parsed result (e.g. | |||
| * {@code parse("12_34[5-8]_xxx_xxx").toString() == "1234[5-8]xxxxxx"}). An underscore may not | |||
| * appear inside a bracketed range (e.g. "[1_4]"), at either end of the string (e.g. "123xxx_"), | |||
| * or directly after another underscore. | |||
| * | |||
| * @throws IllegalArgumentException if the string is not a well formed range specification | |||
| */ | |||
| public static RangeSpecification parse(String s) { | |||
| if (s.isEmpty()) { | |||
| return empty(); | |||
| } | |||
| checkArgument(!s.startsWith("_") && !s.endsWith("_"), "cannot start/end with '_': %s", s); | |||
| StringBuilder masks = new StringBuilder(); | |||
| boolean afterUnderscore = false; | |||
| int pos = 0; | |||
| while (pos < s.length()) { | |||
| char c = s.charAt(pos); | |||
| if (c == '_') { | |||
| checkArgument(!afterUnderscore, "cannot have multiple '_' in a row: %s", s); | |||
| afterUnderscore = true; | |||
| pos++; | |||
| continue; | |||
| } | |||
| // Any other character ends a run of underscores. | |||
| afterUnderscore = false; | |||
| if (c == 'x') { | |||
| masks.append(ALL_DIGITS_MASK); | |||
| } else if (c == '[') { | |||
| int rangeStart = pos + 1; | |||
| int rangeEnd = s.indexOf(']', rangeStart); | |||
| checkArgument(rangeEnd != -1, "unclosed range in specification: %s", s); | |||
| checkArgument(rangeEnd > rangeStart, "empty range in specification: %s", s); | |||
| masks.append(parseRange(s, rangeStart, rangeEnd)); | |||
| // Leave the position on the closing bracket; it is stepped over below. | |||
| pos = rangeEnd; | |||
| } else { | |||
| checkArgument('0' <= c && c <= '9', | |||
| "bad digit value '%s' in range specification: %s", c, s); | |||
| masks.append((char) (1 << (c - '0'))); | |||
| } | |||
| pos++; | |||
| } | |||
| return new RangeSpecification(masks.toString()); | |||
| } | |||
| // Parses the contents of a bracketed range (the characters of 's' in [start, end), excluding the | |||
| // brackets themselves) and returns the bitmask of the digits it matches. Digits and sub-ranges | |||
| // must appear in strictly increasing order. | |||
| private static char parseRange(String s, int start, int end) { | |||
| int bits = 0; | |||
| int pos = start; | |||
| while (pos < end) { | |||
| char c = s.charAt(pos); | |||
| pos++; | |||
| checkArgument('0' <= c && c <= '9', | |||
| "bad digit value '%s' in range specification: %s", c, s); | |||
| int lo = (c - '0'); | |||
| // Neither this bit nor any bit above it may already be set (this enforces the ordering). | |||
| checkArgument(bits >> lo == 0, "unordered range in specification: %s", s); | |||
| boolean startsSubRange = pos != end && s.charAt(pos) == '-'; | |||
| if (!startsSubRange) { | |||
| // A lone digit which is not the start of a sub-range. | |||
| bits |= 1 << lo; | |||
| continue; | |||
| } | |||
| // Step over the '-' and read the upper digit of the sub-range. | |||
| pos++; | |||
| checkArgument(pos < end, "unclosed range in specification: %s", s); | |||
| c = s.charAt(pos); | |||
| pos++; | |||
| checkArgument('0' <= c && c <= '9', | |||
| "bad digit value '%s' in range specification: %s", c, s); | |||
| int hi = (c - '0'); | |||
| checkArgument(hi > lo, "unordered range in specification: %s", s); | |||
| // Set bits lo to hi inclusive (e.g. 11111 & ~11 = 11100). | |||
| int upToHi = (1 << (hi + 1)) - 1; | |||
| int belowLo = (1 << lo) - 1; | |||
| bits |= upToHi & ~belowLo; | |||
| } | |||
| return (char) bits; | |||
| } | |||
| /** | |||
| * Returns the canonical representation of the given ranges, sorted by minimum digit sequence. | |||
| * The number of range specifications in the returned list may be higher or lower than the | |||
| * number of given ranges. | |||
| * <p> | |||
| * NOTE: This is only used by RangeTree for generating a RangeTree from a RangeSet, and is not | |||
| * suitable as a public API (one day we might generate the RangeTree directly and be able to | |||
| * delete this code). | |||
| * | |||
| * @throws IllegalArgumentException if the range set is empty or its first range contains the | |||
| * empty digit sequence | |||
| */ | |||
| static ImmutableList<RangeSpecification> from(RangeSet<DigitSequence> ranges) { | |||
| Set<Range<DigitSequence>> rangeSet = ranges.asRanges(); | |||
| checkArgument(!rangeSet.isEmpty(), "empty range set not permitted"); | |||
| Iterator<Range<DigitSequence>> it = rangeSet.iterator(); | |||
| // All ranges are canonicalized over the domain of DigitSequences so that Range operations | |||
| // such as isConnected() behave as expected. See Range for more on why this matters. | |||
| Range<DigitSequence> merged = it.next().canonical(domain()); | |||
| checkArgument(!merged.contains(DigitSequence.empty()), | |||
| "empty digit sequence not permitted in range set"); | |||
| List<RangeSpecification> specs = new ArrayList<>(); | |||
| while (it.hasNext()) { | |||
| Range<DigitSequence> next = it.next().canonical(domain()); | |||
| if (!merged.isConnected(next)) { | |||
| // A gap was found, so flush the merged range and start again from 'next'. | |||
| addRangeSpecsOf(merged, specs); | |||
| merged = next; | |||
| } else { | |||
| // Canonical ranges are not guaranteed to be closed-open (singleton ranges are fully closed, | |||
| // as is any range containing the maximum value), so the bound types must be preserved | |||
| // when joining the two ranges. | |||
| merged = Range.range( | |||
| merged.lowerEndpoint(), merged.lowerBoundType(), | |||
| next.upperEndpoint(), next.upperBoundType()) | |||
| .canonical(domain()); | |||
| } | |||
| } | |||
| addRangeSpecsOf(merged, specs); | |||
| return ImmutableList.sortedCopyOf(Comparator.comparing(RangeSpecification::min), specs); | |||
| } | |||
| /** | |||
| * Adds the canonical minimal range specifications for a single range to the given list. The | |||
| * range may span digit sequences of several lengths, in which case it is processed one length | |||
| * at a time. | |||
| */ | |||
| private static void addRangeSpecsOf(Range<DigitSequence> r, List<RangeSpecification> specs) { | |||
| // Viewing the (already canonical) range as a contiguous set gives direct access to its first | |||
| // and last elements without having to reason about bound types. Creating the set is cheap. | |||
| ContiguousSet<DigitSequence> sequences = ContiguousSet.create(r, domain()); | |||
| DigitSequence lo = sequences.first(); | |||
| DigitSequence hi = sequences.last(); | |||
| while (hi.length() > lo.length()) { | |||
| // Cover everything from 'lo' up to "999..." of the same length (the largest value of that | |||
| // length), then restart from "000..." of the next length up. | |||
| DigitSequence allNines = DigitSequence.nines(lo.length()); | |||
| addRangeSpecs(lo, allNines, specs); | |||
| lo = allNines.next(); | |||
| } | |||
| // Both bounds now have the same length; add the specs up to (and including) the last value. | |||
| addRangeSpecs(lo, hi, specs); | |||
| } | |||
| // Adds canonical minimal range specifications for the range of same-length digit sequences. | |||
| // | |||
| // 'start' and 'end' are the inclusive bounds of the range and must have the same length. The | |||
| // specifications are added to 'specs' in three stages: those working up from 'start' (appended | |||
| // in order), those working down from 'end' (each inserted at the position where stage 1 | |||
| // finished) and finally a single "center" specification (also inserted at that position). | |||
| private static void addRangeSpecs( | |||
| DigitSequence start, DigitSequence end, List<RangeSpecification> specs) { | |||
| int length = start.length(); | |||
| checkArgument(end.length() == length); | |||
| // Masks contains a running total of the bitmasks we want to convert to RangeSpecifications. | |||
| // As processing proceeds, the mask array is reused. This is because the prefix used for | |||
| // successive range specifications is always a subset of the previous specifications and the | |||
| // trailing part of the array always fills up with the range mask for 'x' (i.e. [0-9]). | |||
| int[] masks = new int[length]; | |||
| // Stage 1: | |||
| // Starting from the last digit in the 'start' sequence, work up until we find something that | |||
| // is not a '0'. This is the first digit that needs to be adjusted to create a range | |||
| // specification covering it and the digits 'below' it. For example, the first specification | |||
| // for the range ["1200".."9999"] is "1[2-9]xx". | |||
| // Once a specification is emitted, the start value is adjusted to the next digit sequence | |||
| // immediately above the end of the emitted range, so after emitting "1[2-9]xx", start="2000". | |||
| // Once each range specification is emitted, we continue working 'up' the digit sequence until | |||
| // the next calculated start value exceeds the 'end' of our range. This specification cannot | |||
| // be emitted and signals the end of stage 1. | |||
| setBitmasks(masks, start); | |||
| for (int n = previousNon(0, start, length); n != -1; n = previousNon(0, start, n)) { | |||
| int loDigit = start.getDigit(n); | |||
| DigitSequence prefix = start.first(n); | |||
| DigitSequence blockEnd = prefix.extendBy(DigitSequence.nines(length - n)); | |||
| if (blockEnd.compareTo(end) > 0) { | |||
| // The end of this block would exceed the end of the main range, so we must stop. | |||
| break; | |||
| } | |||
| // The bitmasks we want are: | |||
| // <first n digits of 'start'> [loDigit..9] <any digits mask...> | |||
| masks[n] = bitmaskUpFrom(loDigit); | |||
| fillBitmasksAfter(masks, n); | |||
| specs.add(RangeSpecification.fromBitmasks(masks)); | |||
| // Adjust the range start now we have emitted the range specification. | |||
| start = blockEnd.next(); | |||
| } | |||
| // Stage 2: | |||
| // Very similar to stage 1, but work up from the last digit in the 'end' sequence. The | |||
| // difference now is that we look for the first digit that's not '9' and generate ranges that | |||
| // go down to the start of the range, not up to the end. Thus for ["0000", "1299"] the first | |||
| // specification generated is "1[0-2]xx", which is emitted at the end of the list. | |||
| // Every stage 2 specification is inserted at 'midIdx', so a later (lower) specification ends | |||
| // up before an earlier (higher) one. | |||
| int midIdx = specs.size(); | |||
| setBitmasks(masks, end); | |||
| for (int n = previousNon(9, end, length); n != -1; n = previousNon(9, end, n)) { | |||
| int hiDigit = end.getDigit(n); | |||
| DigitSequence prefix = end.first(n); | |||
| DigitSequence blockStart = prefix.extendBy(DigitSequence.zeros(length - n)); | |||
| if (blockStart.compareTo(start) < 0) { | |||
| // The start of this block would precede the start of the main range, so we must stop. | |||
| break; | |||
| } | |||
| // The bitmasks we want are: | |||
| // <first n digits of 'end'> [0..hiDigit] <any digits mask...> | |||
| masks[n] = bitmaskDownFrom(hiDigit); | |||
| fillBitmasksAfter(masks, n); | |||
| specs.add(midIdx, RangeSpecification.fromBitmasks(masks)); | |||
| // Adjust the range end now we have emitted the range specification. | |||
| end = blockStart.previous(); | |||
| } | |||
| // Stage 3: Having emitted the first and last set of range specifications, it only remains to | |||
| // emit the "center" specification in the middle of the list. This is special as neither bound | |||
| // is the end of a block. In previous stages, all partial ranges are either "up to 9" or | |||
| // "down to zero". For example: ["1234".."1789"] has the center range "1[3-6]xx", and | |||
| // ["1234".."1345"] has no center range at all. | |||
| if (start.compareTo(end) < 0) { | |||
| // Find the last digit before start and end combine (ie, 1200, 1299 --> 12xx --> n=1). We | |||
| // know that 'start' and 'end' are the same length and bound a range like: | |||
| // <prefix> [X..Y] [000..999] | |||
| // but X or Y could be 0 or 9 respectively (just not both). | |||
| // | |||
| // Note that we don't even bother to test the first digit in the sequences because if 'start' | |||
| // and 'end' span a full range (e.g. [000..999]) we can just use the same code to fill the | |||
| // masks correctly anyway. | |||
| int n = start.length(); | |||
| while (--n > 0 && start.getDigit(n) == 0 && end.getDigit(n) == 9) {} | |||
| // Bitwise AND the masks for [X..9] and [0..Y] to get the mask for [X..Y]. | |||
| // Note that the "masks" array already contains the correct prefix digits up to (n-1). | |||
| masks[n] = bitmaskUpFrom(start.getDigit(n)) & bitmaskDownFrom(end.getDigit(n)); | |||
| fillBitmasksAfter(masks, n); | |||
| specs.add(midIdx, RangeSpecification.fromBitmasks(masks)); | |||
| } | |||
| } | |||
| // Overwrites the leading elements of the given array with single-bit masks, one per digit of the | |||
| // given sequence. A range specification built from the resulting array would match only that | |||
| // digit sequence. | |||
| private static void setBitmasks(int[] masks, DigitSequence s) { | |||
| int count = s.length(); | |||
| for (int i = 0; i < count; i++) { | |||
| int digit = s.getDigit(i); | |||
| masks[i] = 1 << digit; | |||
| } | |||
| } | |||
| /** | |||
| * Creates a range specification from the given array of integer masks. The Nth element of the | |||
| * array becomes the Nth element of the range specification. Every mask must be non-zero and | |||
| * have only bits 0 to 9 set. | |||
| * | |||
| * @throws IllegalArgumentException if there are too many masks or any mask is invalid | |||
| */ | |||
| private static RangeSpecification fromBitmasks(int[] bitmasks) { | |||
| checkArgument(bitmasks.length <= DigitSequence.MAX_DIGITS, | |||
| "range specification too large"); | |||
| StringBuilder encoded = new StringBuilder(bitmasks.length); | |||
| for (int mask : bitmasks) { | |||
| checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "invalid bitmask: %s", mask); | |||
| encoded.append((char) mask); | |||
| } | |||
| return new RangeSpecification(encoded.toString()); | |||
| } | |||
| // Sets the bitmasks after the given index to the "all digits" mask (i.e. matching [0-9]). The | |||
| // index itself is never written, so -1 may be passed to start filling from the first element. | |||
| private static void fillBitmasksAfter(int[] masks, int n) { | |||
| for (int i = n + 1; i < masks.length; i++) { | |||
| // Because of the iterative way the mask array is handled, everything past the first | |||
| // ALL_DIGITS_MASK must already be filled, so filling can stop there. | |||
| if (masks[i] == ALL_DIGITS_MASK) { | |||
| break; | |||
| } | |||
| masks[i] = ALL_DIGITS_MASK; | |||
| } | |||
| } | |||
| // Searches backwards from (but not including) index n and returns the index of the nearest | |||
| // digit in the sequence which is not equal to the given value, or -1 if every digit before | |||
| // index n has that value. | |||
| private static int previousNon(int digit, DigitSequence s, int n) { | |||
| int idx = n - 1; | |||
| while (idx >= 0 && s.getDigit(idx) == digit) { | |||
| idx--; | |||
| } | |||
| return idx; | |||
| } | |||
| /** | |||
| * Returns the bitmask for the range {@code [n-9]}, i.e. the "all digits" mask with the bits | |||
| * below bit {@code n} cleared. | |||
| */ | |||
| private static int bitmaskUpFrom(int n) { | |||
| return (-1 << n) & ALL_DIGITS_MASK; | |||
| } | |||
| /** | |||
| * Returns the bitmask for the range {@code [0-n]}, obtained by shifting the "all digits" mask | |||
| * right until bit {@code n} is the highest bit left set. | |||
| */ | |||
| private static int bitmaskDownFrom(int n) { | |||
| return ALL_DIGITS_MASK >>> (9 - n); | |||
| } | |||
| // String containing one bitmask per character (bits 0..9), in the same order as the elements of | |||
| // the range specification. | |||
| private final String bitmasks; | |||
| // Minimum and maximum sequences (inclusive) which span the ranges defined by this specification. | |||
| // Caching this is deliberate, since we sort disjoint ranges using the minimum value. It might | |||
| // not be so useful to cache the maximum value though. | |||
| private final DigitSequence min; | |||
| private final DigitSequence max; | |||
| // Total number of sequences matched by this specification (the product of the number of digits | |||
| // matched at each position). | |||
| private final long sequenceCount; | |||
| // Creates a specification from a string holding one bitmask per character, and pre-computes the | |||
| // minimum sequence, the maximum sequence and the number of matched sequences. | |||
| private RangeSpecification(String bitmasks) { | |||
| int length = bitmasks.length(); | |||
| checkArgument(length <= DigitSequence.MAX_DIGITS, | |||
| "Range specification too long (%s digits)", length); | |||
| this.bitmasks = bitmasks; | |||
| long lowest = 0; | |||
| long highest = 0; | |||
| long count = 1; | |||
| for (char c : bitmasks.toCharArray()) { | |||
| int mask = c; | |||
| checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "invalid bitmask: %s", mask); | |||
| // The lowest/highest set bit in the mask is the smallest/largest digit matched here. | |||
| int smallestDigit = numberOfTrailingZeros(mask); | |||
| int largestDigit = 31 - numberOfLeadingZeros(mask); | |||
| lowest = (lowest * 10) + smallestDigit; | |||
| highest = (highest * 10) + largestDigit; | |||
| count *= Integer.bitCount(mask); | |||
| } | |||
| this.min = new DigitSequence(length, lowest); | |||
| this.max = new DigitSequence(length, highest); | |||
| this.sequenceCount = count; | |||
| } | |||
| /** | |||
| * Returns the number of digits that this specification can match. This is the length of all | |||
| * digit sequences which can match this specification (and is zero for the empty specification). | |||
| */ | |||
| public int length() { | |||
| return bitmasks.length(); | |||
| } | |||
| /** Returns the smallest digit sequence matched by this range (computed once at construction). */ | |||
| public DigitSequence min() { | |||
| return min; | |||
| } | |||
| /** Returns the largest digit sequence matched by this range (computed once at construction). */ | |||
| public DigitSequence max() { | |||
| return max; | |||
| } | |||
| /** | |||
| * Returns the total number of digit sequences matched by (contained in) this specification. | |||
| * This is the product of the number of digits matched at each position. | |||
| */ | |||
| public long getSequenceCount() { | |||
| return sequenceCount; | |||
| } | |||
| /** | |||
| * Returns the bitmask of the Nth range in this specification. Bit-X (0 <= X <= 9) corresponds | |||
| * to the digit with value X. As every range in a specification must match at least one digit, | |||
| * this mask can never be zero. | |||
| * | |||
| * @param n the zero-based index of the range, which must be less than {@link #length()} | |||
| */ | |||
| public int getBitmask(int n) { | |||
| return bitmasks.charAt(n); | |||
| } | |||
| /** | |||
| * Returns whether the given digit sequence is in one of the ranges specified by this instance. | |||
| * This is more efficient that obtaining the associated {@code RangeSet} and checking that. | |||
| */ | |||
| public boolean matches(DigitSequence digits) { | |||
| if (digits.length() != length()) { | |||
| return false; | |||
| } | |||
| for (int n = 0; n < length(); n++) { | |||
| if ((bitmasks.charAt(n) & (1 << digits.getDigit(n))) == 0) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| // Returns the next sequence in forward order which is contained by a range defined by this | |||
| // range specification, or null if none exists. The given sequence must not be matched by this | |||
| // specification (an IllegalArgumentException is thrown if it is). | |||
| private DigitSequence nextRangeStart(DigitSequence s) { | |||
| // Easy length based checks (this is where the fact that range specifications only define | |||
| // ranges of the same length really simplifies things). | |||
| if (s.length() < length()) { | |||
| return min(); | |||
| } else if (s.length() > length()) { | |||
| return null; | |||
| } | |||
| // Algorithm: | |||
| // 1) Find the highest digit that isn't in the corresponding bitmask for the range. | |||
| // 2) Try and increase the digit value until it's inside the next available range. | |||
| // 3) If that fails, move back up the sequence and increment the next digit up. | |||
| // 4) Repeat until a digit can be adjusted to start a new range, or all digits are exhausted. | |||
| // If all digits exhausted, the sequence was above all ranges in this specification. | |||
| // Otherwise return a new sequence using the unchanged prefix of the original sequence, the | |||
| // newly adjusted digit and the trailing digits of the minimal sequence. | |||
| for (int n = 0; n < length(); n++) { | |||
| int d = s.getDigit(n); | |||
| int mask = bitmasks.charAt(n); | |||
| if ((mask & (1 << d)) != 0) { | |||
| // This digit is permitted by the specification, so keep looking further down the sequence. | |||
| continue; | |||
| } | |||
| while (true) { | |||
| // Digit 'd' is either outside the range mask (first time through the loop) or inside a | |||
| // range. Either way we want to find the next digit above it which is inside a range. | |||
| // First increment 'd', and then find the next set bit in the mask at or above that point. | |||
| // No extra check is needed at the end of ranges because numberOfTrailingZeros(0)==32 | |||
| // which neatly ensures that the new value of 'd' must be out-of-range. | |||
| // If mask=[3-58]: d=1-->d'=3, d=4-->d'=5, d=5-->d'=8, d=8-->d'>9 | |||
| d++; | |||
| d += numberOfTrailingZeros(mask >>> d); | |||
| if (d <= 9) { | |||
| // Found the value of the largest digit which can be adjusted to start the next range. | |||
| // Everything higher than this digit is the same as the original sequence and everything | |||
| // lower than this digit is the same as the corresponding digit in the minimal value. | |||
| return s.first(n).extendBy(d).extendBy(min.last((length() - n) - 1)); | |||
| } | |||
| // No more bits available in this range, so go back up to the previous range. | |||
| if (--n < 0) { | |||
| // The sequence was above the last element in the set. | |||
| // Example: Range Spec: 1[2-8][3-8]456, Sequence: 188457 | |||
| return null; | |||
| } | |||
| d = s.getDigit(n); | |||
| mask = bitmasks.charAt(n); | |||
| } | |||
| } | |||
| // If we finish the outer loop the given sequence was in a range (which is an error). | |||
| throw new IllegalArgumentException( | |||
| "Digit sequence '" + s + "' is in the range specified by: " + this); | |||
| } | |||
| // Given a sequence inside a range defined by this specification, return the highest sequence | |||
| // in the current range (possibly just the given sequence). | |||
| private DigitSequence currentRangeEnd(DigitSequence s) { | |||
| // Build up a value representing the trailing digits (which must always be 9's). | |||
| long nines = 0; | |||
| // Work backwards over the trailing "any digit" positions, adding one '9' to the suffix for each | |||
| // position skipped. The first position which is not "any digit" determines the range end. | |||
| for (int n = length() - 1; n >= 0; n--, nines = (10 * nines) + 9) { | |||
| int mask = bitmasks.charAt(n); | |||
| if (mask == ALL_DIGITS_MASK) { | |||
| continue; | |||
| } | |||
| // The new digit is the top of the current range that the current sequence digit is in. | |||
| // NOTE(review): nextUnsetBit() is defined elsewhere in this file; presumably it returns the | |||
| // index of the first clear bit in the mask at or above the given bit - confirm. | |||
| int d = nextUnsetBit(mask, s.getDigit(n)) - 1; | |||
| DigitSequence end = | |||
| s.first(n).extendBy(d).extendBy(new DigitSequence((length() - n) - 1, nines)); | |||
| // Edge case for cases like "12[34][09]x" where "1239x" and "1240x" abut. This adjustment | |||
| // will happen at most once because the second range cannot also include an upper bound | |||
| // ending at '9', since otherwise (mask == ALL_DIGITS_MASK) at this position. The next | |||
| // sequence must be terminated with zeros starting at the current position having "rolled | |||
| // over" on the digit above. | |||
| if (d == 9) { | |||
| DigitSequence next = end.next(); | |||
| if (matches(next)) { | |||
| d = nextUnsetBit(mask, 0) - 1; | |||
| end = next.first(n).extendBy(d).extendBy(new DigitSequence((length() - n) - 1, nines)); | |||
| } | |||
| } | |||
| return end; | |||
| } | |||
| // The range specification is entirely 'x', which means it's a single range. | |||
| return max; | |||
| } | |||
| /** | |||
| * Returns a generating iterable which iterates in forward order over the disjoint ranges | |||
| * defined by this specification. This is not actually as useful as you might expect because in | |||
| * a lot of cases you would be dealing with a sequence of range specifications and it's not true | |||
| * that all ranges from multiple specifications are disjoint. | |||
| * | |||
| * <p>Each call to {@code iterator()} starts a fresh iteration from the minimum sequence. As | |||
| * required by the {@link Iterator} contract, calling {@code next()} on an exhausted iterator | |||
| * throws {@link java.util.NoSuchElementException}. | |||
| */ | |||
| Iterable<Range<DigitSequence>> asRanges() { | |||
| return () -> new Iterator<Range<DigitSequence>>() { | |||
| // Start of the next range to be returned (always inside a range), or null when exhausted. | |||
| private DigitSequence start = min; | |||
| @Override | |||
| public boolean hasNext() { | |||
| return start != null; | |||
| } | |||
| @Override | |||
| public Range<DigitSequence> next() { | |||
| // Fail as the Iterator contract requires, rather than passing null to currentRangeEnd(). | |||
| if (start == null) { | |||
| throw new java.util.NoSuchElementException(); | |||
| } | |||
| DigitSequence end = currentRangeEnd(start); | |||
| Range<DigitSequence> r = Range.closed(start, end).canonical(DigitSequence.domain()); | |||
| // The sequence after the end of this range is outside it, so search on from there. | |||
| start = nextRangeStart(end.next()); | |||
| return r; | |||
| } | |||
| }; | |||
| } | |||
| /** | |||
| * Returns a new range specification which is extended by the given mask value. For example: | |||
| * <pre>{@code | |||
| * "0123[4-6]".extendByMask(7) == "0123[4-6][0-2]" | |||
| * }</pre> | |||
| */ | |||
| public RangeSpecification extendByMask(int mask) { | |||
| checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "bad mask value '%s'", mask); | |||
| return new RangeSpecification(bitmasks + ((char) mask)); | |||
| } | |||
| /** | |||
| * Returns a new range specification which is extended by the given specification. For example: | |||
| * <pre>{@code | |||
| * "0123[4-6]".extendBy("7[89]") == "0123[4-6]7[89]" | |||
| * }</pre> | |||
| */ | |||
| public RangeSpecification extendBy(RangeSpecification extra) { | |||
| return new RangeSpecification(bitmasks + extra.bitmasks); | |||
| } | |||
| /** | |||
| * Returns a new range specification which is extended by a sequence of any digits of the given | |||
| * length. For example: | |||
| * <pre>{@code | |||
| * "012".extendByLength(4) == "012xxxx" | |||
| * }</pre> | |||
| */ | |||
| public RangeSpecification extendByLength(int length) { | |||
| return this.extendBy(any(length)); | |||
| } | |||
| /** | |||
| * Returns a range specification containing only the first {@code n} elements of this one. If | |||
| * {@code n} is equal to or greater than this specification's length, this specification is | |||
| * returned. For example: | |||
| * <pre>{@code | |||
| * "01[2-4]xx".first(8) == "01[2-4]xx" (same instance) | |||
| * "01[2-4]xx".first(5) == "01[2-4]xx" (same instance) | |||
| * "01[2-4]xx".first(3) == "01[2-4]" | |||
| * "01[2-4]xx".first(0) == "" (the empty specification) | |||
| * }</pre> | |||
| * | |||
| * @throws IllegalArgumentException if {@code n} is negative | |||
| */ | |||
| public RangeSpecification first(int n) { | |||
| checkArgument(n >= 0); | |||
| if (n == 0) { | |||
| return empty(); | |||
| } | |||
| if (n >= length()) { | |||
| return this; | |||
| } | |||
| return new RangeSpecification(bitmasks.substring(0, n)); | |||
| } | |||
| /** | |||
| * Returns a range specification containing only the last {@code n} digits. If the given length | |||
| * is the same or greater than the specification's length, this specification is returned. | |||
| * For example: | |||
| * <pre>{@code | |||
| * "01[2-4]xx".last(8) == "01[2-4]xx" (same instance) | |||
| * "01[2-4]xx".last(5) == "01[2-4]xx" (same instance) | |||
| * "01[2-4]xx".last(3) == "[2-4]xx" | |||
| * "01[2-4]xx".last(0) == "" (the empty specification) | |||
| * }</pre> | |||
| */ | |||
| public RangeSpecification last(int n) { | |||
| checkArgument(n >= 0); | |||
| if (n == 0) { | |||
| return empty(); | |||
| } | |||
| return n < length() ? new RangeSpecification(bitmasks.substring(length() - n)) : this; | |||
| } | |||
| /** | |||
| * Returns a range specification with any trailing run of "any digit" elements removed. For | |||
| * example: | |||
| * <pre>{@code | |||
| * "0123".getPrefix() == "0123" (same instance) | |||
| * "0123xx".getPrefix() == "0123" | |||
| * "xxx".getPrefix() == "" (the empty specification) | |||
| * }</pre> | |||
| */ | |||
| public RangeSpecification getPrefix() { | |||
| // Index of the last element which is not "any digit" (or -1 if there is no such element). | |||
| int lastIdx = length() - 1; | |||
| while (lastIdx >= 0 && getBitmask(lastIdx) == ALL_DIGITS_MASK) { | |||
| lastIdx--; | |||
| } | |||
| return first(lastIdx + 1); | |||
| } | |||
| /** | |||
| * Orders range specifications by the most significant element at which their masks differ, and | |||
| * by length if one specification is a prefix of the other. | |||
| */ | |||
| @Override | |||
| public int compareTo(RangeSpecification other) { | |||
| int sharedLength = Math.min(length(), other.length()); | |||
| for (int i = 0; i < sharedLength; i++) { | |||
| int ours = getBitmask(i); | |||
| int theirs = other.getBitmask(i); | |||
| if (ours != theirs) { | |||
| // Keep only the digits unique to each side. At least one of the resulting masks is | |||
| // non-zero and they cannot overlap. | |||
| int onlyOurs = ours & ~theirs; | |||
| int onlyTheirs = theirs & ~ours; | |||
| // The mask with the lowest set bit is the smaller one in the ordering, since that bit | |||
| // distinguishes a smaller prefix that can never exist in the other specification. | |||
| // Counting trailing zeros is equivalent to finding the lowest set bit. | |||
| return Integer.compare(numberOfTrailingZeros(onlyOurs), numberOfTrailingZeros(onlyTheirs)); | |||
| } | |||
| } | |||
| return Integer.compare(length(), other.length()); | |||
| } | |||
| @Override | |||
| public boolean equals(Object o) { | |||
| return (o instanceof RangeSpecification) && bitmasks.equals(((RangeSpecification) o).bitmasks); | |||
| } | |||
| @Override | |||
| public int hashCode() { | |||
| return bitmasks.hashCode(); | |||
| } | |||
| /** | |||
| * If you want lexicographical ordering of range specifications, don't use this method, use the | |||
| * {@code min().toString()}. This works assuming the ranges being compared are disjoint. | |||
| */ | |||
| @Override | |||
| public String toString() { | |||
| // Consider caching if it turns out that we are serializing a lot of these. | |||
| StringBuilder s = new StringBuilder(); | |||
| for (int n = 0; n < bitmasks.length(); n++) { | |||
| appendMask(bitmasks.charAt(n), s); | |||
| } | |||
| return s.toString(); | |||
| } | |||
| /** Returns the string representation of a single bit-mask. */ | |||
| public static String toString(int bitMask) { | |||
| checkArgument(bitMask > 0 && bitMask < (1 << 10), "bad mask value: %s", bitMask); | |||
| return appendMask(bitMask, new StringBuilder()).toString(); | |||
| } | |||
| static StringBuilder appendMask(int mask, StringBuilder out) { | |||
| if (mask == ALL_DIGITS_MASK) { | |||
| out.append('x'); | |||
| } else if (hasOneBit(mask)) { | |||
| out.append(asChar(numberOfTrailingZeros(mask))); | |||
| } else { | |||
| out.append('['); | |||
| for (int loBit = numberOfTrailingZeros(mask); | |||
| loBit != 32; | |||
| loBit = numberOfTrailingZeros(mask)) { | |||
| // Always append the loBit digit into the range. | |||
| out.append(asChar(loBit)); | |||
| int hiBit = nextUnsetBit(mask, loBit); | |||
| int numBits = hiBit - loBit; | |||
| if (numBits > 1) { | |||
| // Stylistically prefer "[34]" to "[3-4]" for compactness. | |||
| if (numBits > 2) { | |||
| out.append('-'); | |||
| } | |||
| out.append(asChar(hiBit - 1)); | |||
| } | |||
| // Clear the bits we've just processed before going back round the loop. | |||
| mask &= ~((1 << hiBit) - 1); | |||
| } | |||
| out.append(']'); | |||
| } | |||
| return out; | |||
| } | |||
| // Turns a value in the range [0-9] into the corresponding ASCII character. | |||
| private static char asChar(int digit) { | |||
| return (char) ('0' + digit); | |||
| } | |||
| // Determines if the given bit-mask has only one bit set. | |||
| private static boolean hasOneBit(int mask) { | |||
| return (mask & (mask - 1)) == 0; | |||
| } | |||
| private static int nextUnsetBit(int mask, int bit) { | |||
| // Example mask transform for [013-589] if bit=3: | |||
| // v-- bit=3 | |||
| // 01100111011 | |||
| // 00000000111 (1 << 3) - 1 | |||
| // 01100111111 OR with mask | |||
| // 10011000000 Bitwise NOT | |||
| // ^-- return=6 | |||
| return numberOfTrailingZeros(~(mask | ((1 << bit) - 1))); | |||
| } | |||
| } | |||
| @ -0,0 +1,194 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; | |||
| import java.util.ArrayList; | |||
| import java.util.List; | |||
| /** | |||
| * Factor a range tree into a sequence of trees which attempts to minimize overall complexity in | |||
| * the face of non-determinism. This can be used to reduce the size of any generated regular | |||
| * expressions. | |||
| */ | |||
| public final class RangeTreeFactorizer { | |||
| /** Strategies to control how merging is achieved when building factors.*/ | |||
| public enum MergeStrategy { | |||
| /** | |||
| * Edges are only merged if they accept exactly the same set of digits. If the existing factor | |||
| * contains "[0-5]" it will not be merged with the candidate edge "[0-8]". | |||
| */ | |||
| REQUIRE_EQUAL_EDGES, | |||
| /** | |||
| * Edges can be merged if the candidate edge accepts more digits than the existing edge. If the | |||
| * existing factor contains "[0-5]" and the candidate edge is "[0-8]", the candidate edge is | |||
| * split so that "[0-5]" is merged as normal and an additional edge "[6-8]" is branched off. | |||
| */ | |||
| ALLOW_EDGE_SPLITTING, | |||
| } | |||
| /** | |||
| * Factors the given range tree. | |||
| * <p> | |||
| * Paths are processed longest-first, and a path belongs in particular "factor" if it can be | |||
| * added without "causing a split" in the existing factor. For example, given an existing factor | |||
| * {@code {"12[3-6]x", "45xx"}}: | |||
| * <ul> | |||
| * <li>The path "12[3-6]" can be added, since it is a prefix of one of the existing paths in | |||
| * the DFA. | |||
| * <li>The path "13xx" can be added since it forms a new branch in the DFA, which does not | |||
| * affect any existing branches ("13..." is disjoint with "12..."). | |||
| * <li>The path "12[34]" cannot be added since it would "split" the existing path | |||
| * "12[3-6]x" in the DFA ("[34]" is a subset of "[3-6]"). " | |||
| * <li>Depending on the merge strategy, the path "12[0-6]x" might be added ("[0-6]" is a | |||
| * superset of "[3-6]"). See {@link MergeStrategy} for more information. | |||
| * </ul> | |||
| */ | |||
| public static ImmutableList<RangeTree> factor(RangeTree ranges, MergeStrategy strategy) { | |||
| // If only one length on all paths, the DFA is already "factored". | |||
| if (ranges.getLengths().size() == 1) { | |||
| return ImmutableList.of(ranges); | |||
| } | |||
| List<RangeTree> factors = new ArrayList<>(); | |||
| // Start with the "naive" factors (splitting by length) from longest to shortest. | |||
| for (int n : ranges.getLengths().descendingSet()) { | |||
| factors.add(ranges.intersect(RangeTree.from(RangeSpecification.any(n)))); | |||
| } | |||
| // Now attempt to merge as much of each of the shorter factors as possible into the longer ones. | |||
| // In each loop we subsume a candidate factor into previous factors, either in whole or in part. | |||
| int index = 1; | |||
| while (index < factors.size()) { | |||
| // Merge (as much as possible) each "naive" factor into earlier factors. | |||
| RangeTree r = factors.get(index); | |||
| for (int n = 0; n < index && !r.isEmpty(); n++) { | |||
| RangeTree merged = new RangeTreeFactorizer(factors.get(n), strategy).mergeFrom(r); | |||
| factors.set(n, merged); | |||
| // Calculate the ranges which haven't yet been merged into any earlier factor. | |||
| r = r.subtract(merged); | |||
| } | |||
| if (r.isEmpty()) { | |||
| // All ranges merged, so remove the original factor (index now references the next factor). | |||
| factors.remove(index); | |||
| } else { | |||
| // We have some un-factorable ranges which are kept to start a new factor. | |||
| factors.set(index, r); | |||
| index++; | |||
| } | |||
| } | |||
| return ImmutableList.copyOf(factors); | |||
| } | |||
| // This is modified as paths are added. | |||
| private RangeTree factor; | |||
| private final MergeStrategy strategy; | |||
| RangeTreeFactorizer(RangeTree factor, MergeStrategy strategy) { | |||
| this.factor = checkNotNull(factor); | |||
| this.strategy = strategy; | |||
| } | |||
| RangeTree mergeFrom(RangeTree ranges) { | |||
| recursivelyMerge(ranges.getInitial(), factor.getInitial(), RangeSpecification.empty()); | |||
| return factor; | |||
| } | |||
| void recursivelyMerge(DfaNode srcNode, DfaNode dstNode, RangeSpecification path) { | |||
| if (srcNode.canTerminate()) { | |||
| factor = factor.union(RangeTree.from(path)); | |||
| } else { | |||
| srcNode.accept(new FactoringVisitor(dstNode, path)); | |||
| } | |||
| } | |||
| private final class FactoringVisitor implements DfaVisitor { | |||
| private final RangeSpecification path; | |||
| private final DfaNode dstNode; | |||
| // True if we encountered a situation when an edge we are merging (srcMask) has a partial | |||
| // overlap with the existing edge (dstMask) (e.g. merging "[0-6]" into "[4-9]"). This is | |||
| // distinct from the case where the existing edge is a subset of the edge being merged (e.g. | |||
| // merging "[0-6]" into "[2-4]", where the edge being merged can be split into "[0156]" and | |||
| // "[2-4]"). In either strategy, a partial overlap will prevent merging. | |||
| private boolean partialOverlap = false; | |||
| // Records the union of all edge ranges visited for the current node. This is used to determine | |||
| // the remaining edges that must be added after visiting the existing factor (especially in the | |||
| // case of ALLOW_EDGE_SPLITTING). | |||
| private int allDstMask = 0; | |||
| FactoringVisitor(DfaNode dstNode, RangeSpecification path) { | |||
| this.dstNode = dstNode; | |||
| this.path = path; | |||
| } | |||
| @Override | |||
| public void visit(DfaNode source, DfaEdge srcEdge, DfaNode srcTarget) { | |||
| int srcMask = srcEdge.getDigitMask(); | |||
| dstNode.accept((s, dstEdge, dstTarget) -> { | |||
| int dstMask = dstEdge.getDigitMask(); | |||
| if ((strategy == REQUIRE_EQUAL_EDGES) ? (dstMask == srcMask) : (dstMask & ~srcMask) == 0) { | |||
| // The set of digits accepted by the edge being merged (mask) is equal-to or a superset | |||
| // of the digits of the edge in the factor we are merging into. The path is extended by | |||
| // the destination edge because during recursion we only follow paths already in the | |||
| // factor. | |||
| recursivelyMerge(srcTarget, dstTarget, path.extendByMask(dstMask)); | |||
| } else { | |||
| partialOverlap |= (dstMask & srcMask) != 0; | |||
| } | |||
| allDstMask |= dstMask; | |||
| }); | |||
| if (!partialOverlap) { | |||
| // Work out the digits that weren't in any of the edges of the factor we were processing | |||
| // and merge the sub-tree under that edge into the current factor. For REQUIRE_EQUAL_EDGES | |||
| // the extraMask is always either srcMask or 0 (since the edge was either added in full, | |||
| // or disjoint with all the existing edges). For ALLOW_EDGE_SPLITTING it's the remaining | |||
| // range that wasn't merged with any of the existing paths. | |||
| int extraMask = srcMask & ~allDstMask; | |||
| if (extraMask != 0) { | |||
| new MergingVisitor(path).recurse(srcTarget, extraMask); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| private final class MergingVisitor implements DfaVisitor { | |||
| private final RangeSpecification path; | |||
| MergingVisitor(RangeSpecification path) { | |||
| this.path = checkNotNull(path); | |||
| } | |||
| void recurse(DfaNode node, int mask) { | |||
| RangeSpecification newPath = path.extendByMask(mask); | |||
| if (node.canTerminate()) { | |||
| factor = factor.union(RangeTree.from(newPath)); | |||
| } else { | |||
| node.accept(new MergingVisitor(newPath)); | |||
| } | |||
| } | |||
| @Override | |||
| public void visit(DfaNode source, DfaEdge edge, DfaNode target) { | |||
| recurse(target, edge.getDigitMask()); | |||
| } | |||
| } | |||
| } | |||
| @ -0,0 +1,112 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.base.CaseFormat.LOWER_CAMEL; | |||
| import static com.google.common.base.CaseFormat.UPPER_UNDERSCORE; | |||
| import static com.google.common.base.Preconditions.checkState; | |||
| import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.MOBILE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PAGER; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PERSONAL_NUMBER; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PREMIUM_RATE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.SHARED_COST; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.TOLL_FREE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UAN; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.VOICEMAIL; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.VOIP; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_FIXED_LINE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_MOBILE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PAGER; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PERSONAL_NUMBER; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PREMIUM_RATE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_SHARED_COST; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_TOLL_FREE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UAN; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UNKNOWN; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_VOICEMAIL; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_VOIP; | |||
| import static java.util.function.Function.identity; | |||
| import com.google.common.collect.ImmutableBiMap; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType; | |||
| import java.util.Optional; | |||
| import java.util.stream.Stream; | |||
| /** Static utility for conversion of number types. */ | |||
| public final class Types { | |||
| private static final ImmutableBiMap<String, XmlNumberType> XML_TYPE_MAP = | |||
| Stream.of(XmlNumberType.values()) | |||
| .filter(t -> t != XML_UNKNOWN && t != XmlNumberType.UNRECOGNIZED) | |||
| .collect(toImmutableBiMap(Types::toXmlName, identity())); | |||
| // Map the subset of XmlNumberType values which correspond to valid number types. Note that while | |||
| // FIXED_LINE and MOBILE exist in both types, and can be converted, their semantics change. | |||
| private static final ImmutableBiMap<XmlNumberType, ValidNumberType> XML_TO_SCHEMA_TYPE_MAP = | |||
| ImmutableBiMap.<XmlNumberType, ValidNumberType>builder() | |||
| .put(XML_FIXED_LINE, FIXED_LINE) | |||
| .put(XML_MOBILE, MOBILE) | |||
| .put(XML_PAGER, PAGER) | |||
| .put(XML_TOLL_FREE, TOLL_FREE) | |||
| .put(XML_PREMIUM_RATE, PREMIUM_RATE) | |||
| .put(XML_SHARED_COST, SHARED_COST) | |||
| .put(XML_PERSONAL_NUMBER, PERSONAL_NUMBER) | |||
| .put(XML_VOIP, VOIP) | |||
| .put(XML_UAN, UAN) | |||
| .put(XML_VOICEMAIL, VOICEMAIL) | |||
| .build(); | |||
| /** Returns the set of valid XML type names. */ | |||
| public static ImmutableSet<String> getXmlNames() { | |||
| return XML_TYPE_MAP.keySet(); | |||
| } | |||
| /** Returns the XML element name based on the given XML range type. */ | |||
| public static String toXmlName(XmlNumberType type) { | |||
| checkState(type.name().startsWith("XML_"), "Bad type: %s", type); | |||
| return UPPER_UNDERSCORE.to(LOWER_CAMEL, type.name().substring(4)); | |||
| } | |||
| /** Returns the XML element name based on the given XML shortcode type. */ | |||
| public static String toXmlName(XmlShortcodeType type) { | |||
| checkState(type.name().startsWith("SC_"), "Bad type: %s", type); | |||
| return UPPER_UNDERSCORE.to(LOWER_CAMEL, type.name().substring(3)); | |||
| } | |||
| /** | |||
| * Returns the XML range type based on the given case-sensitive XML element name (e.g. | |||
| * "fixedLine"). | |||
| */ | |||
| public static Optional<XmlNumberType> forXmlName(String xmlName) { | |||
| return Optional.ofNullable(XML_TYPE_MAP.get(xmlName)); | |||
| } | |||
| /** Returns the {@code ValidNumberType} equivalent of the given XML range type (if it exists). */ | |||
| public static Optional<ValidNumberType> toSchemaType(XmlNumberType rangeType) { | |||
| return Optional.ofNullable(XML_TO_SCHEMA_TYPE_MAP.get(rangeType)); | |||
| } | |||
| /** Returns the {@code XmlNumberType} equivalent of the given schema range type (if it exists). */ | |||
| public static Optional<XmlNumberType> toXmlType(ValidNumberType schemaType) { | |||
| return Optional.ofNullable(XML_TO_SCHEMA_TYPE_MAP.inverse().get(schemaType)); | |||
| } | |||
| private Types() {} | |||
| } | |||
| @ -0,0 +1,99 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.i18n; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkState; | |||
| import static java.util.Comparator.comparing; | |||
| import static java.util.Comparator.naturalOrder; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.ibm.icu.util.ULocale; | |||
| import java.util.Comparator; | |||
| import java.util.regex.Pattern; | |||
| /** | |||
| * A simple type-safe identifier for CLDR regions for phone numbers. Only basic checking of regions | |||
| * is performed, but this should be fine since the set of input regions is tightly controlled. | |||
| * | |||
| * <p>The metadata tooling makes only minimal use of the semantics of region codes, relying on | |||
| * them mainly as key values, and never tries to canonicalize or modify them. | |||
| */ | |||
| @AutoValue | |||
| public abstract class PhoneRegion implements Comparable<PhoneRegion> { | |||
| // We limit the non XX region codes to just "world" for this project. | |||
| private static final Pattern VALID_CODE = Pattern.compile("[A-Z]{2}|001"); | |||
| // Since we want "ZZ" < "001" in the ordering. | |||
| private static Comparator<PhoneRegion> ORDERING = | |||
| comparing(r -> r.locale().getCountry(), | |||
| comparing(String::length).thenComparing(naturalOrder())); | |||
| private static final PhoneRegion UNKNOWN = of("ZZ"); | |||
| private static final PhoneRegion WORLD = of("001"); | |||
| /** Returns the "world" region (001). */ | |||
| public static PhoneRegion getWorld() { | |||
| return PhoneRegion.WORLD; | |||
| } | |||
| /** Returns the "unknown" region (ZZ). */ | |||
| public static PhoneRegion getUnknown() { | |||
| return PhoneRegion.UNKNOWN; | |||
| } | |||
| /** | |||
| * Returns the region identified by the given case-insensitive CLDR String representation. | |||
| * | |||
| * @throws IllegalArgumentException if there is no region for {@code cldrCode} | |||
| */ | |||
| public static PhoneRegion of(String cldrCode) { | |||
| checkArgument(VALID_CODE.matcher(cldrCode).matches(), "invalid region code: %s", cldrCode); | |||
| return new AutoValue_PhoneRegion(new ULocale.Builder().setRegion(cldrCode).build()); | |||
| } | |||
| @Override | |||
| public int compareTo(PhoneRegion other) { | |||
| return ORDERING.compare(this, other); | |||
| } | |||
| /** Returns the string representation for the region (either a two-letter or three-digit code). */ | |||
| @Override public final String toString() { | |||
| String s = locale().getCountry(); | |||
| checkArgument(!s.isEmpty(), "invalid (empty) country: %s", locale()); | |||
| return s; | |||
| } | |||
| // Visible for AutoValue only. | |||
| abstract ULocale locale(); | |||
| /** | |||
| * Return an English identifier for the region in the form {@code "<region name> (<cldr code>)"}. | |||
| * If the English name is not available, then {@code "Region: <cldr code>"} is returned. This | |||
| * This string is only suitable for use in comments. | |||
| * | |||
| * @throws IllegalStateException if this method is called on the "world" region. | |||
| */ | |||
| public String getEnglishNameForXmlComments() { | |||
| checkState(!equals(getWorld()), "cannot ask for display name of 'world' region"); | |||
| String regionStr = locale().getCountry(); | |||
| // Use "US" so we get "en_US", and not just "en", since the policy is to use the name as it | |||
| // would appear in America. | |||
| String displayCountry = locale().getDisplayCountry(ULocale.US); | |||
| return !displayCountry.isEmpty() && !displayCountry.equals(regionStr) | |||
| ? String.format("%s (%s)", displayCountry, regionStr) | |||
| : String.format("Region: %s", regionStr); | |||
| } | |||
| } | |||
| @ -0,0 +1,60 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.i18n; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import com.google.auto.value.AutoValue; | |||
| import java.util.regex.Pattern; | |||
| /** | |||
| * A simple type-safe identifier for BCP 47 language tags containing only language code and an | |||
| * optional script (e.g. "en" or "zh-Hant"). This class does no canonicalization on the values its | |||
| * given, apart from normalizing the separator to a hyphen. | |||
| * | |||
| * <p>We can't really use {@code Locale} here because there's an issue whereby the JDK deliberately | |||
| * uses deprecated language tags and would, for example, convert "id" (Indonesian) to "in", which | |||
| * is at odds with BCP 47. See {@link java.util.Locale#forLanguageTag(String) forLanguageTag()} for | |||
| * more information. | |||
| * | |||
| * <p>The metadata tooling makes only minimal use of the semantics of language codes, relying on | |||
| * them mainly as key values, and never tries to canonicalize or modify them (i.e. it is possible | |||
| * that a language code used for this data may end up being non-canonical). It is up to any library | |||
| * which loads the metadata at runtime to ensure that its mappings to the data account for current | |||
| * canonicalization. | |||
| */ | |||
| @AutoValue | |||
| public abstract class SimpleLanguageTag { | |||
| // This can be extended or modified to use Locale as necessary. | |||
| private static final Pattern SIMPLE_TAG = Pattern.compile("[a-z]{2,3}(?:[-_][A-Z][a-z]{3})?"); | |||
| /** | |||
| * Returns a language tag instance for the given string with minimal structural checking. If the | |||
| * given tag uses {@code '_'} for separating language and script it's converted into {@code '-'}. | |||
| */ | |||
| public static SimpleLanguageTag of(String lang) { | |||
| checkArgument(SIMPLE_TAG.matcher(lang).matches(), "invalid language tag: %s", lang); | |||
| return new AutoValue_SimpleLanguageTag(lang.replace('_', '-')); | |||
| } | |||
| // Visible for AutoValue only. | |||
| abstract String lang(); | |||
| @Override | |||
| public final String toString() { | |||
| return lang(); | |||
| } | |||
| } | |||
| @ -0,0 +1,94 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.auto.value.extension.memoized.Memoized; | |||
| import com.google.common.base.Ascii; | |||
| import com.google.common.base.CharMatcher; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatGroup; | |||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||
| import java.util.Optional; | |||
| /** | |||
| * An alternate format, used to describe less common ways we believe a phone number can be | |||
| * formatted in a region. These can be derived from an "alias" in the formats table, or as | |||
| * "historical" formats which are not associated with any specific current format. | |||
| * | |||
| * <p>Note that alternate formats can be defined with the same template, and they are merged | |||
| * together to produce a canonical map in which the format template is the key. | |||
| */ | |||
| @AutoValue | |||
| public abstract class AltFormatSpec { | |||
| private static final CharMatcher OPT_DIGIT = CharMatcher.is('*'); | |||
| private static final CharMatcher ANY_DIGIT = CharMatcher.is('X'); | |||
| private static final CharMatcher ALLOWED_TEMPLATE_CHARS = CharMatcher.anyOf("X* "); | |||
| public static AltFormatSpec create( | |||
| FormatTemplate template, RangeSpecification prefix, String parent, Optional<String> comment) { | |||
| // As only a limited set of chars is allowed, we know things like national prefix or carrier | |||
| // codes cannot be present. We're just interested in basic grouping like "XXX XXX**". | |||
| String spec = template.getSpecifier(); | |||
| checkArgument(ALLOWED_TEMPLATE_CHARS.matchesAllOf(spec) && !template.getXmlPrefix().isPresent(), | |||
| "invalid alternate format template: %s", template); | |||
| // Prefix must be shorter than the template and not contain any trailing 'x'. | |||
| checkArgument(prefix.length() <= template.minLength() && prefix.equals(prefix.getPrefix()), | |||
| "invalid prefix '%s' for alternate format template: %s", prefix, template); | |||
| // If variable length, the spec must have room for the prefix before the '*' characters. | |||
| checkArgument( | |||
| OPT_DIGIT.matchesNoneOf(spec) | |||
| || prefix.length() <= ANY_DIGIT.countIn(spec.substring(0, OPT_DIGIT.indexIn(spec))), | |||
| "invalid prefix '%s' for alternate format template: %s", prefix, template); | |||
| return new AutoValue_AltFormatSpec(template, prefix, parent, comment); | |||
| } | |||
| /** Return the alternate format template containing only simple grouping (e.g. "XXX XXX**"). */ | |||
| public abstract FormatTemplate template(); | |||
| /** | |||
| * Returns the prefix for this alternate format which (along with the template length) defines | |||
| * the bounds over which this format can apply based. | |||
| */ | |||
| public abstract RangeSpecification prefix(); | |||
| /** Returns the ID of the format for which this specifier is an alternative. */ | |||
| public abstract String parentFormatId(); | |||
| /** Returns the arbitrary comment, possibly containing newlines, for this format. */ | |||
| public abstract Optional<String> comment(); | |||
| /** Returns the format specifier as used in the CSV representation (e.g. "20 XXX XXX"). */ | |||
| @Memoized | |||
| public String specifier() { | |||
| RangeSpecification prefix = prefix(); | |||
| int digitIdx = 0; | |||
| StringBuilder buf = new StringBuilder(); | |||
| for (FormatGroup g : template().getGroups()) { | |||
| for (int i = 0; i < g.maxLength(); i++, digitIdx++) { | |||
| // Uppercasing is so that 'x' --> 'X' | |||
| buf.append(digitIdx < prefix.length() | |||
| ? Ascii.toUpperCase(RangeSpecification.toString(prefix.getBitmask(digitIdx))) | |||
| : (i < g.minLength() ? "X" : "*")); | |||
| } | |||
| buf.append(" "); | |||
| } | |||
| buf.setLength(buf.length() - 1); | |||
| return buf.toString(); | |||
| } | |||
| } | |||
| @ -0,0 +1,146 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.CharMatcher.whitespace; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper; | |||
| import static java.util.function.Function.identity; | |||
| import com.google.common.annotations.VisibleForTesting; | |||
| import com.google.common.base.Ascii; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTableCollector; | |||
| import java.io.BufferedReader; | |||
| import java.io.IOException; | |||
| import java.io.Reader; | |||
| import java.io.Writer; | |||
| import java.nio.file.Files; | |||
| import java.nio.file.Path; | |||
| import java.util.ArrayList; | |||
| import java.util.List; | |||
| import java.util.Optional; | |||
| import java.util.function.Consumer; | |||
| import java.util.function.Function; | |||
| import java.util.function.Supplier; | |||
| import java.util.stream.Stream; | |||
| import javax.annotation.Nullable; | |||
| /** | |||
| * The schema of the "AltFormats" table with rows identified by an "alternate format specifier": | |||
| * <ol> | |||
| * <li>{@link #PARENT}: The ID of the "main" format that this is an alternate of. | |||
| * <li>{@link #COMMENT}: Freeform comment text. | |||
| * </ol> | |||
| * | |||
| * <p>Rows keys are serialized via the marshaller and produce the leading column: | |||
| * <ol> | |||
| * <li>{@code Format}: The alternate format specifier including prefix and grouping information | |||
| * (e.g. "20 XXXX XXXX"). | |||
| * </ol> | |||
| */ | |||
| public final class AltFormatsSchema { | |||
| private static final String FORMAT = "Format"; | |||
| private static final String PARENT = "Parent Format"; | |||
| private static final String COMMENT = "Comment"; | |||
| public static final ImmutableList<String> HEADER = ImmutableList.of(FORMAT, PARENT, COMMENT); | |||
| private static final CsvParser CSV_PARSER = CsvParser.withSeparator(';').trimWhitespace(); | |||
| private static final RowMapper ROW_MAPPER = | |||
| rowMapper(h -> checkArgument(h.equals(HEADER), "unexpected alt-format header: %s", h)); | |||
| /** Loads the alternate formats from a given file path. */ | |||
| public static ImmutableList<AltFormatSpec> loadAltFormats(Path path) { | |||
| if (!Files.exists(path)) { | |||
| return ImmutableList.of(); | |||
| } | |||
| try (Reader csv = Files.newBufferedReader(path)) { | |||
| return importAltFormats(csv); | |||
| } catch (IOException e) { | |||
| throw new RuntimeException(e); | |||
| } | |||
| } | |||
| @VisibleForTesting | |||
| static ImmutableList<AltFormatSpec> importAltFormats(Reader csv) throws IOException { | |||
| List<AltFormatSpec> altFormats = new ArrayList<>(); | |||
| Consumer<Stream<String>> rowCallback = getRowCallback(altFormats); | |||
| try (BufferedReader r = new BufferedReader(csv)) { | |||
| CSV_PARSER.parse(r.lines(), | |||
| row -> rowCallback.accept(row.map(CsvTable::unescapeSingleLineCsvText))); | |||
| } | |||
| return ImmutableList.copyOf(altFormats); | |||
| } | |||
| public static ImmutableList<AltFormatSpec> importAltFormats(Supplier<List<String>> rows) { | |||
| List<AltFormatSpec> altFormats = new ArrayList<>(); | |||
| Consumer<Stream<String>> rowCallback = getRowCallback(altFormats); | |||
| // Expect header row always. | |||
| rowCallback.accept(rows.get().stream()); | |||
| List<String> row; | |||
| while ((row = rows.get()) != null) { | |||
| rowCallback.accept(row.stream()); | |||
| } | |||
| return ImmutableList.copyOf(altFormats); | |||
| } | |||
| private static Consumer<Stream<String>> getRowCallback(List<AltFormatSpec> altFormats) { | |||
| return ROW_MAPPER.mapTo( | |||
| row -> altFormats.add(parseAltFormat(row.get(FORMAT), row.get(PARENT), row.get(COMMENT)))); | |||
| } | |||
| public static AltFormatSpec parseAltFormat( | |||
| String altId, String parent, @Nullable String comment) { | |||
| // "1X [2-8]XXX** XXX" --> "XX XXXX** XXX" | |||
| FormatTemplate template = FormatTemplate.parse(altId.replaceAll("[0-9]|\\[[-0-9]+\\]", "X")); | |||
| // "1X [2-8]XXX** XXX" --> "1X [2-8]" --> "1X[2-8]" --> "1x[2-8]" | |||
| // The prefix here can (and often will be) the empty string. | |||
| // This fails if '*' is ever left in the specification, but that really should not happen. | |||
| RangeSpecification prefix = RangeSpecification.parse( | |||
| Ascii.toLowerCase(whitespace().removeFrom(altId.replaceAll("[X* ]*$", "")))); | |||
| return AltFormatSpec.create(template, prefix, parent, Optional.ofNullable(comment)); | |||
| } | |||
| /** Exports alternate formats to a collector (potentially escaping fields for CSV). */ | |||
| public static void export( | |||
| List<AltFormatSpec> altFormats, Consumer<Stream<String>> collector, boolean toCsv) { | |||
| collector.accept(HEADER.stream()); | |||
| Function<String, String> escapeFn = toCsv ? CsvTable::escapeForSingleLineCsv : identity(); | |||
| altFormats.forEach( | |||
| f -> collector.accept( | |||
| Stream.of(f.specifier(), f.parentFormatId(), f.comment().map(escapeFn).orElse("")))); | |||
| } | |||
| /** Helper method to write alternate formats in same CSV format as CsvTable. */ | |||
| public static boolean exportCsv(Writer csv, List<AltFormatSpec> altFormats) { | |||
| if (altFormats.isEmpty()) { | |||
| return false; | |||
| } | |||
| CsvTableCollector collector = new CsvTableCollector(true); | |||
| export(altFormats, collector, true); | |||
| collector.writeCsv(csv); | |||
| return true; | |||
| } | |||
| private AltFormatsSchema() {} | |||
| } | |||
| @ -0,0 +1,132 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper; | |||
| import static java.util.Comparator.comparing; | |||
| import static java.util.function.Function.identity; | |||
| import com.google.common.annotations.VisibleForTesting; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTableCollector; | |||
| import java.io.BufferedReader; | |||
| import java.io.IOException; | |||
| import java.io.Reader; | |||
| import java.io.Writer; | |||
| import java.nio.file.Files; | |||
| import java.nio.file.Path; | |||
| import java.util.ArrayList; | |||
| import java.util.Comparator; | |||
| import java.util.List; | |||
| import java.util.function.Consumer; | |||
| import java.util.function.Function; | |||
| import java.util.function.Supplier; | |||
| import java.util.stream.Stream; | |||
| /** | |||
| * The data schema for handling XML comments. Note that, unlike other "table" schemas, this does | |||
| * not represent comments in the form of a CsvTable. This is because comment anchors can appear | |||
| * multiple times in the CSV file (so there's no unique key). This is not an issue since the | |||
| * internal data representation handles this, but it just means that code cannot be reused as much. | |||
| */ | |||
| public class CommentsSchema { | |||
| private static final String REGION = "Region"; | |||
| private static final String LABEL = "Label"; | |||
| private static final String COMMENT = "Comment"; | |||
| public static final ImmutableList<String> HEADER = ImmutableList.of(REGION, LABEL, COMMENT); | |||
| private static final Comparator<Comment> ORDERING = comparing(Comment::getAnchor); | |||
| private static final CsvParser CSV_PARSER = CsvParser.withSeparator(';').trimWhitespace(); | |||
| private static final RowMapper ROW_MAPPER = | |||
| rowMapper(h -> checkArgument(h.equals(HEADER), "unexpected comment header: %s", h)); | |||
| /** Loads the comments from a given file path. */ | |||
| public static ImmutableList<Comment> loadComments(Path path) { | |||
| if (!Files.exists(path)) { | |||
| return ImmutableList.of(); | |||
| } | |||
| try (Reader csv = Files.newBufferedReader(path)) { | |||
| return importComments(csv); | |||
| } catch (IOException e) { | |||
| throw new RuntimeException(e); | |||
| } | |||
| } | |||
| @VisibleForTesting | |||
| static ImmutableList<Comment> importComments(Reader csv) throws IOException { | |||
| List<Comment> comments = new ArrayList<>(); | |||
| Consumer<Stream<String>> rowCallback = getRowCallback(comments); | |||
| try (BufferedReader r = new BufferedReader(csv)) { | |||
| CSV_PARSER.parse(r.lines(), | |||
| row -> rowCallback.accept(row.map(CsvTable::unescapeSingleLineCsvText))); | |||
| } | |||
| return ImmutableList.sortedCopyOf(ORDERING, comments); | |||
| } | |||
| public static ImmutableList<Comment> importComments(Supplier<List<String>> rows) { | |||
| List<Comment> comments = new ArrayList<>(); | |||
| Consumer<Stream<String>> rowCallback = getRowCallback(comments); | |||
| // Expect header row always. | |||
| rowCallback.accept(rows.get().stream()); | |||
| List<String> row; | |||
| while ((row = rows.get()) != null) { | |||
| rowCallback.accept(row.stream()); | |||
| } | |||
| return ImmutableList.sortedCopyOf(ORDERING, comments); | |||
| } | |||
| private static Consumer<Stream<String>> getRowCallback(List<Comment> comments) { | |||
| return ROW_MAPPER.mapTo(row -> { | |||
| if (row.containsKey(COMMENT)) { | |||
| comments.add( | |||
| Comment.fromText( | |||
| Anchor.of(PhoneRegion.of(row.get(REGION)), row.get(LABEL)), | |||
| row.get(COMMENT))); | |||
| } | |||
| }); | |||
| } | |||
| /** Exports alternate formats to a collector (potentially escaping fields for CSV). */ | |||
| public static void export( | |||
| List<Comment> comments, Consumer<Stream<String>> collector, boolean toCsv) { | |||
| collector.accept(HEADER.stream()); | |||
| Function<String, String> escapeFn = toCsv ? CsvTable::escapeForSingleLineCsv : identity(); | |||
| comments.stream() | |||
| .sorted(ORDERING) | |||
| .forEach(c -> collector.accept(Stream.of( | |||
| c.getAnchor().region().toString(), c.getAnchor().label(), escapeFn.apply(c.toText())))); | |||
| } | |||
| /** Helper method to write comments in same CSV format as CsvTable. */ | |||
| public static boolean exportCsv(Writer csv, List<Comment> comments) { | |||
| if (comments.isEmpty()) { | |||
| return false; | |||
| } | |||
| CsvTableCollector collector = new CsvTableCollector(true); | |||
| export(comments, collector, true); | |||
| collector.writeCsv(csv); | |||
| return true; | |||
| } | |||
| } | |||
| @ -0,0 +1,236 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||
| import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.auto.value.extension.memoized.Memoized; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.ImmutableSortedMap; | |||
| import com.google.common.collect.ImmutableTable; | |||
| import com.google.common.collect.Iterables; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey; | |||
| import com.google.i18n.phonenumbers.metadata.model.MetadataTableSchema.Regions; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||
| import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeKey; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode; | |||
| import com.google.i18n.phonenumbers.metadata.table.DiffKey; | |||
| import com.google.i18n.phonenumbers.metadata.table.DiffKey.Status; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeKey; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||
| import java.io.IOException; | |||
| import java.util.HashMap; | |||
| import java.util.Map; | |||
| import java.util.Optional; | |||
| /** | |||
| * All CSV based tables and legacy XML for a single calling code. This is the data from which all | |||
| * legacy data can be reconstructed (metadata XML, carrier/geocode/timezone mappings). | |||
| * | |||
| * <p>This is loaded at once, possibly from multiple files, since conversion to legacy formats | |||
| * often requires more than one of these data structures. | |||
| */ | |||
| @AutoValue | |||
| public abstract class CsvData { | |||
| /** CSV data loading API. */ | |||
| public interface CsvDataProvider { | |||
| /** Loads the top-level metadata table which containing data for all supported calling codes.*/ | |||
| CsvTable<DigitSequence> loadMetadata() throws IOException; | |||
| /** Loads the CSV data for a single calling code. */ | |||
| CsvData loadData(DigitSequence cc) throws IOException; | |||
| } | |||
| /** | |||
| * Creates a single CsvData instance, either directly or from a provider. The given metadata | |||
| * table will have the single row relating to the specified calling code removed. | |||
| */ | |||
| public static CsvData create( | |||
| DigitSequence cc, | |||
| CsvTable<DigitSequence> allMetadata, | |||
| CsvTable<RangeKey> ranges, | |||
| CsvTable<ShortcodeKey> shortcodes, | |||
| CsvTable<ExampleNumberKey> examples, | |||
| CsvTable<String> formats, | |||
| ImmutableList<AltFormatSpec> altFormats, | |||
| CsvTable<String> operators, | |||
| ImmutableList<Comment> comments) { | |||
| // Row keys are unique, so we end up with at most 1 row in the filtered table. | |||
| CsvTable<DigitSequence> ccMetadata = | |||
| allMetadata.toBuilder().filterRows(r -> r.equals(cc)).build(); | |||
| checkMetadata(!ccMetadata.getKeys().isEmpty(), "no such calling code %s in metadata", cc); | |||
| checkRegions(ccMetadata, ranges, shortcodes); | |||
| checkNoOverlappingRows(ranges); | |||
| checkNoOverlappingShortcodeRows(shortcodes); | |||
| return new AutoValue_CsvData( | |||
| cc, ccMetadata, ranges, shortcodes, examples, formats, altFormats, operators, comments); | |||
| } | |||
| private static void checkNoOverlappingRows(CsvTable<RangeKey> csv) { | |||
| RangeTree allRanges = RangeTree.empty(); | |||
| for (RangeKey key : csv.getKeys()) { | |||
| RangeTree ranges = key.asRangeTree(); | |||
| checkMetadata(allRanges.intersect(ranges).isEmpty(), "overlapping row in CSV: %s", key); | |||
| allRanges = allRanges.union(ranges); | |||
| } | |||
| } | |||
| private static void checkNoOverlappingShortcodeRows(CsvTable<ShortcodeKey> csv) { | |||
| Map<PhoneRegion, RangeTree> allRangesMap = new HashMap<>(); | |||
| for (ShortcodeKey key : csv.getKeys()) { | |||
| RangeTree allRegionRanges = allRangesMap.getOrDefault(key.getRegion(), RangeTree.empty()); | |||
| RangeTree ranges = key.getRangeKey().asRangeTree(); | |||
| checkMetadata(allRegionRanges.intersect(ranges).isEmpty(), "overlapping row in CSV: %s", key); | |||
| allRangesMap.put(key.getRegion(), allRegionRanges.union(ranges)); | |||
| } | |||
| } | |||
| private static void checkRegions( | |||
| CsvTable<DigitSequence> metadata, | |||
| CsvTable<RangeKey> ranges, | |||
| CsvTable<ShortcodeKey> shortcodes) { | |||
| DigitSequence cc = Iterables.getOnlyElement(metadata.getKeys()); | |||
| PhoneRegion mainRegion = metadata.getOrDefault(cc, MetadataTableSchema.MAIN_REGION); | |||
| Regions extraRegions = metadata.getOrDefault(cc, MetadataTableSchema.EXTRA_REGIONS); | |||
| ImmutableSet<PhoneRegion> csvRegions = ranges | |||
| .getValues(RangesTableSchema.CSV_REGIONS).stream() | |||
| .flatMap(r -> r.getValues().stream()) | |||
| .collect(toImmutableSet()); | |||
| if (extraRegions.getValues().isEmpty()) { | |||
| checkMetadata(csvRegions.size() == 1 && csvRegions.contains(mainRegion), | |||
| "inconsistent regions:\nmetadata: %s\nranges table: %s", mainRegion, csvRegions); | |||
| } else { | |||
| checkMetadata(!extraRegions.getValues().contains(mainRegion), | |||
| "invalid metadata: main region is duplicated in 'extra regions' column"); | |||
| checkMetadata( | |||
| csvRegions.contains(mainRegion) | |||
| && csvRegions.containsAll(extraRegions.getValues()) | |||
| && csvRegions.size() == extraRegions.getValues().size() + 1, | |||
| "inconsistent regions:\nmetadata: %s + %s\nranges table: %s", | |||
| mainRegion, extraRegions, csvRegions); | |||
| } | |||
| ImmutableSet<PhoneRegion> shortcodeRegions = | |||
| shortcodes.getKeys().stream().map(ShortcodeKey::getRegion).collect(toImmutableSet()); | |||
| checkMetadata(csvRegions.containsAll(shortcodeRegions), | |||
| "unexpected regions for shortcodes:\nmetadata: %s\nshortcode regions: %s", | |||
| csvRegions, shortcodeRegions); | |||
| } | |||
| /** The difference between two CSV snapshots captured as a set of CVS tables. */ | |||
| @AutoValue | |||
| public abstract static class Diff { | |||
| private static <K> Optional<CsvTable<DiffKey<K>>> diff(CsvTable<K> lhs, CsvTable<K> rhs) { | |||
| CsvTable<DiffKey<K>> diff = CsvTable.diff(lhs, rhs, DiffMode.CHANGES); | |||
| if (diff.getKeys().stream().anyMatch(k -> k.getStatus() != Status.UNCHANGED)) { | |||
| return Optional.of(diff); | |||
| } | |||
| return Optional.empty(); | |||
| } | |||
| // Visible for AutoValue | |||
| Diff() {} | |||
| /** Returns the contextualized diff of the ranges table. */ | |||
| public abstract Optional<CsvTable<DiffKey<RangeKey>>> rangesDiff(); | |||
| /** Returns the contextualized diff of the shortcodes table. */ | |||
| public abstract Optional<CsvTable<DiffKey<ShortcodeKey>>> shortcodesDiff(); | |||
| /** Returns the contextualized diff of the examples table. */ | |||
| public abstract Optional<CsvTable<DiffKey<ExampleNumberKey>>> examplesDiff(); | |||
| /** Returns the contextualized diff of the formats table. */ | |||
| public abstract Optional<CsvTable<DiffKey<String>>> formatsDiff(); | |||
| /** Returns the contextualized diff of the operators table. */ | |||
| public abstract Optional<CsvTable<DiffKey<String>>> operatorsDiff(); | |||
| } | |||
| /** Creates the diff between two CSV data snapshots. */ | |||
| public static Diff diff(CsvData before, CsvData after) { | |||
| // TODO: Add diffing for comments and/or alternate formats. | |||
| return new AutoValue_CsvData_Diff( | |||
| Diff.diff(before.getRanges(), after.getRanges()), | |||
| Diff.diff(before.getShortcodes(), after.getShortcodes()), | |||
| Diff.diff(before.getExamples(), after.getExamples()), | |||
| Diff.diff(before.getFormats(), after.getFormats()), | |||
| Diff.diff(before.getOperators(), after.getOperators())); | |||
| } | |||
| // Visible for AutoValue | |||
| CsvData() {} | |||
| /** Returns the calling code for this CSV data. */ | |||
| public abstract DigitSequence getCallingCode(); | |||
| /** | |||
| * Returns the single row of the metadata table for the calling code (see | |||
| * {@code MetadataTableSchema}). | |||
| */ | |||
| public abstract CsvTable<DigitSequence> getMetadata(); | |||
| /** Returns the ranges table for the calling code (see {@code RangesTableSchema}) */ | |||
| public abstract CsvTable<RangeKey> getRanges(); | |||
| /** Returns the shortcode table for the calling code (see {@code ShortcodesTableSchema}) */ | |||
| public abstract CsvTable<ShortcodeKey> getShortcodes(); | |||
| /** Returns the examples table for the calling code (see {@code ExamplesTableSchema}). */ | |||
| public abstract CsvTable<ExampleNumberKey> getExamples(); | |||
| /** Returns the format table for the calling code (see {@code FormatsTableSchema}). */ | |||
| public abstract CsvTable<String> getFormats(); | |||
| /** | |||
| * Returns the alternate format table for the calling code (see {@code AltFormatsTableSchema}). | |||
| */ | |||
| public abstract ImmutableList<AltFormatSpec> getAltFormats(); | |||
| /** Returns the operator table for the calling code (see {@code OperatorsTableSchema}). */ | |||
| public abstract CsvTable<String> getOperators(); | |||
| /** Returns the set of comments for the calling code. */ | |||
| public abstract ImmutableList<Comment> getComments(); | |||
| @Memoized | |||
| public RangeTable getRangesAsTable() { | |||
| return RangesTableSchema.toRangeTable(getRanges()); | |||
| } | |||
| @Memoized | |||
| public ImmutableSortedMap<PhoneRegion, RangeTable> getShortcodesAsTables() { | |||
| return ShortcodesTableSchema.toShortcodeTables(getShortcodes()); | |||
| } | |||
| @Memoized | |||
| public ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> getExamplesAsTable() { | |||
| return ExamplesTableSchema.toExampleTable(getExamples()); | |||
| } | |||
| /** Canonicalizes range tables in the CSV data. This is potentially slow for large regions. */ | |||
| // TODO: Is there any way to reliably detect canonical CSV for sub-regions? | |||
| public final CsvData canonicalizeRangeTables() { | |||
| CsvTable<RangeKey> ranges = RangesTableSchema.toCsv(getRangesAsTable()); | |||
| CsvTable<ShortcodeKey> shortcodes = ShortcodesTableSchema.toCsv(getShortcodesAsTables()); | |||
| return create( | |||
| getCallingCode(), | |||
| getMetadata(), | |||
| ranges, | |||
| shortcodes, | |||
| getExamples(), | |||
| getFormats(), | |||
| getAltFormats(), | |||
| getOperators(), | |||
| getComments() | |||
| ); | |||
| } | |||
| } | |||
| @ -0,0 +1,126 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey.ORDERING; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableTable; | |||
| import com.google.common.collect.Table; | |||
| import com.google.common.collect.Table.Cell; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||
| import java.util.Comparator; | |||
| import java.util.List; | |||
| import java.util.Optional; | |||
| import java.util.stream.Stream; | |||
| /** | |||
| * The schema of the "Example Numbers" table with rows keyed by {@link ExampleNumberKey} and | |||
| * columns: | |||
| * <ol> | |||
| * <li>{@link #NUMBER}: The national number | |||
| * <li>{@link #COMMENT}: Evidence for why an example number was chosen. | |||
| * </ol> | |||
| * | |||
| * <p>Rows keys are serialized via the marshaller and produce leading columns: | |||
| * <ol> | |||
| * <li>{@code Region}: The region code of the example number. | |||
| * <li>{@code Type}: The {@link ValidNumberType} of the example number. | |||
| * </ol> | |||
| */ | |||
| public final class ExamplesTableSchema { | |||
| /** A key for rows in the example numbers table. */ | |||
| @AutoValue | |||
| public abstract static class ExampleNumberKey { | |||
| public static final Comparator<ExampleNumberKey> ORDERING = | |||
| Comparator.comparing(ExampleNumberKey::getRegion).thenComparing(ExampleNumberKey::getType); | |||
| public static ExampleNumberKey of(PhoneRegion region, ValidNumberType type) { | |||
| return new AutoValue_ExamplesTableSchema_ExampleNumberKey(region, type); | |||
| } | |||
| public abstract PhoneRegion getRegion(); | |||
| public abstract ValidNumberType getType(); | |||
| } | |||
| /** A number column containing the digit sequence of a national number. */ | |||
| public static final Column<DigitSequence> NUMBER = Column.create( | |||
| DigitSequence.class, "Number", DigitSequence.empty(), DigitSequence::of); | |||
| /** A general comment field, usually describing how an example number was determined. */ | |||
| public static final Column<String> COMMENT = Column.ofString("Comment"); | |||
| private static final CsvKeyMarshaller<ExampleNumberKey> MARSHALLER = new CsvKeyMarshaller<>( | |||
| ExamplesTableSchema::write, | |||
| ExamplesTableSchema::read, | |||
| Optional.of(ORDERING), | |||
| "Region", | |||
| "Type"); | |||
| private static final Schema COLUMNS = Schema.builder() | |||
| .add(NUMBER) | |||
| .add(COMMENT) | |||
| .build(); | |||
| /** Schema instance defining the example numbers CSV table. */ | |||
| public static final CsvSchema<ExampleNumberKey> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS); | |||
| /** | |||
| * Converts a {@link Table} of example numbers into a {@link CsvTable}, using | |||
| * {@link ExampleNumberKey}s as row keys. | |||
| */ | |||
| public static CsvTable<ExampleNumberKey> toCsv( | |||
| Table<PhoneRegion, ValidNumberType, DigitSequence> table) { | |||
| ImmutableTable.Builder<ExampleNumberKey, Column<?>, Object> out = ImmutableTable.builder(); | |||
| out.orderRowsBy(ORDERING).orderColumnsBy(COLUMNS.ordering()); | |||
| for (Cell<PhoneRegion, ValidNumberType, DigitSequence> c : table.cellSet()) { | |||
| out.put(ExampleNumberKey.of(c.getRowKey(), c.getColumnKey()), NUMBER, c.getValue()); | |||
| } | |||
| return CsvTable.from(SCHEMA, out.build()); | |||
| } | |||
| /** | |||
| * Converts a {@link Table} of example numbers into a {@link CsvTable}, using | |||
| * {@link ExampleNumberKey}s as row keys. | |||
| */ | |||
| public static ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> | |||
| toExampleTable(CsvTable<ExampleNumberKey> csv) { | |||
| ImmutableTable.Builder<PhoneRegion, ValidNumberType, DigitSequence> out = | |||
| ImmutableTable.builder(); | |||
| for (ExampleNumberKey k : csv.getKeys()) { | |||
| out.put(k.getRegion(), k.getType(), csv.getOrDefault(k, NUMBER)); | |||
| } | |||
| return out.build(); | |||
| } | |||
| private static Stream<String> write(ExampleNumberKey key) { | |||
| return Stream.of(key.getRegion().toString(), key.getType().toString()); | |||
| } | |||
| private static ExampleNumberKey read(List<String> parts) { | |||
| return ExampleNumberKey.of( | |||
| PhoneRegion.of(parts.get(0)), ValidNumberType.valueOf(parts.get(1))); | |||
| } | |||
| private ExamplesTableSchema() {} | |||
| } | |||
| @ -0,0 +1,68 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.model.CsvData.CsvDataProvider; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||
| import java.io.IOException; | |||
| import java.nio.file.Path; | |||
| /** | |||
| * A CSV provider which reads files rooted in a given directory. The file layout should match that | |||
| * in the CSV metadata directory ({@code googledata/third_party/i18n/phonenumbers/metadata}). | |||
| */ | |||
| public final class FileBasedCsvLoader implements CsvDataProvider { | |||
| /** Returns a CSV loader which reads files from the given base directory. */ | |||
| public static FileBasedCsvLoader using(Path dir) throws IOException { | |||
| return new FileBasedCsvLoader(dir); | |||
| } | |||
| private final Path root; | |||
| private final CsvTable<DigitSequence> metadata; | |||
| private FileBasedCsvLoader(Path root) throws IOException { | |||
| this.root = checkNotNull(root); | |||
| this.metadata = MetadataTableSchema.SCHEMA.load(root.resolve("metadata.csv")); | |||
| } | |||
| @Override | |||
| public CsvTable<DigitSequence> loadMetadata() { | |||
| return metadata; | |||
| } | |||
| @Override | |||
| public CsvData loadData(DigitSequence cc) throws IOException { | |||
| Path ccDir = root.resolve(cc.toString()); | |||
| return CsvData.create( | |||
| cc, | |||
| metadata, | |||
| RangesTableSchema.SCHEMA.load(csvFile(ccDir, "ranges")), | |||
| ShortcodesTableSchema.SCHEMA.load(csvFile(ccDir, "shortcodes")), | |||
| ExamplesTableSchema.SCHEMA.load(csvFile(ccDir, "examples")), | |||
| FormatsTableSchema.SCHEMA.load(csvFile(ccDir, "formats")), | |||
| AltFormatsSchema.loadAltFormats(csvFile(ccDir, "altformats")), | |||
| OperatorsTableSchema.SCHEMA.load(csvFile(ccDir, "operators")), | |||
| CommentsSchema.loadComments(csvFile(ccDir, "comments")) | |||
| ); | |||
| } | |||
| private static Path csvFile(Path dir, String name) { | |||
| return dir.resolve(name + ".csv"); | |||
| } | |||
| } | |||
| @ -0,0 +1,637 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkState; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.base.CharMatcher; | |||
| import com.google.common.base.Strings; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||
| import java.util.ArrayList; | |||
| import java.util.List; | |||
| import java.util.Optional; | |||
| import java.util.function.ToIntFunction; | |||
| import java.util.stream.Collectors; | |||
| import java.util.stream.IntStream; | |||
| /** | |||
| * A specifier for the three types of format available in a formatting rule, "national", | |||
| * "international" and "carrier specific". Each format is represented by a single string which acts | |||
| * as a format template, and from which the necessary XML regular expressions can be recovered. | |||
| * | |||
| * <p>The basic syntax of a specifier is something like {@code "XX XXX-XXXX"}, where '{@code X}' | |||
| * represents a digit from the phone number being formatted. When converted into the legacy XML | |||
| * syntax, a national specifier with this format would represent the "pattern" attribute | |||
| * {@code "(\d{2})(\d{3})(\d{4})"} and the "format" element {@code "$1 $2-$3"}. | |||
| * | |||
| * <p>By adding the '{@code *}' character, one group of variable length may be defined. Thus | |||
| * {@code "XX XXX-XX**"} represents the pattern {@code "(\d{2})(\d{3})(\d{2,4})"}. | |||
| * | |||
| * <p>If the national prefix should be present, for either national or carrier specific formatting, | |||
| * it is represented by the '{@code #}' symbol. Similarly, for carrier specific formatting, the | |||
| * '{@code @}' symbol represents the carrier code placeholder (and must be present exactly once in | |||
| * any carrier specific format specifier). | |||
| * | |||
| * <p>By analyzing the unique prefixes of both national and carrier specific specifiers, the XML | |||
| * syntax can be derived. In a fairly simple example, the format specifiers: | |||
| * <ul> | |||
| * <li>national: {@code "(#XX) XXX-XXXX"} | |||
| * <li>carrier: {@code "#@ XX XXX-XXXX"} | |||
| * <li>international: {@code "XX XXX XXXX"} | |||
| * </ul> | |||
| * would result in: | |||
| * <ul> | |||
| * <li>pattern: {@code "(\d{2})(\d{3})(\d{4})"} | |||
| * <li>national_prefix_formatting_rule: {@code "($NP$FG)"} | |||
| * <li>carrier_specific_formatting_rule: {@code "$NP$CC $FG"} | |||
| * <li>format: {@code "$1 $2-$3"} | |||
| * <li>international_format: {@code "$1 $2 $3"} | |||
| * </ul> | |||
| * The derived "pattern" groups must be the same between all specifiers, while the "national" and | |||
| * "carrier" specifiers must share a common suffix after the "first group". This is a limitation of | |||
| * the XML representation which must be preserved here. | |||
| * | |||
| * <p>If no carrier specific format specifier is present, the extraction of a format rule will | |||
| * still occur (since the formatting rule also affects "as you type" formatting). Thus: | |||
| * <ul> | |||
| * <li>national: {@code "(XX) XXX"} | |||
| * </ul> | |||
| * will result in: | |||
| * <ul> | |||
| * <li>format: {@code "$1 $2"} | |||
| * <li>national_prefix_formatting_rule: {@code "($FG)"} | |||
| * </ul> | |||
| * and not: | |||
| * <ul> | |||
| * <li>format: {@code "($1) $2"} | |||
| * </ul> | |||
| * | |||
| * <p>An international format specifier must exist if international formatting is possible (even if | |||
| * it is identical to the national format specifier). If no international specifier exists, then | |||
| * the range of phone numbers associated with this format must be a subset of the "no international | |||
| * dialling" range, and the derived XML element "intlFormat" will contain the value "NA". | |||
| * | |||
| * <p>If literal characters such as "*" are required to be present in the format string, they can | |||
| * be escaped via a '{@code \}' (backslash) character. The set of characters that might need | |||
| * escaping is '{@code X}', '{@code *}', '{@code #}' and '{@code @}'. Note that the dollar symbol | |||
| * '{@code $}' is special, and is prohibited from ever appearing in a format specifier (even though | |||
| * it's not strictly part of the syntax). | |||
| * | |||
| * <p>A {@code FormatSpec} also defines the ranges of numbers for which this format applies. This | |||
| * is a {@link RangeTree}, rather than a {@code PrefixTree}, since length matters (different | |||
| * formats are sometimes distinguished purely on the basis of number length). The possible lengths | |||
| * of the range tree must match the possible lengths of all defined specifier strings. | |||
| */ | |||
| @AutoValue | |||
| public abstract class FormatSpec { | |||
| /** | |||
| * Returns a format specifier from the serialized fields. Note that the given non-local | |||
| * specifiers must share certain properties (e.g. same number of format groups, same min/max | |||
| * length, same trailing group format). Some of this is necessary due to limitations in how | |||
| * formats are represented in the legacy XML schema (e.g. between national and carrier specific | |||
| * formats). Exceptions are raised when any of these properties are violated. | |||
| * | |||
| * @param nationalSpec the national format specifier string (can contain \-escaped characters). | |||
| * @param carrierSpec the optional carrier format specifier string. | |||
| * @param intlSpec the optional international format specifier string. | |||
| * @param localSpec additional local format specifier string. | |||
| * @param nationalPrefixOptional allows the national prefix omitted during parsing even if | |||
| * present in the format, or given during parsing when not present in the format. | |||
| * @param comment a free-from comment for this specifier. | |||
| */ | |||
| public static FormatSpec of( | |||
| String nationalSpec, | |||
| Optional<String> carrierSpec, | |||
| Optional<String> intlSpec, | |||
| Optional<String> localSpec, | |||
| boolean nationalPrefixOptional, | |||
| Optional<Comment> comment) { | |||
| FormatTemplate national = FormatTemplate.parse(nationalSpec); | |||
| checkArgument(!national.hasCarrierCode(), | |||
| "national format specifier must not contain carrier code: %s", nationalSpec); | |||
| Optional<FormatTemplate> carrier = carrierSpec.map(s -> parseCarrierSpec(s, national)); | |||
| Optional<FormatTemplate> intl = intlSpec.map(s -> parseIntlSpec(s, national)); | |||
| Optional<FormatTemplate> local = localSpec.map(s -> parseLocalSpec(s, national)); | |||
| int minLength = national.minLength(); | |||
| int maxLength = national.maxLength(); | |||
| return new AutoValue_FormatSpec( | |||
| national, carrier, intl, local, minLength, maxLength, nationalPrefixOptional, comment); | |||
| } | |||
| /** | |||
| * Returns a local format specifier for the given template. Local specifiers only have a national | |||
| * template and national prefix prohibited. | |||
| */ | |||
| public static FormatSpec localFormat(FormatTemplate local) { | |||
| checkArgument(!local.hasNationalPrefix(), | |||
| "a local template must not have national prefix: %s", local); | |||
| return new AutoValue_FormatSpec( | |||
| local, | |||
| Optional.empty(), | |||
| Optional.empty(), | |||
| Optional.empty(), | |||
| local.minLength(), | |||
| local.maxLength(), | |||
| false, | |||
| Optional.empty()); | |||
| } | |||
| /** Returns the national format template (e.g. "#XX XXX XXXX"). */ | |||
| public abstract FormatTemplate national(); | |||
| /** Returns the carrier specific format template (e.g. "(@ #XX) XXX XXXX"). */ | |||
| public abstract Optional<FormatTemplate> carrier(); | |||
| /** Returns the international format template (e.g. "XX-XXX-XXXX"). */ | |||
| public abstract Optional<FormatTemplate> international(); | |||
| /** | |||
| * Returns the local format template (e.g. "XXX-XXXX"). Local formats must correspond to the | |||
| * "Area Code Length" values in at least some of the ranges to which they are assigned. | |||
| */ | |||
| public abstract Optional<FormatTemplate> local(); | |||
| /** Returns the minimum number of digits which this format matches. */ | |||
| public abstract int minLength(); | |||
| /** Returns the maximum number of digits which this format matches. */ | |||
| public abstract int maxLength(); | |||
| /** | |||
| * Returns whether, for formats without a national prefix specified, it is still possible to | |||
| * trigger this format by adding a national prefix (even though it is not shown). Formats for | |||
| * which this method returns {@code true} are grouped alongside formats with an explicit national | |||
| * prefix (since they must be ordered carefully with respect to each other to account for their | |||
| * "leading digits"). | |||
| */ | |||
| public abstract boolean nationalPrefixOptional(); | |||
| /** Returns the free-form comment associated with this format specifier. */ | |||
| public abstract Optional<Comment> comment(); | |||
| /** | |||
| * Returns the length based bounds for this format (e.g. all digit sequences between the minimum | |||
| * and maximum lengths). | |||
| */ | |||
| public RangeTree getLengthBasedBounds() { | |||
| return RangeTree.from(IntStream.rangeClosed(minLength(), maxLength()) | |||
| .mapToObj(RangeSpecification::any)); | |||
| } | |||
| @Override | |||
| public final String toString() { | |||
| StringBuilder out = new StringBuilder("FormatSpec{national=").append(national()); | |||
| carrier().ifPresent(t -> out.append(", carrier=").append(t)); | |||
| local().ifPresent(t -> out.append(", local=").append(t)); | |||
| international().ifPresent(t -> out.append(", international=").append(t)); | |||
| out.append(", minLength=").append(minLength()); | |||
| out.append(", maxLength=").append(maxLength()); | |||
| comment().ifPresent(c -> out.append(", comment='").append(c).append('\'')); | |||
| return out.append('}').toString(); | |||
| } | |||
| // ---- RULE PARSING AND CONVERSION METHODS ---- | |||
| private static FormatTemplate parseCarrierSpec(String spec, FormatTemplate national) { | |||
| FormatTemplate carrier = FormatTemplate.parse(spec); | |||
| checkArgument(carrier.hasCarrierCode(), | |||
| "carrier format specifier must contain carrier code: %s", spec); | |||
| // This verifies the groups have the same lengths, but does not check for same formatting. | |||
| checkArgument(carrier.isCompatibleWith(national), | |||
| "carrier format specifier must have compatible groups: %s - %s", | |||
| national.getSpecifier(), spec); | |||
| // This is really ugly, since carrier formats must share the same format in the legacy XML, but | |||
| // can have different formatting rules for the first group. The best way to test this is just | |||
| // compare the XML output directly instead of trying to reason about groups, since group replace | |||
| // also needs to be taken into account. | |||
| checkArgument(carrier.getXmlFormat().equals(national.getXmlFormat()), | |||
| "carrier format specifier must have equal trailing groups: %s - %s", | |||
| national.getSpecifier(), spec); | |||
| // Artificial check (currently true everywhere and likely to never be broken). If this is ever | |||
| // relaxed, the nationalPrefixForParsing regeneration code will need changing to take account | |||
| // of ordering (e.g. generate "(<CC>)?<NP>" instead of "<NP>(<CC>)?"). | |||
| checkArgument(!carrier.hasNationalPrefix() || spec.indexOf('#') < spec.indexOf('@'), | |||
| "national prefix must precede carrier code in carrier format: %s", spec); | |||
| return carrier; | |||
| } | |||
| private static FormatTemplate parseIntlSpec(String spec, FormatTemplate national) { | |||
| FormatTemplate intl = FormatTemplate.parse(spec); | |||
| // In theory this could be relaxed, but then when the spec is written it cannot just call | |||
| // getFormat(). For now, it's always true the international formats don't have "fancy" | |||
| // formatting around the first group (i.e. never "(XXX) XXX XXX") which makes sense since | |||
| // international formats cannot be assumed to be read by people with local knowledge. | |||
| // TODO: To reactivate this check after we are sure that first digit of | |||
| // SN of MX is no more 1 and need not to be swallowed when formatting i.e after parsing change. | |||
| // Context: We have disabled the following check to fix a MX formatting issue i.e using this | |||
| // logic {X>} to remove the mobile token(1) in international format, which is the first digit of | |||
| // the mobile subscriber number. More details in b/111967450. In general, international | |||
| // format should not have such special formatting. Can be fixed as part of b/138727490. | |||
| // checkArgument(!intl.getXmlPrefix().isPresent(), | |||
| // "international format specifier must not have separate prefix: %s", spec); | |||
| checkArgument( | |||
| !intl.hasNationalPrefix(), | |||
| "international format specifier must not contain national prefix: %s", | |||
| spec); | |||
| checkArgument(!intl.hasCarrierCode(), | |||
| "international format specifier must not contain carrier code: %s", spec); | |||
| checkArgument(intl.isCompatibleWith(national), | |||
| "international format specifier must have compatible groups: %s - %s", | |||
| national.getSpecifier(), spec); | |||
| return intl; | |||
| } | |||
| private static FormatTemplate parseLocalSpec(String spec, FormatTemplate national) { | |||
| FormatTemplate local = FormatTemplate.parse(spec); | |||
| checkArgument(!local.getXmlPrefix().isPresent(), | |||
| "local format specifier must not have separate prefix: %s", spec); | |||
| checkArgument(!local.hasNationalPrefix(), | |||
| "local format specifier must not contain national prefix: %s", spec); | |||
| checkArgument(!local.hasCarrierCode(), | |||
| "local format specifier must not contain carrier code: %s", spec); | |||
| checkArgument(local.minLength() < national.minLength(), | |||
| "local format specifier must be shorter than the national format: %s - %s", | |||
| national.getSpecifier(), spec); | |||
| return local; | |||
| } | |||
| // ---- TEMPLATE CLASSES ---- | |||
| /** | |||
| * A single template corresponding to a format specifier such as {@code "(# XXX) XXX-XXXX"}. | |||
| * A template represents one of the types of format (national, international, carrier specific) | |||
| * and enforces as much structural correctness as possible. | |||
| * | |||
| * <p>Templates bridge between the specifier syntax and the XML syntax, with its split prefixes | |||
| * and confusing semantics. As such, there's a lot of slightly subtle business logic in the | |||
| * parsing of templates that, over time, might need to adapt to real world changes (e.g. suffix | |||
| * separators and precise expectations of format structure). | |||
| */ | |||
| @AutoValue | |||
| public abstract static class FormatTemplate { | |||
| // This could be extended, but must never overlap with the escape characters used in the | |||
| // "skeleton" string. It must also always be limited to the Basic Multilingual Plane (BMP). | |||
| // It's really important that '$' is never a meta-character in this syntax, since we escape | |||
| // strings like "$FG" which would otherwise be broken. | |||
| private static final CharMatcher VALID_TEMPLATE_CHARS = | |||
| CharMatcher.ascii().and(CharMatcher.javaIsoControl().negate()).and(CharMatcher.isNot('$')); | |||
| private static final CharMatcher VALID_METACHARS = CharMatcher.anyOf("#@X*{>}\\"); | |||
| // Need to include '$' as a separator, since groups can abut. | |||
| private static final CharMatcher SUFFIX_SEPARATOR = CharMatcher.anyOf(". /-$"); | |||
| private static final char NATIONAL_PREFIX = '#'; | |||
| private static final char CARRIER_CODE = '@'; | |||
| private static final char REQUIRED_DIGIT = 'X'; | |||
| private static final char OPTIONAL_DIGIT = '*'; | |||
| private static final char SUBSTITUTION_START = '{'; | |||
| private static final char SUBSTITUTION_MAP = '>'; | |||
| private static final char SUBSTITUTION_END = '}'; | |||
| private static final String ESCAPED_NATIONAL_PREFIX = "$NP"; | |||
| private static final String ESCAPED_CARRIER_CODE = "$CC"; | |||
| static FormatTemplate parse(String spec) { | |||
| checkArgument(VALID_TEMPLATE_CHARS.matchesAllOf(spec), | |||
| "illegal characters in template: %s", spec); | |||
| List<FormatGroup> groups = new ArrayList<>(); | |||
| StringBuilder skeleton = new StringBuilder(); | |||
| boolean hasNationalPrefix = false; | |||
| boolean hasCarrierCode = false; | |||
| boolean hasVariableLengthGroup = false; | |||
| // Used to avoid abutting groups (i.e. "XXX**XX"). | |||
| boolean canStartGroup = true; | |||
| for (int n = 0; n < spec.length(); n++) { | |||
| char c = spec.charAt(n); | |||
| if (c == REQUIRED_DIGIT) { | |||
| checkArgument(canStartGroup, "illegal group start: %s", spec); | |||
| FormatGroup group = extractGroup(spec, n); | |||
| checkArgument(!(hasVariableLengthGroup && group.isVariableLength()), | |||
| "multiple variable length groups not allowed: %s", spec); | |||
| hasVariableLengthGroup = group.isVariableLength(); | |||
| groups.add(group); | |||
| skeleton.append(escapeGroupNumber(groups.size())); | |||
| // Move to the last character of the group (since we increment again as we loop). | |||
| n += group.maxLength() - 1; | |||
| canStartGroup = false; | |||
| continue; | |||
| } | |||
| if (c == SUBSTITUTION_START) { | |||
| // Expect {GROUP>REPLACEMENT} where group can have optional digits (but normally won't). | |||
| checkArgument(canStartGroup, "illegal group start: %s", spec); | |||
| checkArgument(spec.charAt(n + 1) == REQUIRED_DIGIT, | |||
| "illegal group replacement start: %s", spec); | |||
| FormatGroup group = extractGroup(spec, n + 1); | |||
| checkArgument(!(hasVariableLengthGroup && group.isVariableLength()), | |||
| "multiple variable length groups not allowed: %s", spec); | |||
| hasVariableLengthGroup = group.isVariableLength(); | |||
| // Now expect mapping character and substitution string. | |||
| n += group.maxLength() + 1; | |||
| checkArgument(spec.charAt(n) == SUBSTITUTION_MAP, | |||
| "illegal group replacement (expected %s): '%s'", SUBSTITUTION_MAP, spec); | |||
| int end = spec.indexOf(SUBSTITUTION_END, n + 1); | |||
| checkArgument(end != -1, "missing group replacement end: %s", spec); | |||
| groups.add(group.withReplacement(spec.substring(n + 1, end))); | |||
| skeleton.append(escapeGroupNumber(groups.size())); | |||
| // Unlike the "normal" case above, you can start another group immediately after this | |||
| // (since the {,} make it unambiguous). | |||
| n = end; | |||
| continue; | |||
| } | |||
| canStartGroup = true; | |||
| if (c == NATIONAL_PREFIX) { | |||
| checkArgument(!hasNationalPrefix, "multiple national prefixes not allowed: %s", spec); | |||
| hasNationalPrefix = true; | |||
| skeleton.append(ESCAPED_NATIONAL_PREFIX); | |||
| continue; | |||
| } | |||
| if (c == CARRIER_CODE) { | |||
| checkArgument(!hasCarrierCode, "multiple carrier codes not allowed: %s", spec); | |||
| hasCarrierCode = true; | |||
| skeleton.append(ESCAPED_CARRIER_CODE); | |||
| continue; | |||
| } | |||
| if (c == '\\') { | |||
| // Blows up if trailing '\', but that's fine. | |||
| c = spec.charAt(++n); | |||
| checkArgument(VALID_METACHARS.matches(c), "invalid escaped character '%s': %s", c, spec); | |||
| } else { | |||
| checkArgument(c != OPTIONAL_DIGIT, "unexpected optional marker: %s", spec); | |||
| } | |||
| skeleton.append(c); | |||
| } | |||
| checkArgument(!groups.isEmpty(), "format specifiers must have at least one group: %s", spec); | |||
| // Find the first group which has a replacement (one must exist). This is important for | |||
| // determining where the prefix and suffix should be split when considering hoisting the | |||
| // prefix into a format rule (see getSuffixStart() / getXmlPrefix() / getXmlFormat()). | |||
| int fgIndex = 0; | |||
| while (fgIndex < groups.size() && groups.get(fgIndex).replacement().isPresent()) { | |||
| fgIndex++; | |||
| } | |||
| checkArgument(fgIndex < groups.size(), "cannot replace all groups in a template: %s", spec); | |||
| return new AutoValue_FormatSpec_FormatTemplate( | |||
| spec, | |||
| hasNationalPrefix, | |||
| hasCarrierCode, | |||
| ImmutableList.copyOf(groups), | |||
| fgIndex, | |||
| skeleton.toString()); | |||
| } | |||
| /** | |||
| * Returns the specifier string (e.g. "# XXX-XXXX") which is the serialized form of the | |||
| * template. | |||
| */ | |||
| public abstract String getSpecifier(); | |||
| /** Whether this template formats a national prefix. */ | |||
| public abstract boolean hasNationalPrefix(); | |||
| /** Whether this template formats a carrier selection code prefix. */ | |||
| public abstract boolean hasCarrierCode(); | |||
| /** Returns the information about the groups in this template. */ | |||
| public abstract ImmutableList<FormatGroup> getGroups(); | |||
| /** | |||
| * Returns the index of the first group which does not have a replacement (at least one must). | |||
| */ | |||
| public abstract int getFirstAvailableGroupIndex(); | |||
| // This is an internal representation of the format string used by the XML. It differs in that | |||
| // it isn't split into prefix and suffix (as required in some situations for the XML). As such | |||
| // it only contains "$NP", "$CC", "$<N>", but never "$FG". All valid specifier skeletons must | |||
| // contain "$1"..."$<N>" rather than any replacement strings. | |||
| abstract String skeleton(); | |||
| /** Returns the minumin number of digits which can be matched by this template. */ | |||
| public int minLength() { | |||
| return getLength(this, FormatGroup::minLength); | |||
| } | |||
| /** Returns the maximum number of digits which can be matched by this template. */ | |||
| public int maxLength() { | |||
| return getLength(this, FormatGroup::maxLength); | |||
| } | |||
| /** | |||
| * Returns the maximum number of digits which can be formatted as a single block by this | |||
| * template. If no more than this number of digits are entered, they will be formatted as a | |||
| * single block by this template. | |||
| * | |||
| * <p>This is useful when calculating the leading digits of a format since it might be | |||
| * acceptable to match shortcodes to some formats if they would still format the shortcode | |||
| * within the first block. This simplifies the leading digits in some cases. | |||
| */ | |||
| public int getBlockFormatLength() { | |||
| // If only one group everything is a block, otherwise take the minimum length of the first | |||
| // group. | |||
| return (getGroups().size() == 1) ? maxLength() : getGroups().get(0).minLength(); | |||
| } | |||
| /** Returns a regex to capture the groups for this template (e.g. "(\d{3})(\d{4,5})") */ | |||
| public String getXmlCapturingPattern() { | |||
| return getGroups().stream() | |||
| .map(FormatGroup::toRegex) | |||
| .collect(Collectors.joining(")(", "(", ")")); | |||
| } | |||
| /** | |||
| * Returns the format string for use in the XML (e.g. "$1 $2-$3"). | |||
| * | |||
| * <p>For example given the following templates: | |||
| * <ul> | |||
| * <li>{@code "XXX XXX-XXX"} ==> {@code "$1 $2-$3"} | |||
| * <li>{@code "(#XXX) XXX-XXX"} ==> {@code "$1 $2-$3"} (the prefix is hoisted) | |||
| * <li>{@code "#{XXX>123} XXX-XXX"} ==> {@code "$2-$3"} ($1 was replaced and hoisted) | |||
| * <li>{@code "{X>}XXX-XXX"} ==> {@code "$2-$3"} ($1 was removed) | |||
| * </ul> | |||
| */ | |||
| public String getXmlFormat() { | |||
| int fgIndex = getFirstAvailableGroupIndex(); | |||
| // Always replace the prefix with $N (which is what $FG maps to). This might be a no-op. | |||
| String format = "$" + (fgIndex + 1) + skeleton().substring(getSuffixStart()); | |||
| // Finally do any group replacement from the skeleton after the "first available group". | |||
| // | |||
| // Note that this code isn't exercised in data at the moment (2018) but is here to avoid | |||
| // needing to place artificial limitations on where group replacement can occur. | |||
| for (int n = fgIndex + 1; n < getGroups().size(); n++) { | |||
| Optional<String> replacement = getGroups().get(n).replacement(); | |||
| if (replacement.isPresent()) { | |||
| format = format.replace("$" + (n + 1), replacement.get()); | |||
| } | |||
| } | |||
| return format; | |||
| } | |||
| /** | |||
| * Returns the format prefix for use in the XML formatting rules (e.g. "($NP $FG)"). If the | |||
| * calculated prefix is just "$FG" then nothing is returned (since that's a no-op value). | |||
| * | |||
| * <p>For example given the following templates: | |||
| * <ul> | |||
| * <li>{@code "XXX XXX-XXX"} ==> XML prefix is empty | |||
| * <li>{@code "(#XXX) XXX-XXX"} ==> {@code "($NP$FG)"} | |||
| * <li>{@code "#{XXX>123} XXX-XXX"} ==> {@code "$NP123 $FG"} | |||
| * <li>{@code "{X>}XXX-XXX"} ==> XML prefix is empty (but the format will not contain $1) | |||
| * </ul> | |||
| */ | |||
| public Optional<String> getXmlPrefix() { | |||
| String prefix = skeleton().substring(0, getSuffixStart()); | |||
| // We know that "$<fgIndex + 1>" (substitutions are 1-indexed) is in the prefix and | |||
| // should be replaced with "$FG", and everything before that has a replacement. | |||
| int fgIndex = getFirstAvailableGroupIndex(); | |||
| for (int n = 0; n < fgIndex; n++) { | |||
| // Everything before the "first available group" must have a replacement (by definition). | |||
| prefix = prefix.replace("$" + (n + 1), getGroups().get(n).replacement().get()); | |||
| } | |||
| prefix = prefix.replace("$" + (fgIndex + 1), "$FG"); | |||
| checkState(prefix.contains("$FG"), | |||
| "XML prefix must always contain '$FG' (this must be a code error): %s", prefix); | |||
| // After all this work we could still end up with a no-op substitution! | |||
| return prefix.equals("$FG") ? Optional.empty() : Optional.of(prefix); | |||
| } | |||
| /** | |||
| * Returns whether all groups have the same "structure" (i.e. min/max length). They can | |||
| * differ in terms of having replacements however. | |||
| */ | |||
| boolean isCompatibleWith(FormatTemplate other) { | |||
| if (getGroups().size() != other.getGroups().size()) { | |||
| return false; | |||
| } | |||
| for (int n = 0; n < getGroups().size(); n++) { | |||
| if (!getGroups().get(n).isCompatibleWith(other.getGroups().get(n))) { | |||
| return false; | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| private int getSuffixStart() { | |||
| // This is only safe because "\$1" cannot be present ('$' cannot be escaped). | |||
| int suffixStart = SUFFIX_SEPARATOR.indexIn(skeleton(), skeleton().indexOf("$1") + 1); | |||
| // If no suffix start found, the entire skeleton is the prefix. | |||
| if (suffixStart == -1) { | |||
| suffixStart = skeleton().length(); | |||
| } | |||
| // Now account for the fact that the first group (and others) could have replacements, which | |||
| // pushes the suffix start to just after the "first available group" (which is what becomes | |||
| // $FG). If the first available group is "$1" then we just get suffixStart. | |||
| int fgNumber = getFirstAvailableGroupIndex() + 1; | |||
| checkState(fgNumber < 10, "invalid first group number: %s", fgNumber); | |||
| return Math.max(suffixStart, skeleton().indexOf("$" + fgNumber) + 2); | |||
| } | |||
| @Override | |||
| public final String toString() { | |||
| return getSpecifier(); | |||
| } | |||
| private static int getLength(FormatTemplate template, ToIntFunction<FormatGroup> lengthFn) { | |||
| return template.getGroups().stream().mapToInt(lengthFn).sum(); | |||
| } | |||
| private static FormatGroup extractGroup(String template, int start) { | |||
| // We know that 'start' references a group start (i.e. 'X') so length must be at least 1. | |||
| int endRequired = findEndOf(REQUIRED_DIGIT, template, start); | |||
| int endGroup = findEndOf(OPTIONAL_DIGIT, template, endRequired); | |||
| return FormatGroup.of(endRequired - start, endGroup - start); | |||
| } | |||
| private static int findEndOf(char c, String template, int start) { | |||
| int endRequired = CharMatcher.isNot(c).indexIn(template, start); | |||
| return endRequired != -1 ? endRequired : template.length(); | |||
| } | |||
| private static String escapeGroupNumber(int n) { | |||
| checkArgument(n >= 1 && n <= 9, "bad group number: %s", n); | |||
| return "$" + n; | |||
| } | |||
| } | |||
| /** Represents contiguous digit groups in a format (e.g. "XXX" or "XXX***"). */ | |||
| @AutoValue | |||
| public abstract static class FormatGroup { | |||
| private static FormatGroup of(int min, int max) { | |||
| checkArgument(max >= min, "bad group lengths: %s, %s", min, max); | |||
| return new AutoValue_FormatSpec_FormatGroup(min, max, Optional.empty()); | |||
| } | |||
| private FormatGroup withReplacement(String s) { | |||
| return new AutoValue_FormatSpec_FormatGroup(minLength(), maxLength(), Optional.of(s)); | |||
| } | |||
| /** Returns the minimum number of digits in this group. */ | |||
| public abstract int minLength(); | |||
| /** Returns the maximum number of digits in this group. */ | |||
| public abstract int maxLength(); | |||
| /** Returns the optional, arbitrary (possibly empty) replacement string for this group. */ | |||
| abstract Optional<String> replacement(); | |||
| /** | |||
| * Returns if this group can match a variable number of digits. Only one group in any format | |||
| * specifier can have variable length. | |||
| */ | |||
| private boolean isVariableLength() { | |||
| return maxLength() > minLength(); | |||
| } | |||
| /** | |||
| * Returns whether two groups have the same "structure" (i.e. min/max lengths), but does not | |||
| * compare replacement values. Used only for internal checks. | |||
| */ | |||
| private boolean isCompatibleWith(FormatGroup other) { | |||
| return minLength() == other.minLength() && maxLength() == other.maxLength(); | |||
| } | |||
| private String toRegex() { | |||
| if (maxLength() > minLength()) { | |||
| return String.format("\\d{%d,%d}", minLength(), maxLength()); | |||
| } else if (minLength() > 1) { | |||
| return String.format("\\d{%d}", minLength()); | |||
| } else { | |||
| return "\\d"; | |||
| } | |||
| } | |||
| @Override | |||
| public final String toString() { | |||
| String group = | |||
| Strings.repeat("X", minLength()) + Strings.repeat("*", maxLength() - minLength()); | |||
| return replacement().map(r -> String.format("{%s>%s}", group, r)).orElse(group); | |||
| } | |||
| } | |||
| } | |||
| @ -0,0 +1,96 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||
| import java.util.Optional; | |||
| /** | |||
| * The schema of the "Formats" table with rows keyed by ID, and columns: | |||
| * <ol> | |||
| * <li>{@link #NATIONAL}: Required national format (may contain '#' for national prefix). | |||
| * <li>{@link #CARRIER}: Optional carrier format (may contain '#' and '@' for carrier | |||
| * specifier). Must be compatible with the national format (same suffix). | |||
| * <li>{@link #INTERNATIONAL}: International format (must not contain '#' or '@'). | |||
| * <li>{@link #LOCAL}: Local format (must not contain '#' or '@', and must correspond to assigned | |||
| * area code lengths if present). | |||
| * <li>{@link #COMMENT}: Freeform comment text. | |||
| * </ol> | |||
| * | |||
| * <p>Rows keys are serialized via the marshaller and produce the leading column: | |||
| * <ol> | |||
| * <li>{@code Id}: The format ID. | |||
| * </ol> | |||
| */ | |||
| public final class FormatsTableSchema { | |||
| public static final Column<String> NATIONAL = Column.ofString("National"); | |||
| public static final Column<String> CARRIER = Column.ofString("Carrier"); | |||
| public static final Column<String> INTERNATIONAL = Column.ofString("International"); | |||
| public static final Column<String> LOCAL = Column.ofString("Local"); | |||
| public static final Column<Boolean> NATIONAL_PREFIX_OPTIONAL = | |||
| Column.ofBoolean("National Prefix Optional"); | |||
| /** An arbitrary optional text comment. */ | |||
| public static final Column<String> COMMENT = Column.ofString("Comment"); | |||
| private static final CsvKeyMarshaller<String> MARSHALLER = CsvKeyMarshaller.ofSortedString("Id"); | |||
| private static final Schema COLUMNS = | |||
| Schema.builder() | |||
| .add(NATIONAL) | |||
| .add(CARRIER) | |||
| .add(INTERNATIONAL) | |||
| .add(LOCAL) | |||
| .add(NATIONAL_PREFIX_OPTIONAL) | |||
| .add(COMMENT) | |||
| .build(); | |||
| /** Schema instance defining the operators CSV table. */ | |||
| public static final CsvSchema<String> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS); | |||
| /** Converts a CSV table into a map of format specifiers. */ | |||
| public static ImmutableMap<String, FormatSpec> toFormatSpecs(CsvTable<String> formats) { | |||
| ImmutableMap.Builder<String, FormatSpec> specs = ImmutableMap.builder(); | |||
| for (String id : formats.getKeys()) { | |||
| specs.put( | |||
| id, | |||
| FormatSpec.of( | |||
| formats.getOrDefault(id, NATIONAL), | |||
| toOptional(formats.getOrDefault(id, CARRIER)), | |||
| toOptional(formats.getOrDefault(id, INTERNATIONAL)), | |||
| toOptional(formats.getOrDefault(id, LOCAL)), | |||
| formats.getOrDefault(id, NATIONAL_PREFIX_OPTIONAL), | |||
| toComment(formats.getOrDefault(id, COMMENT)))); | |||
| } | |||
| return specs.build(); | |||
| } | |||
| private static Optional<String> toOptional(String s) { | |||
| return s.isEmpty() ? Optional.empty() : Optional.of(s); | |||
| } | |||
| private static Optional<Comment> toComment(String s) { | |||
| return s.isEmpty() ? Optional.empty() : Optional.of(Comment.fromText(s)); | |||
| } | |||
| private FormatsTableSchema() {} | |||
| } | |||
| @ -0,0 +1,36 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import com.google.errorprone.annotations.FormatMethod; | |||
| /** | |||
| * Represents an error related to CSV metadata, either structural issues in the CSV or semantic | |||
| * errors in the XML representation. MetadataExceptions should only correspond to problems fixable | |||
| * by editing the CSV data. | |||
| */ | |||
| public final class MetadataException extends RuntimeException { | |||
| @FormatMethod | |||
| public static void checkMetadata(boolean cond, String msg, Object... args) { | |||
| if (!cond) { | |||
| throw new MetadataException(String.format(msg, args)); | |||
| } | |||
| } | |||
| public MetadataException(String message) { | |||
| super(message); | |||
| } | |||
| } | |||
| @ -0,0 +1,168 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static java.util.Comparator.naturalOrder; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.Timezones; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||
| import com.google.i18n.phonenumbers.metadata.table.MultiValue; | |||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||
| import java.util.Arrays; | |||
| import java.util.Comparator; | |||
| import java.util.Optional; | |||
| import java.util.stream.Stream; | |||
| /** | |||
| * The schema of the "Metadata" table with rows keyed by {@link DigitSequence} and columns: | |||
| * | |||
| * <ol> | |||
| * <li>{@link #MAIN_REGION}: The primary region associated with a calling code. | |||
| * <li>{@link #EXTRA_REGIONS}: A list of additional regions shared by the calling code. | |||
| * <li>{@link #NATIONAL_PREFIX}: The (optional) prefix used when dialling national numbers. | |||
| * <li>{@link #IDD_PREFIX}: The default international dialling (IDD) prefix. | |||
| * <li>{@link #TIMEZONE}: The default timezone name(s) for a calling code. Multiple timezones | |||
| * can be specific if separated by {@code '&'}. | |||
| * <li>{@link #MOBILE_PORTABLE_REGIONS}: A list of regions in which mobile numbers are portable | |||
| * between operators. | |||
| * <li>{@link #NATIONAL_PREFIX_OPTIONAL}: True if the national prefix is optional throughout the | |||
| * numbering plan (e.g. a prefix is defined, but does not have to be present when numbers are | |||
| * used). | |||
| * </ol> | |||
| * | |||
| * <p>Rows keys are serialized via the marshaller and produce the leading column: | |||
| * <ol> | |||
| * <li>{@code Calling Code}: The country calling code. | |||
| * </ol> | |||
| */ | |||
| public final class MetadataTableSchema { | |||
| /** Values in the "REGIONS" column are a sorted list of region codes. */ | |||
| public static final class Regions extends MultiValue<PhoneRegion, Regions> { | |||
| private static final Regions EMPTY = new Regions(ImmutableSet.of()); | |||
| public static Column<Regions> column(String name) { | |||
| return Column.create(Regions.class, name, EMPTY, Regions::new); | |||
| } | |||
| public static Regions of(PhoneRegion... regions) { | |||
| return new Regions(Arrays.asList(regions)); | |||
| } | |||
| public static Regions of(Iterable<PhoneRegion> regions) { | |||
| return new Regions(regions); | |||
| } | |||
| private Regions(Iterable<PhoneRegion> regions) { | |||
| super(regions, ',', naturalOrder(), true); | |||
| } | |||
| private Regions(String s) { | |||
| super(s, PhoneRegion::of, ',', naturalOrder(), true); | |||
| } | |||
| } | |||
| /** | |||
| * Values in the "NATIONAL_PREFIX" column are an (unsorted) list of prefixes, with the preferred | |||
| * prefix first. | |||
| */ | |||
| public static final class DigitSequences extends MultiValue<DigitSequence, DigitSequences> { | |||
| private static final DigitSequences EMPTY = new DigitSequences(ImmutableSet.of()); | |||
| public static Column<DigitSequences> column(String name) { | |||
| return Column.create(DigitSequences.class, name, EMPTY, DigitSequences::new); | |||
| } | |||
| public static DigitSequences of(DigitSequence... numbers) { | |||
| return new DigitSequences(Arrays.asList(numbers)); | |||
| } | |||
| private DigitSequences(Iterable<DigitSequence> numbers) { | |||
| super(numbers, ',', naturalOrder(), false); | |||
| } | |||
| private DigitSequences(String s) { | |||
| super(s, DigitSequence::of, ',', naturalOrder(), false); | |||
| } | |||
| } | |||
| /** The primary region associated with a calling code (e.g. "US" for NANPA). */ | |||
| public static final Column<PhoneRegion> MAIN_REGION = | |||
| Column.create(PhoneRegion.class, "Main Region", PhoneRegion.getUnknown(), PhoneRegion::of); | |||
| /** A comma separated list of expected regions for the calling code. */ | |||
| public static final Column<Regions> EXTRA_REGIONS = Regions.column("Extra Regions"); | |||
| /** | |||
| * A list of prefixes used when dialling national numbers (e.g. "0" for "US"). If more than one | |||
| * prefix is given, the first prefix is assumed to be "preferred" and the others are considered | |||
| * alternatives. Having multiple prefixes is useful if a country switches between prefixes and | |||
| * a period of "parallel running" is needed. | |||
| */ | |||
| public static final Column<DigitSequences> NATIONAL_PREFIX = | |||
| DigitSequences.column("National Prefix"); | |||
| /** | |||
| * The default international dialling (IDD) prefix. This is a string, rather than a digit | |||
| * sequence, because it can optionally contain a single '~' character to indicate a pause while | |||
| * dialling (e.g. "8~10" in Russia). This is stripped everywhere except when used to populate | |||
| * the "preferredInternationalPrefix" attribute in the libphonenumber XML file. | |||
| */ | |||
| public static final Column<String> IDD_PREFIX = Column.ofString("IDD Prefix"); | |||
| /** | |||
| * The default value for the "Timezone" column in the ranges table (in many regions, this is a | |||
| * single constant value). | |||
| */ | |||
| public static final Column<Timezones> TIMEZONE = RangesTableSchema.TIMEZONE; | |||
| /** A comma separated list of regions in which mobile numbers are portable between carriers. */ | |||
| public static final Column<Regions> MOBILE_PORTABLE_REGIONS = | |||
| Regions.column("Mobile Portable Regions"); | |||
| /** Describes whether the "national prefix" is optional when parsing a national number. */ | |||
| public static final Column<Boolean> NATIONAL_PREFIX_OPTIONAL = | |||
| Column.ofBoolean("National Prefix Optional"); | |||
| /** The preferred prefix for specifying extensions to numbers (e.g. "ext" for "1234 ext 56"). */ | |||
| public static final Column<String> EXTENSION_PREFIX = Column.ofString("Extension Prefix"); | |||
| private static final CsvKeyMarshaller<DigitSequence> MARSHALLER = new CsvKeyMarshaller<>( | |||
| k -> Stream.of(k.toString()), | |||
| p -> DigitSequence.of(p.get(0)), | |||
| Optional.of(Comparator.comparing(Object::toString)), | |||
| "Calling Code"); | |||
| private static final Schema COLUMNS = Schema.builder() | |||
| .add(MAIN_REGION) | |||
| .add(EXTRA_REGIONS) | |||
| .add(NATIONAL_PREFIX) | |||
| .add(IDD_PREFIX) | |||
| .add(TIMEZONE) | |||
| .add(MOBILE_PORTABLE_REGIONS) | |||
| .add(NATIONAL_PREFIX_OPTIONAL) | |||
| .add(EXTENSION_PREFIX) | |||
| .build(); | |||
| /** Schema instance defining the metadata CSV table. */ | |||
| public static final CsvSchema<DigitSequence> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS); | |||
| private MetadataTableSchema() {} | |||
| } | |||
| @ -0,0 +1,750 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkState; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata; | |||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.AREA_CODE_LENGTH; | |||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.FORMAT; | |||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.NATIONAL_ONLY; | |||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.PER_REGION_COLUMNS; | |||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.REGIONS; | |||
| import static java.lang.Boolean.TRUE; | |||
| import static java.util.Comparator.comparing; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.base.Joiner; | |||
| import com.google.common.base.Splitter; | |||
| import com.google.common.collect.ContiguousSet; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.ImmutableSortedMap; | |||
| import com.google.common.collect.ImmutableSortedSet; | |||
| import com.google.common.collect.ImmutableTable; | |||
| import com.google.common.collect.Ordering; | |||
| import com.google.common.collect.Sets; | |||
| import com.google.common.collect.Table; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||
| import java.util.ArrayList; | |||
| import java.util.Comparator; | |||
| import java.util.List; | |||
| import java.util.Map; | |||
| import java.util.Optional; | |||
| import java.util.Set; | |||
| import java.util.TreeSet; | |||
| /** | |||
| * An abstraction of all the phone number metadata known about for a single calling code. | |||
| * <p> | |||
| * Note that there is no builder for NumberingScheme. The expectation is that CSV tables and other | |||
| * primary sources will be used to build numbering schemes at a single point in the business logic. | |||
| * Handling incremental modification of a builder, or partially built schemes just isn't something | |||
| * that's expected to be needed (though there is {@code TestNumberingScheme} for use in unit tests). | |||
| */ | |||
| @AutoValue | |||
| public abstract class NumberingScheme { | |||
| // Bitmask for [1-9] (bits 1..9 set, bit 0 clear). | |||
| private static final int NOT_ZERO_MASK = 0x3FE; | |||
| /** Top level information about a numbering scheme. */ | |||
| @AutoValue | |||
| public abstract static class Attributes { | |||
| /** Returns a new attributes instance for the given data. */ | |||
| public static Attributes create( | |||
| DigitSequence cc, | |||
| PhoneRegion mainRegion, | |||
| Set<PhoneRegion> extraRegions, | |||
| ImmutableSet<DigitSequence> nationalPrefix, | |||
| RangeTree carrierPrefixes, | |||
| String defaultIddPrefix, | |||
| RangeTree allIddRanges, | |||
| String extensionPrefix, | |||
| Set<PhoneRegion> mobilePortableRegions) { | |||
| // In theory there could be IDD prefix for a non-geographic region (and this check could be | |||
| // removed) but it's not something we've ever seen and don't have any expectation of. | |||
| checkMetadata(!mainRegion.equals(PhoneRegion.getWorld()) || allIddRanges.isEmpty(), | |||
| "[%s] IDD prefixes must not be present for non-geographic regions", cc); | |||
| checkMetadata(mainRegion.equals(PhoneRegion.getWorld()) || !allIddRanges.isEmpty(), | |||
| "[%s] IDD prefixes must be present for all geographic regions", cc); | |||
| checkMetadata(nationalPrefix.stream().noneMatch(allIddRanges::contains), | |||
| "[%s] National prefix %s and IDD prefixes (%s) must be disjoint", | |||
| cc, nationalPrefix, allIddRanges); | |||
| checkMetadata(nationalPrefix.stream().noneMatch(carrierPrefixes::contains), | |||
| "[%s] National prefix %s and carrier prefixes (%s) must be disjoint", | |||
| cc, nationalPrefix, carrierPrefixes); | |||
| // Allow exactly one '~' to separate the prefix digits to indicate a pause during dialling | |||
| // (this check could be relaxed in future, but it's currently true for all data). | |||
| checkMetadata(defaultIddPrefix.isEmpty() || defaultIddPrefix.matches("[0-9]+(?:~[0-9]+)?"), | |||
| "[%s] Invalid IDD prefix: %s", cc, defaultIddPrefix); | |||
| DigitSequence iddPrefix = DigitSequence.of(defaultIddPrefix.replace("~", "")); | |||
| checkMetadata(iddPrefix.isEmpty() || allIddRanges.contains(iddPrefix), | |||
| "[%s] IDD ranges must contain the default prefix: %s", cc, iddPrefix); | |||
| checkMetadata(!extraRegions.contains(mainRegion), | |||
| "[%s] duplicated main region '%s' in extra regions: %s", | |||
| cc, mainRegion, extraRegions); | |||
| // Main region comes first in iteration order, remaining regions are ordered naturally. | |||
| ImmutableSet.Builder<PhoneRegion> set = ImmutableSet.builder(); | |||
| set.add(mainRegion); | |||
| extraRegions.stream().sorted().forEach(set::add); | |||
| ImmutableSet<PhoneRegion> allRegions = set.build(); | |||
| checkMetadata(allRegions.containsAll(mobilePortableRegions), | |||
| "invalid mobile portable regions: %s", mobilePortableRegions); | |||
| return new AutoValue_NumberingScheme_Attributes( | |||
| cc, | |||
| allRegions, | |||
| nationalPrefix, | |||
| carrierPrefixes, | |||
| defaultIddPrefix, | |||
| allIddRanges, | |||
| !extensionPrefix.isEmpty() ? Optional.of(extensionPrefix) : Optional.empty(), | |||
| ImmutableSortedSet.copyOf(Ordering.natural(), mobilePortableRegions)); | |||
| } | |||
| /** Returns the unique calling code of this numbering scheme. */ | |||
| public abstract DigitSequence getCallingCode(); | |||
| /** | |||
| * Returns the regions represented by this numbering scheme. The main region is always present | |||
| * and listed first, and remaining regions are listed in "natural" order. | |||
| */ | |||
| public abstract ImmutableSet<PhoneRegion> getRegions(); | |||
| /** | |||
| * Returns the "main" region for this numbering scheme. The notion of a main region for a | |||
| * country calling code is slightly archaic and mostly comes from the way in which the legacy | |||
| * XML data is structured. However there are a few places in the public API where the "main" | |||
| * region is returned in situations of ambiguity, so it can be useful to know it. | |||
| */ | |||
| public final PhoneRegion getMainRegion() { | |||
| return getRegions().asList().get(0); | |||
| } | |||
| /** | |||
| * Returns all possible national prefixes which can be used when dialling national numbers. In | |||
| * most cases this set just contains the preferred prefix, but alternate values may be present | |||
| * when a region switches between prefixes or for other reasons. Any "non preferred" prefixes | |||
| * are recognized only during parsing, and otherwise ignored. | |||
| * | |||
| * <p>If there is a preferred prefix, it is listed first, otherwise the set is empty. | |||
| */ | |||
| public abstract ImmutableSet<DigitSequence> getNationalPrefixes(); | |||
| /** | |||
| * Returns the (possibly empty) prefix used when dialling national numbers (e.g. "0" for "US"). | |||
| * Not all regions require a prefix for national dialling. | |||
| */ | |||
| public DigitSequence getPreferredNationalPrefix() { | |||
| ImmutableSet<DigitSequence> prefixes = getNationalPrefixes(); | |||
| return prefixes.isEmpty() ? DigitSequence.empty() : prefixes.iterator().next(); | |||
| } | |||
| /** | |||
| * Returns all carrier prefixes for national dialling. This range must not contain the national | |||
| * prefix. | |||
| */ | |||
| public abstract RangeTree getCarrierPrefixes(); | |||
| /** | |||
| * Returns the (possible empty) default international dialling (IDD) prefix, possibly | |||
| * containing a '~' to indicate a pause during dialling (e.g. "8~10" for Russia). | |||
| */ | |||
| public abstract String getDefaultIddPrefix(); | |||
| /** | |||
| * Returns all IDD prefixes which may be used for international dialling. If the default prefix | |||
| * is not empty it must be contained in this range. | |||
| */ | |||
| public abstract RangeTree getIddPrefixes(); | |||
| /** Returns the preferred label to use for indicating extensions for numbers. */ | |||
| public abstract Optional<String> getExtensionPrefix(); | |||
| /** Returns the regions in which mobile numbers are portable between carriers. */ | |||
| public abstract ImmutableSet<PhoneRegion> getMobilePortableRegions(); | |||
| } | |||
| /** | |||
| * Creates a numbering scheme from a range table and example numbers. No rules are applied to the | |||
| * data in the tables, and they are assumed to be complete. | |||
| */ | |||
| public static NumberingScheme from( | |||
| Attributes attributes, | |||
| RangeTable xmlTable, | |||
| Map<PhoneRegion, RangeTable> shortcodeMap, | |||
| Map<String, FormatSpec> formats, | |||
| ImmutableList<AltFormatSpec> altFormats, | |||
| Table<PhoneRegion, ValidNumberType, DigitSequence> exampleNumbers, | |||
| List<Comment> comments) { | |||
| checkPossibleRegions(attributes.getRegions(), xmlTable); | |||
| checkNationalOnly(attributes, xmlTable); | |||
| checkUnambiguousIdd(attributes, xmlTable, formats); | |||
| ImmutableSortedMap<PhoneRegion, RangeTable> shortcodes = | |||
| checkShortCodeConsistency(shortcodeMap, xmlTable); | |||
| return new AutoValue_NumberingScheme( | |||
| attributes, | |||
| xmlTable, | |||
| shortcodes, | |||
| checkFormatConsistency(attributes, formats, xmlTable, shortcodes), | |||
| checkAltFormatConsistency(altFormats, formats, xmlTable), | |||
| checkExampleNumbers(attributes.getRegions(), xmlTable, exampleNumbers), | |||
| addSyntheticComments(comments, attributes)); | |||
| } | |||
| // Adds the first comments for main and auxiliary regions, giving the English name and detailing | |||
| // auxiliary region information if necessary. | |||
| private static ImmutableList<Comment> addSyntheticComments( | |||
| List<Comment> comments, Attributes attributes) { | |||
| PhoneRegion mainRegion = attributes.getMainRegion(); | |||
| if (!mainRegion.equals(PhoneRegion.getWorld())) { | |||
| List<Comment> modified = new ArrayList<>(getRegionNameComments(mainRegion)); | |||
| List<PhoneRegion> auxRegions = | |||
| attributes.getRegions().asList().subList(1, attributes.getRegions().size()); | |||
| if (!auxRegions.isEmpty()) { | |||
| String comment = String.format("Main region for '%s'", Joiner.on(',').join(auxRegions)); | |||
| modified.add(Comment.create(Comment.anchor(mainRegion), ImmutableList.of(comment))); | |||
| for (PhoneRegion r : auxRegions) { | |||
| modified.addAll(getRegionNameComments(r)); | |||
| String auxComment = | |||
| String.format("Calling code and formatting shared with '%s'", mainRegion); | |||
| modified.add(Comment.create(Comment.anchor(r), ImmutableList.of(auxComment))); | |||
| } | |||
| } | |||
| // Do this last, since order matters (because anchors are not unique) and we want the | |||
| // synthetic comments to come first. | |||
| modified.addAll(comments); | |||
| comments = modified; | |||
| } | |||
| return ImmutableList.copyOf(comments); | |||
| } | |||
| private static List<Comment> getRegionNameComments(PhoneRegion region) { | |||
| ImmutableList<String> enName = ImmutableList.of(region.getEnglishNameForXmlComments()); | |||
| return ImmutableList.of( | |||
| Comment.create(Comment.anchor(region), enName), | |||
| Comment.create(Comment.shortcodeAnchor(region), enName)); | |||
| } | |||
| private static void checkPossibleRegions(Set<PhoneRegion> regions, RangeTable xmlTable) { | |||
| ImmutableSet<PhoneRegion> actual = REGIONS.extractGroupColumns(xmlTable.getColumns()).keySet(); | |||
| // Allow no region column in the table if there's only one region (since it's implicit). | |||
| checkState((actual.isEmpty() && regions.size() == 1) || actual.equals(regions), | |||
| "regions added to range table do not match the expected numbering scheme regions\n" | |||
| + "expected: %s\n" | |||
| + "actual: %s\n", | |||
| regions, actual); | |||
| } | |||
| // An assumption has generally been that if a range is "national only" then it either: | |||
| // a) belongs to only one region (the one it's national only for) | |||
| // b) belongs to at least the main region (since in some schemes ranges mostly just overlap all | |||
| // possible regions). | |||
| // Thus we preclude the possibility of having a "national only" number that appears in multiple | |||
| // regions, but not the main region. | |||
| // | |||
| // If this check is ever removed (because there is real data where this is not the case), then | |||
| // the code which generates the "<noInternationalDialling>" patterns will have to be revisited. | |||
| private static void checkNationalOnly(Attributes attributes, RangeTable xmlTable) { | |||
| RangeTree allNationalOnly = xmlTable.getRanges(NATIONAL_ONLY, true); | |||
| if (allNationalOnly.isEmpty()) { | |||
| return; | |||
| } | |||
| ImmutableList<PhoneRegion> regions = attributes.getRegions().asList(); | |||
| PhoneRegion main = regions.get(0); | |||
| // Anything assigned to the main region can be ignored as we allow it to have multiple regions. | |||
| // Now we have to ensure that these ranges are assigned to exactly one auxiliary region. | |||
| RangeTree remaining = | |||
| allNationalOnly.subtract(xmlTable.getRanges(REGIONS.getColumn(main), true)); | |||
| if (remaining.isEmpty()) { | |||
| return; | |||
| } | |||
| DigitSequence cc = attributes.getCallingCode(); | |||
| for (PhoneRegion r : regions.subList(1, regions.size())) { | |||
| RangeTree auxNationalOnly = | |||
| xmlTable.getRanges(REGIONS.getColumn(r), true).intersect(allNationalOnly); | |||
| // Anything already removed from "remaining" was already accounted for by another region. | |||
| checkMetadata(remaining.containsAll(auxNationalOnly), | |||
| "[%s] %s has national-only ranges which overlap other regions: %s", | |||
| cc, r, auxNationalOnly.subtract(remaining)); | |||
| remaining = remaining.subtract(auxNationalOnly); | |||
| } | |||
| // This is not data issue since it should have been checked already, this is bug. | |||
| checkState(remaining.isEmpty(), "[%s] ranges not assigned to any region: %s", cc, remaining); | |||
| } | |||
| /** | |||
| * Ensures no national range can start with an IDD (international dialling code of any kind). | |||
| * This is slightly more complex than just looking for any IDD prefix at the start of a range | |||
| * because of cases like India, where "00800..." is a valid range and does start with IDD. | |||
| * | |||
| * <p>We allow this because: | |||
| * <ol> | |||
| * <li>The number is required to have the national prefix in front, so must be dialled as | |||
| * {@code 000800...} (according to the Indian numbering plan) | |||
| * <li>and {@code 000...} is not a valid sequence that would lead to dialing into another region, | |||
| * because all calling codes start with {@code [1-9]}. | |||
| * </ol> | |||
| */ | |||
| private static void checkUnambiguousIdd( | |||
| Attributes attributes, RangeTable xmlTable, Map<String, FormatSpec> formats) { | |||
| // It can be empty for non-geographic (world) numbering schemes. | |||
| if (attributes.getIddPrefixes().isEmpty()) { | |||
| return; | |||
| } | |||
| // All IDDs extended by one non-zero digit. These are the prefixes which if dialled may end | |||
| // up in another region, so they cannot be allowed at the start of any national number. | |||
| RangeTree iddPlusOneDigit = attributes.getIddPrefixes().map(r -> r.extendByMask(NOT_ZERO_MASK)); | |||
| // We only care about ranges up to this length, which can speed things up. | |||
| int maxPrefixLength = iddPlusOneDigit.getLengths().last(); | |||
| // Now prefix any ranges which could be dialled with a national prefix with all possible | |||
| // national prefixes, based on how they are formatted (and assume that no format means no | |||
| // national prefix). | |||
| RangeTree withNationalPrefix = RangeTree.empty(); | |||
| RangeTree withoutNationalPrefix = xmlTable.getRanges(FORMAT, FORMAT.defaultValue()); | |||
| for (String fid : formats.keySet()) { | |||
| FormatSpec spec = formats.get(fid); | |||
| // Only bother with ranges up to the maximum prefix length we care about. | |||
| RangeTree r = xmlTable.getRanges(FORMAT, fid).slice(0, maxPrefixLength); | |||
| if (spec.nationalPrefixOptional()) { | |||
| withNationalPrefix = withNationalPrefix.union(r); | |||
| withoutNationalPrefix = withoutNationalPrefix.union(r); | |||
| } else if (spec.national().hasNationalPrefix()) { | |||
| withNationalPrefix = withNationalPrefix.union(r); | |||
| } else { | |||
| withoutNationalPrefix = withoutNationalPrefix.union(r); | |||
| } | |||
| } | |||
| // Only here due to lambdas requiring an effectively final field (this makes me sad). | |||
| RangeTree withNationalPrefixCopy = withNationalPrefix; | |||
| RangeTree allDiallablePrefixes = | |||
| withoutNationalPrefix | |||
| .union(attributes.getNationalPrefixes().stream() | |||
| .map(RangeSpecification::from) | |||
| .map(p -> withNationalPrefixCopy.prefixWith(p)) | |||
| .reduce(RangeTree.empty(), RangeTree::union)); | |||
| // These are prefixes which are claimed to be nationally diallable but overlap with the IDD. | |||
| RangeTree iddOverlap = PrefixTree.from(iddPlusOneDigit).retainFrom(allDiallablePrefixes); | |||
| checkMetadata(iddOverlap.isEmpty(), | |||
| "[%s] ranges cannot start with IDD: %s", attributes.getCallingCode(), iddOverlap); | |||
| } | |||
| /** | |||
| * Ensures the shortcodes are disjoint from main ranges and consistent with each other by format | |||
| * (since format information isn't held separately for each shortcode table). | |||
| */ | |||
| private static ImmutableSortedMap<PhoneRegion, RangeTable> checkShortCodeConsistency( | |||
| Map<PhoneRegion, RangeTable> shortcodeMap, RangeTable table) { | |||
| ImmutableSortedMap<PhoneRegion, RangeTable> shortcodes = | |||
| ImmutableSortedMap.copyOf(shortcodeMap); | |||
| shortcodes.forEach((region, shortcodeTable) -> { | |||
| RangeTree overlap = table.getAllRanges().intersect(shortcodeTable.getAllRanges()); | |||
| checkMetadata(overlap.isEmpty(), | |||
| "Shortcode and national numbers overlap for %s: %s", region, overlap); | |||
| }); | |||
| return shortcodes; | |||
| } | |||
| private static final Schema FORMAT_SCHEMA = | |||
| Schema.builder().add(AREA_CODE_LENGTH).add(FORMAT).build(); | |||
| // We actually explicitly permit duplicate formats (for now) since the XML has them. Later, once | |||
| // everything is settled, it might be possible to add a check here. | |||
| private static ImmutableMap<String, FormatSpec> checkFormatConsistency( | |||
| Attributes attributes, | |||
| Map<String, FormatSpec> formatMap, | |||
| RangeTable table, | |||
| Map<PhoneRegion, RangeTable> shortcodes) { | |||
| DigitSequence cc = attributes.getCallingCode(); | |||
| RangeTable.Builder allFormats = RangeTable.builder(FORMAT_SCHEMA); | |||
| allFormats.copyNonDefaultValues(AREA_CODE_LENGTH, table, OverwriteMode.ALWAYS); | |||
| allFormats.copyNonDefaultValues(FORMAT, table, OverwriteMode.ALWAYS); | |||
| // Throws a RangeException (IllegalArgumentException) if inconsistent write occurs. | |||
| shortcodes.values() | |||
| .forEach(t -> allFormats.copyNonDefaultValues(FORMAT, t, OverwriteMode.SAME)); | |||
| RangeTable formatTable = allFormats.build(); | |||
| ImmutableMap<String, FormatSpec> formats = ImmutableMap.copyOf(formatMap); | |||
| // TODO: Make this "equals" eventually (since it currently sees "synthetic" IDs). | |||
| checkMetadata( | |||
| formats.keySet().containsAll(formatTable.getAssignedValues(FORMAT)), | |||
| "[%s] mismatched format IDs: %s", | |||
| cc, Sets.symmetricDifference(formatTable.getAssignedValues(FORMAT), formats.keySet())); | |||
| // If any of the checks relating to carrier formats are relaxed here, it might be necessary to | |||
| // re-evaluate the logic around regeneration of nationalPrefixForParsing (so be careful!). | |||
| boolean carrierTemplatesExist = false; | |||
| boolean nationalPrefixExistsForFormatting = false; | |||
| boolean nationalPrefixSometimesOptional = false; | |||
| for (String id : formats.keySet()) { | |||
| FormatSpec spec = formats.get(id); | |||
| RangeTree assigned = allFormats.getRanges(FORMAT, id); | |||
| checkMetadata(!assigned.isEmpty(), | |||
| "[%s] format specifier '%s' not assigned to any range: %s", cc, id, spec); | |||
| checkFormatLengths(cc, spec, assigned); | |||
| checkLocalFormatLengths(cc, formatTable, spec, assigned); | |||
| carrierTemplatesExist |= spec.carrier().isPresent(); | |||
| nationalPrefixExistsForFormatting |= | |||
| spec.national().hasNationalPrefix() | |||
| || spec.carrier().map(FormatTemplate::hasNationalPrefix).orElse(false); | |||
| nationalPrefixSometimesOptional |= spec.nationalPrefixOptional(); | |||
| } | |||
| checkMetadata(attributes.getCarrierPrefixes().isEmpty() || carrierTemplatesExist, | |||
| "[%s] carrier prefixes exist but no formats have carrier templates: %s", | |||
| cc, formats.values()); | |||
| checkMetadata(!attributes.getNationalPrefixes().isEmpty() || !nationalPrefixExistsForFormatting, | |||
| "[%s] if no national prefix exists, it cannot be specified in any format template: %s", | |||
| cc, formats.values()); | |||
| checkMetadata(!attributes.getNationalPrefixes().isEmpty() || !nationalPrefixSometimesOptional, | |||
| "[%s] if no national prefix exists, it cannot be optional for formatting: %s", | |||
| cc, formats.values()); | |||
| return formats; | |||
| } | |||
| // Checks that the ranges to which formats are assigned don't have lengths outside the possible | |||
| // lengths of that format (e.g. we don't have "12xx" assigned to the format "XXX-XXX"). | |||
| private static void checkFormatLengths(DigitSequence cc, FormatSpec spec, RangeTree assigned) { | |||
| TreeSet<Integer> unexpected = new TreeSet<>(assigned.getLengths()); | |||
| unexpected.removeAll(ContiguousSet.closed(spec.minLength(), spec.maxLength())); | |||
| if (!unexpected.isEmpty()) { | |||
| RangeTree bad = RangeTree.empty(); | |||
| for (int n : unexpected) { | |||
| bad = bad.union(assigned.intersect(RangeTree.from(RangeSpecification.any(n)))); | |||
| } | |||
| throw new IllegalArgumentException(String.format( | |||
| "[%s] format %s assigned to ranges of invalid length: %s", cc, spec, bad)); | |||
| } | |||
| } | |||
| // Checks that the local lengths for ranges (as determined by area code length) is compatible | |||
| // with the assigned local format specifier. Note that it is allowed to have an area code length | |||
| // of zero and still be assigned a format with a local specifier (the specifier may be shared | |||
| // with other ranges which do have an area code length). | |||
| private static void checkLocalFormatLengths( | |||
| DigitSequence cc, RangeTable formatTable, FormatSpec spec, RangeTree assigned) { | |||
| if (!spec.local().isPresent()) { | |||
| return; | |||
| } | |||
| ImmutableSet<Integer> lengths = | |||
| formatTable.subTable(assigned, AREA_CODE_LENGTH).getAssignedValues(AREA_CODE_LENGTH); | |||
| FormatTemplate local = spec.local().get(); | |||
| // Format specifiers either vary length in the area code or the local number, but not both. | |||
| int localLength = local.minLength(); | |||
| int localVariance = local.maxLength() - local.minLength(); | |||
| if (localVariance == 0) { | |||
| // If there's no length variation in the "local" part, it means the area code length can | |||
| // be variable. | |||
| ContiguousSet<Integer> acls = | |||
| ContiguousSet.closed(spec.minLength() - localLength, spec.maxLength() - localLength); | |||
| checkMetadata(acls.containsAll(lengths), | |||
| "[%s] area code lengths '%s' not supported by format: %s", cc, acls, spec); | |||
| } else { | |||
| // If the length variation of the format is in the trailing "local" part, we expect the a | |||
| // unique area code length (only one "group" in the format can be variable). | |||
| checkMetadata((spec.maxLength() - spec.minLength()) == localVariance, | |||
| "[%s] invalid local format (bad length) in format specifier %s", cc, spec); | |||
| int acl = spec.minLength() - localLength; | |||
| checkMetadata(lengths.size() == 1 && lengths.contains(acl), | |||
| "[%s] implied area code length(s) %s does not match expected length (%s) of format: %s", | |||
| cc, lengths, acl, spec); | |||
| } | |||
| } | |||
| private static ImmutableList<AltFormatSpec> checkAltFormatConsistency( | |||
| ImmutableList<AltFormatSpec> altFormats, | |||
| Map<String, FormatSpec> formats, | |||
| RangeTable xmlTable) { | |||
| for (AltFormatSpec altFormat : altFormats) { | |||
| String parentId = altFormat.parentFormatId(); | |||
| FormatSpec parent = formats.get(parentId); | |||
| checkMetadata(parent != null, "unknown parent format ID in alternate format: %s", altFormat); | |||
| Set<Integer> altLengths = getLengths(altFormat.template()); | |||
| checkMetadata(getLengths(parent.national()).containsAll(altLengths), | |||
| "alternate format lengths must be bounded by parent format lengths: %s", altFormat); | |||
| // Only care about the parent ranges which have the same length(s) as the alt format. | |||
| RangeTree lengthMask = RangeTree.from(altLengths.stream().map(RangeSpecification::any)); | |||
| RangeTree ranges = xmlTable.getRanges(FORMAT, parentId).intersect(lengthMask); | |||
| RangeTree captured = PrefixTree.from(altFormat.prefix()).retainFrom(ranges); | |||
| checkMetadata(!captured.isEmpty(), | |||
| "alternate format must capture some of the parent format ranges: %s", altFormat); | |||
| int prefixLength = altFormat.prefix().length(); | |||
| if (prefixLength > 0) { | |||
| // A really ugly, but useful check to find if there's a better prefix. Specifically, it | |||
| // determines if the given prefix is "over-capturing" ranges (e.g. prefix is "1[2-8]" but | |||
| // only "1[3-6]" exists in the parent format's assigned ranges). Since this is an odd, non | |||
| // set-like operation, it's just done "manually" using bit masks. It's not a union of the | |||
| // paths, it's a "squashing" (since it results in the smallest single range specification). | |||
| // | |||
| // Start with all the paths trimmed to the prefix length (e.g. "123", "145", "247"). All | |||
| // range specifications in the slice are the same length as the prefix we started with. | |||
| RangeTree slice = captured.slice(prefixLength); | |||
| // Now union the digit masks at each depth for all paths in the slice (in theory there | |||
| // could be a "squash" operation on RangeSpecification to do all this). | |||
| int[] masks = new int[prefixLength]; | |||
| slice.asRangeSpecifications().forEach(s -> { | |||
| for (int n = 0; n < prefixLength; n++) { | |||
| masks[n] |= s.getBitmask(n); | |||
| } | |||
| }); | |||
| // Now reconstruct the single "squashed" range specification (e.g. "[12][24][357]"). | |||
| RangeSpecification minSpec = RangeSpecification.empty(); | |||
| for (int n = 0; n < prefixLength; n++) { | |||
| minSpec = minSpec.extendByMask(masks[n]); | |||
| } | |||
| checkMetadata(minSpec.equals(altFormat.prefix()), | |||
| "alternate format prefix '%s' is too broad, it should be '%s' for: %s", | |||
| altFormat.prefix(), minSpec, altFormat); | |||
| } | |||
| } | |||
| return altFormats; | |||
| } | |||
| private static Set<Integer> getLengths(FormatTemplate t) { | |||
| return ContiguousSet.closed(t.minLength(), t.maxLength()); | |||
| } | |||
| // Checks that example numbers are valid numbers in the ranges for their type. | |||
| private static ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> checkExampleNumbers( | |||
| Set<PhoneRegion> regions, | |||
| RangeTable table, | |||
| Table<PhoneRegion, ValidNumberType, DigitSequence> exampleNumbers) { | |||
| for (PhoneRegion r : regions) { | |||
| RangeTable regionTable = | |||
| table.subTable(table.getRanges(REGIONS.getColumn(r), TRUE), XmlRangesSchema.TYPE); | |||
| Map<ValidNumberType, DigitSequence> regionExamples = exampleNumbers.row(r); | |||
| ImmutableSet<ValidNumberType> types = regionTable.getAssignedValues(XmlRangesSchema.TYPE); | |||
| checkMetadata(types.equals(regionExamples.keySet()), | |||
| "mismatched types for example numbers in region %s\nExpected: %s\nActual: %s", | |||
| r, types, regionExamples); | |||
| for (ValidNumberType t : types) { | |||
| DigitSequence exampleNumber = regionExamples.get(t); | |||
| RangeTree ranges = regionTable.getRanges(XmlRangesSchema.TYPE, t); | |||
| // Special case, since we permit example numbers for fixed line/mobile to be valid for the | |||
| // combined range as well. | |||
| // | |||
| // This logic smells, since it reveals information about the XML structure (in which fixed | |||
| // line and mobile ranges can overlap). However if we insist that a fixed line examples are | |||
| // in the "fixed line only" range, we end up with problems if (mobile == fixed line), since | |||
| // there is no "fixed line only" range (but there is an example number in the XML). | |||
| if (t == ValidNumberType.MOBILE || t == ValidNumberType.FIXED_LINE) { | |||
| ranges = ranges.union( | |||
| regionTable.getRanges(XmlRangesSchema.TYPE, ValidNumberType.FIXED_LINE_OR_MOBILE)); | |||
| } | |||
| checkMetadata(ranges.contains(exampleNumber), | |||
| "invalid example number '%s' of type %s in region %s", exampleNumber, t, r); | |||
| } | |||
| } | |||
| return ImmutableTable.copyOf(exampleNumbers); | |||
| } | |||
| public abstract Attributes getAttributes(); | |||
| // TODO: Inline the wrapper methods below. | |||
| /** Returns the unique calling code of this numbering scheme. */ | |||
| public DigitSequence getCallingCode() { | |||
| return getAttributes().getCallingCode(); | |||
| } | |||
| /** | |||
| * Returns the regions represented by this numbering scheme. The main region is always present | |||
| * and listed first, and remaining regions are listed in "natural" order. | |||
| */ | |||
| public ImmutableSet<PhoneRegion> getRegions() { | |||
| return getAttributes().getRegions(); | |||
| } | |||
| /** | |||
| * Returns a range table containing per-range attributes according to | |||
| * {@link XmlRangesSchema#COLUMNS}. | |||
| */ | |||
| public abstract RangeTable getTable(); | |||
| /** | |||
| * Returns a RangeTable restricted to the given region, which conforms to the | |||
| * {@link XmlRangesSchema} schema, with the exception that no region columns exist. | |||
| */ | |||
| public final RangeTable getTableFor(PhoneRegion region) { | |||
| checkArgument(getRegions().contains(region), | |||
| "invalid region '%s' for calling code '%s'", region, getCallingCode()); | |||
| return getTable() | |||
| .subTable(getTable().getRanges(REGIONS.getColumn(region), TRUE), PER_REGION_COLUMNS); | |||
| } | |||
| public abstract ImmutableSortedMap<PhoneRegion, RangeTable> getShortcodes(); | |||
| /** Returns the RangeTable for the shortcodes of the given region. */ | |||
| public final Optional<RangeTable> getShortcodesFor(PhoneRegion region) { | |||
| checkArgument(getRegions().contains(region), | |||
| "invalid region '%s' for calling code '%s'", region, getCallingCode()); | |||
| return Optional.ofNullable(getShortcodes().get(region)); | |||
| } | |||
| /** Returns the map of format ID to format specifier. */ | |||
| public abstract ImmutableMap<String, FormatSpec> getFormats(); | |||
| /** Returns a list of alternate formats which are also expected for this numbering scheme. */ | |||
| public abstract ImmutableList<AltFormatSpec> getAlternateFormats(); | |||
| /** Returns a table of example numbers for each region code and number type. */ | |||
| public abstract ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> getExampleNumbers(); | |||
| /** | |||
| * Returns all comments known about by this numbering scheme. Internal method, callers should | |||
| * always use {@link #getComments(Anchor)} instead. | |||
| */ | |||
| abstract ImmutableList<Comment> getAllComments(); | |||
| /** Returns comments with a specified anchor for this numbering scheme. */ | |||
| public ImmutableList<Comment> getComments(Anchor anchor) { | |||
| checkArgument(getAttributes().getRegions().contains(anchor.region()), | |||
| "invalid region: %s", anchor.region()); | |||
| return getAllComments().stream() | |||
| .filter(c -> c.getAnchor().equals(anchor)) | |||
| .collect(toImmutableList()); | |||
| } | |||
| /** | |||
| * An encapsulation of a comment to be associated with an element in the XML. Rather than have | |||
| * many APIs for setting/getting comments on a {@link NumberingScheme}, the approach taken here | |||
| * is to let comments describe for themselves where they go but keep them in one big bucket. | |||
| * <p> | |||
| * This simplifies a lot of the intermediate APIs in the builders, but is less efficient (since | |||
| * finding comments is now a linear search). If this is ever an issue, they should be mapped by | |||
| * key, using a {@code ListMultimap<String, Comment>} (since comments are also ordered by their | |||
| * number). | |||
| */ | |||
| @AutoValue | |||
| public abstract static class Comment { | |||
| private static final Joiner JOIN_LINES = Joiner.on('\n'); | |||
| private static final Splitter SPLIT_LINES = Splitter.on('\n'); | |||
| /** An anchor defining which element, in which territory, a comment should be attached to. */ | |||
| @AutoValue | |||
| public abstract static class Anchor implements Comparable<Anchor> { | |||
| // Special anchor for comments that are not stored in the comment table, but are attached to | |||
| // data directly (e.g. formats). | |||
| private static final Anchor ANONYMOUS = of(PhoneRegion.getUnknown(), ""); | |||
| private static final Comparator<Anchor> ORDERING = | |||
| comparing(Anchor::region).thenComparing(Anchor::label); | |||
| /** Creates a comment anchor from a region and xml type. */ | |||
| static Anchor of(PhoneRegion region, String label) { | |||
| // TODO: Add check for valid label. | |||
| return anchor(region, label); | |||
| } | |||
| /** The region of the territory this comment should be attached to. */ | |||
| public abstract PhoneRegion region(); | |||
| /** | |||
| * The type in the territory this comment should be attached to. If missing, attach this | |||
| * comment to the main comment block for the territory. | |||
| */ | |||
| public abstract String label(); | |||
| @Override | |||
| public int compareTo(Anchor that) { | |||
| return ORDERING.compare(this, that); | |||
| } | |||
| } | |||
| // Private since we want to funnel people through type safe factory methods. | |||
| private static Anchor anchor(PhoneRegion region, String label) { | |||
| return new AutoValue_NumberingScheme_Comment_Anchor(region, label); | |||
| } | |||
| /** Returns a key identifying a comment for a region. */ | |||
| public static Anchor anchor(PhoneRegion region) { | |||
| return anchor(region, "XML"); | |||
| } | |||
| /** Returns a key identifying a comment for the validation range of a given type in a region. */ | |||
| public static Anchor anchor(PhoneRegion region, XmlNumberType xmlType) { | |||
| return anchor(region, xmlType.toString()); | |||
| } | |||
| /** | |||
| * Returns a key identifying a comment for the validation range of a given shortcode type in | |||
| * a region. | |||
| */ | |||
| public static Anchor shortcodeAnchor(PhoneRegion region) { | |||
| return anchor(region, "SC"); | |||
| } | |||
| /** | |||
| * Returns a key identifying a comment for the validation range of a given shortcode type in | |||
| * a region. | |||
| */ | |||
| public static Anchor shortcodeAnchor(PhoneRegion region, XmlShortcodeType xmlType) { | |||
| return anchor(region, xmlType.toString()); | |||
| } | |||
| /** Creates a comment the applies to data identified by the specified key. */ | |||
| public static Comment create(Anchor anchor, List<String> lines) { | |||
| return new AutoValue_NumberingScheme_Comment(anchor, ImmutableList.copyOf(lines)); | |||
| } | |||
| /** Creates a comment the applies to data identified by the specified key. */ | |||
| public static Comment createAnonymous(List<String> lines) { | |||
| return new AutoValue_NumberingScheme_Comment(Anchor.ANONYMOUS, ImmutableList.copyOf(lines)); | |||
| } | |||
| public static Comment fromText(Anchor anchor, String text) { | |||
| return create(anchor, SPLIT_LINES.splitToList(text)); | |||
| } | |||
| public static Comment fromText(String text) { | |||
| return createAnonymous(SPLIT_LINES.splitToList(text)); | |||
| } | |||
| /** | |||
| * Returns the key which defines what this comment relates to (and thus where it should appear | |||
| * in the XML file). | |||
| */ | |||
| public abstract Anchor getAnchor(); | |||
| /** The lines of a single mulit-line comment. */ | |||
| // TODO: Switch to a single string (with newlines) which is what's done elsewhere. | |||
| public abstract ImmutableList<String> getLines(); | |||
| public String toText() { | |||
| return JOIN_LINES.join(getLines()); | |||
| } | |||
| // Visible for AutoValue. | |||
| Comment() {} | |||
| } | |||
// Package-private so that only the AutoValue generated subclass can extend this class.
NumberingScheme() {}
| } | |||
| @ -0,0 +1,63 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | |||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||
| import static java.util.function.Function.identity; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.MetadataKey; | |||
| import java.util.List; | |||
| /** | |||
| * Collection of numbering schemes, mapped primarily by calling code, but available via other | |||
| * mappings (e.g. metadata key) for convenience. | |||
| */ | |||
| // TODO: Delete this (it's hardly used and very little more than a simple collection). | |||
| @AutoValue | |||
| public abstract class NumberingSchemes { | |||
| /** | |||
| * Aggregates a list of numbering schemes into a single collection which mirrors the structure and | |||
| * mapping of the libphonenumber XML metadata file. | |||
| */ | |||
| public static NumberingSchemes from(List<NumberingScheme> schemes) { | |||
| ImmutableMap<DigitSequence, NumberingScheme> map = | |||
| schemes.stream().collect(toImmutableMap(NumberingScheme::getCallingCode, identity())); | |||
| ImmutableSet<MetadataKey> allKeys = map.values().stream() | |||
| .flatMap(s -> s.getRegions().stream().map(r -> MetadataKey.create(r, s.getCallingCode()))) | |||
| .collect(toImmutableSet()); | |||
| return new AutoValue_NumberingSchemes(map, allKeys); | |||
| } | |||
| /** Returns a mapping of top-level numbering schemes by calling code. */ | |||
| // TODO: Rename to getSchemeMap() since it's confusing, or add a direct getter. | |||
| public abstract ImmutableMap<DigitSequence, NumberingScheme> getSchemes(); | |||
| /** Returns the set of all calling codes for top-level schemes in this collection. */ | |||
| public ImmutableSet<DigitSequence> getCallingCodes() { | |||
| return getSchemes().keySet(); | |||
| } | |||
| /** Returns the set of all metadata keys for regional schemes in this collection. */ | |||
| public abstract ImmutableSet<MetadataKey> getKeys(); | |||
| // Visible for AutoValue. | |||
| NumberingSchemes() {} | |||
| } | |||
| @ -0,0 +1,88 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.ColumnGroup; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||
| /** | |||
| * The schema of the "Operators" table with rows keyed by operator ID and columns: | |||
| * <ol> | |||
| * <li>{@link #SELECTION_CODES}: Operator selection codes for national dialling. | |||
| * <li>{@link #IDD_PREFIXES}: International direct dialling codes. | |||
| * <li>{@link #NAMES}: A group of columns containing the name of the operator, potential in | |||
| * multiple languages. Note that English translations for all operators need not be present. | |||
| * </ol> | |||
| * | |||
| * <p>Rows keys are serialized via the marshaller and produce the leading column: | |||
| * <ol> | |||
| * <li>{@code Id}: The operator ID. | |||
| * </ol> | |||
| * | |||
| * <p>The default IDD prefix should not be in this table, but is instead stored in the top-level | |||
| * {@link MetadataTableSchema#IDD_PREFIX} column. | |||
| * | |||
| * <p>Note that there is a special case in which we need to store a selection code or IDD code, but | |||
| * it does not below to a operator with an assigned range (e.g. it's a universally available code). | |||
| * In these situations, you should ensure that the operator ID starts with "__" (double underscore) | |||
| * to prevent consistency checks from complaining about unassigned operators. You can also omit a | |||
| * name for the row, but should probably add a comment. | |||
| */ | |||
| public final class OperatorsTableSchema { | |||
| /** | |||
| * A comma separated list of "selection codes" (as range specifications) which are added to | |||
| * national numbers (not always as a prefix) to select an operator for national dialling. | |||
| * This will often contain many of the same values as IDD_CODES but need not be identical. | |||
| * | |||
| * <p>Note that while a single operator may have more than one code associated with it, the same | |||
| * code cannot appear in more than one row in this table. | |||
| */ | |||
| public static final Column<String> SELECTION_CODES = Column.ofString("Domestic Selection Codes"); | |||
| /** | |||
| * A comma separated list of "International Direct Dialing" codes (as range specifications) which | |||
| * are prefixes for international dialling. This will often contain many of the same prefixes as | |||
| * SELECTION_CODES but need not be identical. | |||
| * | |||
| * <p>Note that while a single operator may have more than one code associated with it, the same | |||
| * code cannot appear in more than one row in this table. | |||
| */ | |||
| public static final Column<String> IDD_PREFIXES = Column.ofString("International Dialling Codes"); | |||
| /** The "Name:XXX" column group in the operator table. */ | |||
| public static final ColumnGroup<SimpleLanguageTag, String> NAMES = | |||
| ColumnGroup.byLanguage(Column.ofString("Name")); | |||
| public static final Column<String> COMMENT = RangesTableSchema.COMMENT; | |||
| private static final CsvKeyMarshaller<String> MARSHALLER = CsvKeyMarshaller.ofSortedString("Id"); | |||
| private static final Schema COLUMNS = Schema.builder() | |||
| .add(SELECTION_CODES) | |||
| .add(IDD_PREFIXES) | |||
| .add(NAMES) | |||
| .add(COMMENT) | |||
| .build(); | |||
| /** Schema instance defining the operators CSV table. */ | |||
| public static final CsvSchema<String> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS); | |||
| private OperatorsTableSchema() {} | |||
| } | |||
| @ -0,0 +1,396 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.collect.DiscreteDomain.integers; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | |||
| import static java.util.Comparator.comparing; | |||
| import static java.util.function.Function.identity; | |||
| import static java.util.stream.Collectors.joining; | |||
| import com.google.common.base.Splitter; | |||
| import com.google.common.collect.ContiguousSet; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableRangeSet; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.ImmutableSortedSet; | |||
| import com.google.common.collect.Range; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag; | |||
| import com.google.i18n.phonenumbers.metadata.model.MetadataTableSchema.Regions; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Enums.Provenance; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.table.Change; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.ColumnGroup; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.MultiValue; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeKey; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||
| import java.time.ZoneId; | |||
| import java.util.List; | |||
| import java.util.NavigableSet; | |||
| import java.util.Optional; | |||
| import java.util.TreeSet; | |||
| import java.util.stream.Stream; | |||
| /** | |||
| * The schema of the standard "Ranges" table with rows keyed by {@link RangeKey} and columns: | |||
| * <ol> | |||
| * <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not | |||
| * the same as XmlNumberType or ValidNumberType). All ranges should be assigned a type. | |||
| * <li>{@link #TARIFF}: The expected cost of numbers in a range (combining TYPE and TARIFF | |||
| * can yield the internal ValidNumberType). All ranges should be assigned a tariff. | |||
| * <li>{@link #AREA_CODE_LENGTH}: The length of an optional prefix which may be removed from | |||
| * numbers in a range for local dialling. Local only lengths are derived using this column. | |||
| * <li>{@link #NATIONAL_ONLY}: True if numbers in a range cannot be dialled from outside its | |||
| * region. The "noInternationalDialling" ranges are derived from this column. | |||
| * <li>{@link #SMS}: True if numbers in a range are expected to support SMS. | |||
| * <li>{@link #OPERATOR}: The expected operator (carrier) ID for a range (or empty if no carrier | |||
| * is known). | |||
| * <li>{@link #FORMAT}: The expected format ID for a range (or empty if no formatting should be | |||
| * applied). | |||
| * <li>{@link #TIMEZONE}: The timezone names for a range (or empty to imply the default | |||
| * timezones). Multiple timezones can be specified if separated by {@code '&'}. | |||
| * <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where a range set | |||
| * to {@code true} is valid within the region {@code XX}. | |||
| * <li>{@link #GEOCODES}: A group of String columns in the form "Geocode:XXX" containing the | |||
| * geocode string for a range, where {@code XXX} is the language code of the string. | |||
| * <li>{@link #PROVENANCE}: Indicates the most important reason for a range to be valid. | |||
| * <li>{@link #COMMENT}: Free text field usually containing evidence related to the provenance. | |||
| * </ol> | |||
| * | |||
| * <p>Row keys are serialized via the marshaller and produce leading columns: | |||
| * <ol> | |||
| * <li>{@code Prefix}: The prefix (RangeSpecification) for the ranges in a row (e.g. "12[3-6]"). | |||
| * <li>{@code Length}: A set of lengths for the ranges in a row (e.g. "9", "8,9" or "5,7-9"). | |||
| * </ol> | |||
| */ | |||
| public final class RangesTableSchema { | |||
| /** | |||
| * External number type enum. This is technically much better than ValidNumberType since it | |||
| * splits type and cost properly. Unfortunately the internal logic of the phonenumber library | |||
| * doesn't really cope with this, which is why we convert to {@code XmlRangesSchema} before | |||
| * creating legacy data structures. | |||
| * | |||
| * <p>This enum can be modified as new types are requested from data providers, providing the | |||
| * type mapping to ValidNumberType is updated appropriately. Note that until it's clear that | |||
| * mapping types such as {@link #M2M} to {@link ValidNumberType#UNKNOWN} will work okay, we | |||
| * should be very careful about using the additional types. Additional types need to be removed | |||
| * before the generated table can be turned into a {@link NumberingScheme}. | |||
| */ | |||
| public enum ExtType { | |||
| /** Default value not permitted in real data. */ | |||
| UNKNOWN, | |||
| /** Maps to {@link ValidNumberType#FIXED_LINE}. */ | |||
| FIXED_LINE, | |||
| /** Maps to {@link ValidNumberType#MOBILE}. */ | |||
| MOBILE, | |||
| /** Maps to {@link ValidNumberType#FIXED_LINE_OR_MOBILE}. */ | |||
| FIXED_LINE_OR_MOBILE, | |||
| /** Maps to {@link ValidNumberType#VOIP}. */ | |||
| VOIP, | |||
| /** Maps to {@link ValidNumberType#PAGER}. */ | |||
| PAGER, | |||
| /** Maps to {@link ValidNumberType#PERSONAL_NUMBER}. */ | |||
| PERSONAL_NUMBER, | |||
| /** Maps to {@link ValidNumberType#UAN}. */ | |||
| UAN, | |||
| /** Maps to {@link ValidNumberType#VOICEMAIL}. */ | |||
| VOICEMAIL, | |||
| /** Machine-to-machine numbers (additional type for future support). */ | |||
| M2M, | |||
| /** ISP dial-up numbers (additional type for future support). */ | |||
| ISP; | |||
| private static final ImmutableMap<ExtType, ValidNumberType> TYPE_MAP = | |||
| Stream.of( | |||
| ExtType.FIXED_LINE, | |||
| ExtType.MOBILE, | |||
| ExtType.FIXED_LINE_OR_MOBILE, | |||
| ExtType.PAGER, | |||
| ExtType.PERSONAL_NUMBER, | |||
| ExtType.UAN, | |||
| ExtType.VOICEMAIL, | |||
| ExtType.VOIP) | |||
| .collect(toImmutableMap(identity(), v -> ValidNumberType.valueOf(v.name()))); | |||
| public Optional<ValidNumberType> toValidNumberType() { | |||
| return Optional.ofNullable(TYPE_MAP.get(this)); | |||
| } | |||
| } | |||
| /** | |||
| * External tariff enum. By splitting tariff information out from the "line type", we can | |||
| * represent a much wider (and more realistic) set of combinations for number ranges. When | |||
| * combined with {@link ExtType}, this maps back to {@code ValidNumberType}. | |||
| */ | |||
| public enum ExtTariff { | |||
| /** Does not affect ValidNumberType mapping. */ | |||
| STANDARD_RATE, | |||
| /** Maps to {@link ValidNumberType#TOLL_FREE}. */ | |||
| TOLL_FREE, | |||
| /** Maps to {@link ValidNumberType#SHARED_COST}. */ | |||
| SHARED_COST, | |||
| /** Maps to {@link ValidNumberType#PREMIUM_RATE}. */ | |||
| PREMIUM_RATE; | |||
| private static final ImmutableMap<ExtTariff, ValidNumberType> TARIFF_MAP = | |||
| Stream.of(ExtTariff.TOLL_FREE, ExtTariff.SHARED_COST, ExtTariff.PREMIUM_RATE) | |||
| .collect(toImmutableMap(identity(), v -> ValidNumberType.valueOf(v.name()))); | |||
| public Optional<ValidNumberType> toValidNumberType() { | |||
| return Optional.ofNullable(TARIFF_MAP.get(this)); | |||
| } | |||
| } | |||
| /** The value in the "TIMEZONE" column, which is effectively a list of timezone strings. */ | |||
| public static final class Timezones extends MultiValue<ZoneId, Timezones> { | |||
| public static Column<Timezones> column(String name) { | |||
| return Column.create(Timezones.class, name, new Timezones(""), Timezones::new); | |||
| } | |||
| public Timezones(Iterable<ZoneId> ids) { | |||
| super(ids, '&', comparing(ZoneId::getId), true); | |||
| } | |||
| public Timezones(String s) { | |||
| super(s, ZoneId::of, '&', comparing(ZoneId::getId), true); | |||
| } | |||
| } | |||
| public static final Column<ExtType> TYPE = Column.of(ExtType.class, "Type", ExtType.UNKNOWN); | |||
| public static final Column<ExtTariff> TARIFF = | |||
| Column.of(ExtTariff.class, "Tariff", ExtTariff.STANDARD_RATE); | |||
| /** | |||
| * The "Area Code Length" column in the range table, denoting the length of a prefix which can | |||
| * be removed from all numbers in a range to obtain locally diallable numbers. If an | |||
| * "area code" is not optional for dialling, then no value should be set here. | |||
| */ | |||
| public static final Column<Integer> AREA_CODE_LENGTH = | |||
| Column.ofUnsignedInteger("Area Code Length"); | |||
| /** Denotes ranges which cannot be dialled internationally. */ | |||
| public static final Column<Boolean> NATIONAL_ONLY = Column.ofBoolean("National Only"); | |||
| /** Denotes ranges which can reasonably be expected to receive SMS. */ | |||
| public static final Column<Boolean> SMS = Column.ofBoolean("Sms"); | |||
| /** The ID of the primary/original operator assigned to a range. */ | |||
| public static final Column<String> OPERATOR = Column.ofString("Operator"); | |||
| /** The ID of the format assigned to a range. */ | |||
| public static final Column<String> FORMAT = Column.ofString("Format"); | |||
| /** An '&'-separated list of timezone IDs associated with this range. */ | |||
| public static final Column<Timezones> TIMEZONE = Timezones.column("Timezone"); | |||
| /** The "Region:XX" column group in the range table. */ | |||
| public static final ColumnGroup<PhoneRegion, Boolean> REGIONS = | |||
| ColumnGroup.byRegion(Column.ofBoolean("Region")); | |||
| /** The "Regions" column in the CSV table. */ | |||
| public static final Column<Regions> CSV_REGIONS = Regions.column("Regions"); | |||
| /** The "Geocode:XXX" column group in the range table. */ | |||
| public static final ColumnGroup<SimpleLanguageTag, String> GEOCODES = | |||
| ColumnGroup.byLanguage(Column.ofString("Geocode")); | |||
| /** The provenance column indicating why a range is considered valid. */ | |||
| public static final Column<Provenance> PROVENANCE = | |||
| Column.of(Provenance.class, "Provenance", Provenance.UNKNOWN); | |||
| /** An arbitrary text comment, usually (at least) supplying information about the provenance. */ | |||
| public static final Column<String> COMMENT = Column.ofString("Comment"); | |||
| /** Marshaller for constructing CsvTable from RangeTable. */ | |||
| private static final CsvKeyMarshaller<RangeKey> MARSHALLER = new CsvKeyMarshaller<>( | |||
| RangesTableSchema::write, | |||
| RangesTableSchema::read, | |||
| Optional.of(RangeKey.ORDERING), | |||
| "Prefix", | |||
| "Length"); | |||
| /** The non-key columns of a range table. */ | |||
| public static final Schema TABLE_COLUMNS = | |||
| Schema.builder() | |||
| .add(TYPE) | |||
| .add(TARIFF) | |||
| .add(AREA_CODE_LENGTH) | |||
| .add(NATIONAL_ONLY) | |||
| .add(SMS) | |||
| .add(OPERATOR) | |||
| .add(FORMAT) | |||
| .add(TIMEZONE) | |||
| .add(REGIONS) | |||
| .add(GEOCODES) | |||
| .add(PROVENANCE) | |||
| .add(COMMENT) | |||
| .build(); | |||
| /** | |||
| * The columns for the serialized CSV table. Note that the "REGIONS" column group is replaced | |||
| * by the CSV regions multi-value. This allows region codes to be serialize in a single column | |||
| * (which is far nicer when looking at data in a spreadsheet). In the range table, this is | |||
| * normalized into the boolean column group (because that's far nicer to work with). | |||
| */ | |||
| private static final Schema CSV_COLUMNS = | |||
| Schema.builder() | |||
| .add(TYPE) | |||
| .add(TARIFF) | |||
| .add(AREA_CODE_LENGTH) | |||
| .add(NATIONAL_ONLY) | |||
| .add(SMS) | |||
| .add(OPERATOR) | |||
| .add(FORMAT) | |||
| .add(TIMEZONE) | |||
| .add(CSV_REGIONS) | |||
| .add(GEOCODES) | |||
| .add(PROVENANCE) | |||
| .add(COMMENT) | |||
| .build(); | |||
| /** Schema instance defining the ranges CSV table. */ | |||
| public static final CsvSchema<RangeKey> SCHEMA = CsvSchema.of(MARSHALLER, CSV_COLUMNS); | |||
| /** | |||
| * Converts a {@link RangeTable} to a {@link CsvTable}, using {@link RangeKey}s as row keys and | |||
| * preserving the original table columns. The {@link CsvSchema} of the returned table is not | |||
| * guaranteed to be the {@link #SCHEMA} instance if the given table had different columns. | |||
| */ | |||
| @SuppressWarnings("unchecked") | |||
| public static CsvTable<RangeKey> toCsv(RangeTable table) { | |||
| CsvTable.Builder<RangeKey> csv = CsvTable.builder(SCHEMA); | |||
| ImmutableSet<Column<Boolean>> regionColumns = | |||
| REGIONS.extractGroupColumns(table.getColumns()).values(); | |||
| TreeSet<PhoneRegion> regions = new TreeSet<>(); | |||
| for (Change c : table.toChanges()) { | |||
| for (RangeKey k : RangeKey.decompose(c.getRanges())) { | |||
| regions.clear(); | |||
| c.getAssignments().forEach(a -> { | |||
| // We special case the regions column, converting a group of boolean columns into a | |||
| // multi-value of region codes. If the column is in the group, it must hold Booleans. | |||
| if (regionColumns.contains(a.column())) { | |||
| if (a.value().map(((Column<Boolean>) a.column())::cast).orElse(Boolean.FALSE)) { | |||
| regions.add(REGIONS.getKey(a.column())); | |||
| } | |||
| } else { | |||
| csv.put(k, a); | |||
| } | |||
| }); | |||
| // We can do this out-of-sequence because the table will order its columns. | |||
| if (!regions.isEmpty()) { | |||
| csv.put(k, CSV_REGIONS, Regions.of(regions)); | |||
| } | |||
| } | |||
| } | |||
| return csv.build(); | |||
| } | |||
| /** | |||
| * Converts a {@link RangeKey} based {@link CsvTable} to a {@link RangeTable}, preserving the | |||
| * original table columns. The {@link CsvSchema} of the returned table is not guaranteed to be | |||
| * the {@link #SCHEMA} instance if the given table had different columns. | |||
| */ | |||
| public static RangeTable toRangeTable(CsvTable<RangeKey> csv) { | |||
| RangeTable.Builder out = RangeTable.builder(TABLE_COLUMNS); | |||
| for (RangeKey k : csv.getKeys()) { | |||
| Change.Builder change = Change.builder(k.asRangeTree()); | |||
| csv.getRow(k).forEach((c, v) -> { | |||
| // We special case the regions column, converting a comma separated list of region codes | |||
| // into a series of boolean column assignments. | |||
| if (c.equals(CSV_REGIONS)) { | |||
| CSV_REGIONS.cast(v).getValues().forEach(r -> change.assign(REGIONS.getColumn(r), true)); | |||
| } else { | |||
| change.assign(c, v); | |||
| } | |||
| }); | |||
| out.apply(change.build(), OverwriteMode.NEVER); | |||
| } | |||
| return out.build(); | |||
| } | |||
| // Shared by ShortcodeTableSchema | |||
| public static Stream<String> write(RangeKey key) { | |||
| return Stream.of(key.getPrefix().toString(), formatLength(key.getLengths())); | |||
| } | |||
| // Shared by ShortcodeTableSchema | |||
| public static RangeKey read(List<String> parts) { | |||
| return RangeKey.create(RangeSpecification.parse(parts.get(0)), parseLengths(parts.get(1))); | |||
| } | |||
| private static String formatLength(ImmutableSortedSet<Integer> lengthSet) { | |||
| checkArgument(!lengthSet.isEmpty()); | |||
| ImmutableRangeSet<Integer> r = | |||
| ImmutableRangeSet.unionOf( | |||
| lengthSet.stream() | |||
| .map(n -> Range.singleton(n).canonical(integers())) | |||
| .collect(toImmutableList())); | |||
| return r.asRanges().stream().map(RangesTableSchema::formatRange).collect(joining(",")); | |||
| } | |||
| private static String formatRange(Range<Integer> r) { | |||
| ContiguousSet<Integer> s = ContiguousSet.create(r, integers()); | |||
| switch (s.size()) { | |||
| case 1: | |||
| return String.valueOf(s.first()); | |||
| case 2: | |||
| return s.first() + "," + s.last(); | |||
| default: | |||
| return s.first() + "-" + s.last(); | |||
| } | |||
| } | |||
| private static final Splitter COMMA_SPLITTER = Splitter.on(',').trimResults(); | |||
| private static final Splitter RANGE_SPLITTER = Splitter.on('-').trimResults().limit(2); | |||
| private static NavigableSet<Integer> parseLengths(String s) { | |||
| NavigableSet<Integer> lengths = new TreeSet<>(); | |||
| for (String lengthOrRange : COMMA_SPLITTER.split(s)) { | |||
| if (lengthOrRange.contains("-")) { | |||
| List<String> lohi = RANGE_SPLITTER.splitToList(lengthOrRange); | |||
| int lo = parseInt(lohi.get(0)); | |||
| int hi = parseInt(lohi.get(1)); | |||
| checkArgument(lo < hi, "Invalid range: %s-%s", lo, hi); | |||
| checkArgument(lengths.isEmpty() || lo > lengths.last(), "Overlapping ranges: %s", s); | |||
| lengths.addAll(ContiguousSet.closed(lo, hi)); | |||
| } else { | |||
| int length = parseInt(lengthOrRange); | |||
| checkArgument(lengths.isEmpty() || length > lengths.last(), "Overlapping ranges: %s", s); | |||
| lengths.add(length); | |||
| } | |||
| } | |||
| return lengths; | |||
| } | |||
| private static int parseInt(String s) { | |||
| return Integer.parseUnsignedInt(s, 10); | |||
| } | |||
/** Not instantiable: this class only holds static constants and helper methods. */
private RangesTableSchema() {
}
| } | |||
| @ -0,0 +1,228 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap; | |||
| import static com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType.EMERGENCY; | |||
| import static com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType.EXPANDED_EMERGENCY; | |||
| import static java.util.function.Function.identity; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableBiMap; | |||
| import com.google.common.collect.ImmutableSortedMap; | |||
| import com.google.common.collect.Maps; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Enums.Provenance; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType; | |||
| import com.google.i18n.phonenumbers.metadata.table.Change; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeKey; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||
| import java.util.Comparator; | |||
| import java.util.LinkedHashMap; | |||
| import java.util.List; | |||
| import java.util.Map; | |||
| import java.util.Optional; | |||
| import java.util.stream.Stream; | |||
| /** | |||
| * The schema of the standard "Shortcodes" table with rows keyed by {@link RangeKey} and columns: | |||
| * <ol> | |||
| * <li>{@link #TYPE}: The semantic type of numbers in a range. All ranges should be assigned a | |||
| * type. | |||
| * <li>{@link #TARIFF}: The expected cost of numbers in a range. All ranges should be assigned a | |||
| * tariff. | |||
| * <li>{@link #SMS}: True if numbers in a range are expected to support SMS. | |||
| * <li>{@link #SUBREGION}: True if numbers in a range are expected to be only diallable from a | |||
| * geographic subregion (rather than the whole region). | |||
| * <li>{@link #PROVENANCE}: Indicates the most important reason for a range to be valid. | |||
| * <li>{@link #COMMENT}: Free text field usually containing evidence related to the provenance. | |||
| * </ol> | |||
| * | |||
| * <p>Row keys are serialized via the marshaller and produce leading columns: | |||
| * <ol> | |||
| * <li>{@code Region}: The region code for which this range applies. | |||
| * <li>{@code Prefix}: The prefix (RangeSpecification) for the ranges in a row (e.g. "12[3-6]"). | |||
| * <li>{@code Length}: A set of lengths for the ranges in a row (e.g. "9", "8,9" or "5,7-9"). | |||
| * </ol> | |||
| * | |||
| * <p>Note that the region must be part of the key, since some shortcodes have different types | |||
| * between different regions. | |||
| */ | |||
| public final class ShortcodesTableSchema { | |||
| /** | |||
| * The row key of the shortcode table, specifying region and range key. This permits all | |||
| * shortcodes to be stored in a single table (which is very helpful in NANPA, where there are | |||
| * many regions, most with only a tiny amount of shortcode information). | |||
| */ | |||
| @AutoValue | |||
| public abstract static class ShortcodeKey { | |||
| private static final Comparator<ShortcodeKey> ORDERING = Comparator | |||
| .comparing(ShortcodeKey::getRegion) | |||
| .thenComparing(ShortcodeKey::getRangeKey, RangeKey.ORDERING); | |||
| private static final CsvKeyMarshaller<ShortcodeKey> MARSHALLER = new CsvKeyMarshaller<>( | |||
| ShortcodeKey::write, | |||
| ShortcodeKey::read, | |||
| Optional.of(ShortcodeKey.ORDERING), | |||
| "Region", | |||
| "Prefix", | |||
| "Length"); | |||
| private static Stream<String> write(ShortcodeKey key) { | |||
| return Stream.concat( | |||
| Stream.of(key.getRegion().toString()), | |||
| RangesTableSchema.write(key.getRangeKey())); | |||
| } | |||
| private static ShortcodeKey read(List<String> parts) { | |||
| return ShortcodeKey.create( | |||
| PhoneRegion.of(parts.get(0)), | |||
| RangesTableSchema.read(parts.subList(1, parts.size()))); | |||
| } | |||
| public static ShortcodeKey create(PhoneRegion region, RangeKey rangeKey) { | |||
| checkArgument(!region.equals(PhoneRegion.getUnknown()), "region must be valid"); | |||
| return new AutoValue_ShortcodesTableSchema_ShortcodeKey(region, rangeKey); | |||
| } | |||
| public abstract PhoneRegion getRegion(); | |||
| public abstract RangeKey getRangeKey(); | |||
| } | |||
/** The semantic type of a shortcode range. */
public enum ShortcodeType {
  /** Default value not permitted in real data. */
  UNKNOWN,

  /**
   * General purpose, non-governmental services, including commercial or charity services. Use
   * this as the default shortcode type when no other category is more applicable.
   */
  COMMERCIAL,

  /** Government run public services which are not emergencies (e.g. directory enquiries). */
  PUBLIC_SERVICE,

  /**
   * Public services providing important, non-emergency information for health or safety
   * (e.g. https://www.police.uk/contact/101/).
   */
  EXPANDED_EMERGENCY,

  /**
   * Primary public emergency numbers (i.e. police, fire or ambulance) which are available to
   * everyone. Numbers in this category must be toll-free and not carrier specific. Mobile phone
   * manufacturers will often allow these numbers to be dialled from a locked device, so it is
   * important that they work for everyone.
   */
  EMERGENCY
}
| private static final ImmutableBiMap<ExtTariff, XmlShortcodeType> XML_TARIFF_MAP = | |||
| Stream.of(ExtTariff.TOLL_FREE, ExtTariff.STANDARD_RATE, ExtTariff.PREMIUM_RATE) | |||
| .collect(toImmutableBiMap(identity(), v -> XmlShortcodeType.valueOf("SC_" + v.name()))); | |||
| private static final ImmutableBiMap<ShortcodeType, XmlShortcodeType> XML_TYPE_MAP = | |||
| Stream.of(EXPANDED_EMERGENCY, EMERGENCY) | |||
| .collect(toImmutableBiMap(identity(), v -> XmlShortcodeType.valueOf("SC_" + v.name()))); | |||
| /** Return the known mapping from the schema shortcode types to the XML type. */ | |||
| public static Optional<XmlShortcodeType> getXmlType(ShortcodeType type) { | |||
| return Optional.ofNullable(XML_TYPE_MAP.get(type)); | |||
| } | |||
| /** Return the mapping from the schema tariff to the XML type. */ | |||
| public static XmlShortcodeType getXmlType(ExtTariff tariff) { | |||
| XmlShortcodeType xmlType = XML_TARIFF_MAP.get(tariff); | |||
| checkArgument(xmlType != null, "shortcodes do not support tariff: %s", tariff); | |||
| return xmlType; | |||
| } | |||
| public static final Column<ShortcodeType> TYPE = | |||
| Column.of(ShortcodeType.class, "Type", ShortcodeType.UNKNOWN); | |||
| public static final Column<ExtTariff> TARIFF = RangesTableSchema.TARIFF; | |||
| public static final Column<Boolean> SMS = RangesTableSchema.SMS; | |||
| public static final Column<Boolean> CARRIER_SPECIFIC = Column.ofBoolean("Carrier Specific"); | |||
| public static final Column<Boolean> SUBREGION = Column.ofBoolean("Subregion"); | |||
| public static final Column<String> FORMAT = RangesTableSchema.FORMAT; | |||
| public static final Column<Provenance> PROVENANCE = RangesTableSchema.PROVENANCE; | |||
| public static final Column<String> COMMENT = RangesTableSchema.COMMENT; | |||
| private static final Schema COLUMNS = | |||
| Schema.builder() | |||
| .add(TYPE) | |||
| .add(TARIFF) | |||
| .add(SMS) | |||
| .add(CARRIER_SPECIFIC) | |||
| .add(SUBREGION) | |||
| .add(FORMAT) | |||
| .add(PROVENANCE) | |||
| .add(COMMENT) | |||
| .build(); | |||
| /** Schema instance defining the "Shortcodes" CSV table. */ | |||
| public static final CsvSchema<ShortcodeKey> SCHEMA = | |||
| CsvSchema.of(ShortcodeKey.MARSHALLER, COLUMNS); | |||
| /** | |||
| */ | |||
| public static CsvTable<ShortcodeKey> toCsv(Map<PhoneRegion, RangeTable> tables) { | |||
| CsvTable.Builder<ShortcodeKey> csv = CsvTable.builder(SCHEMA); | |||
| tables.forEach((r, t) -> { | |||
| for (Change c : t.toChanges()) { | |||
| for (RangeKey k : RangeKey.decompose(c.getRanges())) { | |||
| csv.put(ShortcodeKey.create(r, k), c.getAssignments()); | |||
| } | |||
| } | |||
| }); | |||
| return csv.build(); | |||
| } | |||
| /** | |||
| * Maps a single shortcode CSV table into a map of region specific range tables. Note that the | |||
| * ranges in these tables do not need to be consistent across regions (e.g. "toll free" in one | |||
| * might be "premium rate" in the other). | |||
| */ | |||
| public static ImmutableSortedMap<PhoneRegion, RangeTable> toShortcodeTables( | |||
| CsvTable<ShortcodeKey> csv) { | |||
| // Retain order of regions in the CSV table (not natural region order). | |||
| Map<PhoneRegion, RangeTable.Builder> builderMap = new LinkedHashMap<>(); | |||
| for (ShortcodeKey k : csv.getKeys()) { | |||
| // Basically the same as for RangesTableSchema, except that we deal with region codes in the | |||
| // key. | |||
| Change.Builder change = Change.builder(k.getRangeKey().asRangeTree()); | |||
| csv.getRow(k).forEach(change::assign); | |||
| PhoneRegion region = k.getRegion(); | |||
| RangeTable.Builder table = builderMap.get(region); | |||
| if (table == null) { | |||
| table = RangeTable.builder(COLUMNS); | |||
| builderMap.put(region, table); | |||
| } | |||
| table.apply(change.build(), OverwriteMode.NEVER); | |||
| } | |||
| return ImmutableSortedMap.copyOf(Maps.transformValues(builderMap, RangeTable.Builder::build)); | |||
| } | |||
/** Not instantiable: this class only holds static constants and helper methods. */
private ShortcodesTableSchema() {
}
| } | |||
| @ -0,0 +1,154 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN; | |||
| import static com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode.NEVER; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff; | |||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.ColumnGroup; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||
| import java.util.Optional; | |||
| /** | |||
| * A schema describing the columns which are required for creating a {@link NumberingScheme}. | |||
| * <ol> | |||
| * <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not the same as | |||
| * an {@code XmlNumberType}). All ranges should be assigned a validation type. | |||
| * <li>{@link #AREA_CODE_LENGTH}: The length of an optional prefix which may be removed from | |||
| * numbers in a range for local dialling. Local only lengths are derived using this column. | |||
| * <li>{@link #NATIONAL_ONLY}: True if numbers in a range cannot be dialled from outside its | |||
| * region. The "noInternationalDialling" ranges are derived from this column. | |||
| * <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where a range set | |||
| * to {@code true} is valid within the region {@code XX}. | |||
| * </ol> | |||
| * | |||
| * <p>This schema is sufficient for generating {@link NumberingScheme} instances, but isn't what we | |||
| * expect to import data from (which is why it doesn't have a {@code CsvKeyMarshaller} associated | |||
| * with it). That's covered by the {@code RangesTableSchema}. | |||
| */ | |||
| public final class XmlRangesSchema { | |||
| /** | |||
| * The internal "Type" column in the range table This is present in the schema and used is a lot | |||
| * of places, but it is not what the type/tariff data is imported as (it's derived from other | |||
| * columns). | |||
| */ | |||
| public static final Column<ValidNumberType> TYPE = | |||
| Column.of(ValidNumberType.class, "Type", UNKNOWN); | |||
| /** | |||
| * The "Area Code Length" column in the range table, denoting the length of a prefix which can | |||
| * be removed from all numbers in a range to obtain locally diallable numbers. If an | |||
| * "area code" is not optional for dialling, then no value should be set here. | |||
| */ | |||
| public static final Column<Integer> AREA_CODE_LENGTH = RangesTableSchema.AREA_CODE_LENGTH; | |||
| /** Denotes ranges which cannot be dialled internationally. */ | |||
| public static final Column<Boolean> NATIONAL_ONLY = RangesTableSchema.NATIONAL_ONLY; | |||
| /** Format specifier IDs. */ | |||
| public static final Column<String> FORMAT = RangesTableSchema.FORMAT; | |||
| /** The "Region:XX" column group in the range table. */ | |||
| public static final ColumnGroup<PhoneRegion, Boolean> REGIONS = RangesTableSchema.REGIONS; | |||
| /** The standard columns required for generating a {@link NumberingScheme}. */ | |||
| public static final Schema COLUMNS = | |||
| Schema.builder() | |||
| .add(TYPE) | |||
| .add(AREA_CODE_LENGTH) | |||
| .add(NATIONAL_ONLY) | |||
| .add(FORMAT) | |||
| .add(REGIONS) | |||
| .build(); | |||
| /** Columns for per-region tables (just {@link #COLUMNS} without {@link #REGIONS}). */ | |||
| public static final Schema PER_REGION_COLUMNS = | |||
| Schema.builder() | |||
| .add(TYPE) | |||
| .add(AREA_CODE_LENGTH) | |||
| .add(NATIONAL_ONLY) | |||
| .add(FORMAT) | |||
| .build(); | |||
| public static RangeTable fromExternalTable(RangeTable src) { | |||
| checkArgument(RangesTableSchema.TABLE_COLUMNS.isSubSchemaOf(src.getSchema()), | |||
| "unexpected schema for source table, should be subschema of %s", | |||
| RangesTableSchema.TABLE_COLUMNS); | |||
| RangeTree unknown = src.getRanges(RangesTableSchema.TYPE, ExtType.UNKNOWN); | |||
| checkMetadata(unknown.isEmpty(), "source table contains unknown type for ranges\n%s", unknown); | |||
| checkSourceColumn(src, RangesTableSchema.TYPE); | |||
| checkSourceColumn(src, RangesTableSchema.TARIFF); | |||
| // We can copy most columns verbatim. | |||
| RangeTable.Builder dst = RangeTable.builder(COLUMNS); | |||
| copyColumn(src, dst, AREA_CODE_LENGTH); | |||
| copyColumn(src, dst, NATIONAL_ONLY); | |||
| copyColumn(src, dst, FORMAT); | |||
| REGIONS.extractGroupColumns(src.getColumns()).values().forEach(c -> copyColumn(src, dst, c)); | |||
| // But the type column must be inferred from a combination of the external type and tariff. | |||
| // Tariff takes precedence, so we do type first and then overwrite ranges for tariff. | |||
| // We also capture unsupported ranges as they must be ignored in this conversion. | |||
| RangeTree unsupportedRanges = RangeTree.empty(); | |||
| for (ExtType extType : src.getAssignedValues(RangesTableSchema.TYPE)) { | |||
| RangeTree ranges = src.getRanges(RangesTableSchema.TYPE, extType); | |||
| Optional<ValidNumberType> t = extType.toValidNumberType(); | |||
| if (t.isPresent()) { | |||
| dst.assign(TYPE, t.get(), ranges, OverwriteMode.NEVER); | |||
| } else { | |||
| unsupportedRanges = unsupportedRanges.union(ranges); | |||
| } | |||
| } | |||
| // Because we know that both the type and tariff columns have assignments for every range (and | |||
| // there's no "unknown" values for these) we can just ignore "standard rate" tariff ranges | |||
| // since they must have had a type assigned above already. | |||
| for (ExtTariff extTariff : src.getAssignedValues(RangesTableSchema.TARIFF)) { | |||
| // Ignore unsupported ranges here (since otherwise they could add ranges based only on the | |||
| // tariff, which would be wrong). For example, a toll free ISP number range should NOT be | |||
| // in the table as TOLL_FREE, since ISP numbers should not be in the table at all (until | |||
| // such time as they are a fully supported type). | |||
| RangeTree ranges = | |||
| src.getRanges(RangesTableSchema.TARIFF, extTariff).subtract(unsupportedRanges); | |||
| extTariff.toValidNumberType() | |||
| .ifPresent(t -> dst.assign(TYPE, t, ranges, OverwriteMode.ALWAYS)); | |||
| } | |||
| return dst.build(); | |||
| } | |||
| private static void checkSourceColumn(RangeTable table, Column<?> col) { | |||
| checkMetadata(table.getAssignedRanges(col).equals(table.getAllRanges()), | |||
| "table is missing assignments in column %s for ranges\n%s", | |||
| col, table.getAllRanges().subtract(table.getAssignedRanges(col))); | |||
| } | |||
| private static void copyColumn(RangeTable src, RangeTable.Builder dst, Column<?> col) { | |||
| if (src.getColumns().contains(col)) { | |||
| src.getAssignedValues(col).forEach(v -> dst.assign(col, v, src.getRanges(col, v), NEVER)); | |||
| } | |||
| } | |||
// Holder for static constants and helpers only; never instantiated.
private XmlRangesSchema() {}
| } | |||
| @ -0,0 +1,92 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.base.Splitter; | |||
| import java.util.List; | |||
| import java.util.Optional; | |||
| import javax.annotation.Nullable; | |||
| /** | |||
| * A single assignment of a column to a value. This can be used to change values in a | |||
| * {@code RangeTable} and well as query for ranges with its value. | |||
| */ | |||
| @AutoValue | |||
| public abstract class Assignment<T extends Comparable<T>> { | |||
| private static final Splitter SPLITTER = Splitter.on("=").limit(2).trimResults(); | |||
| /** | |||
| * Parses a string of the form {@code "<column>=<value>"} to create an assignment using the given | |||
| * schema. The named column must exist in the schema, and the associated value must be a valid | |||
| * value within that column. | |||
| * <p> | |||
| * Whitespace before and after the column or value is ignored. If the value is omitted, then an | |||
| * unassignment is returned. | |||
| */ | |||
| public static Assignment<?> parse(String s, Schema schema) { | |||
| List<String> parts = SPLITTER.splitToList(s); | |||
| checkArgument(parts.size() == 2, "invalid assigment string: %s", s); | |||
| Column<?> column = schema.getColumn(parts.get(0)); | |||
| return create(column, column.parse(parts.get(1))); | |||
| } | |||
| // Type capture around AutoValue is a little painful, so this static helper ... helps. | |||
| private static <T extends Comparable<T>> Assignment<T> create(Column<T> c, @Nullable Object v) { | |||
| T value = c.cast(v); | |||
| return new AutoValue_Assignment<>(c, Optional.ofNullable(value)); | |||
| } | |||
| /** | |||
| * Returns an assignment in the given column for the specified, non null, value. | |||
| * <p> | |||
| * Note that an assignment for the default value of a column will return an explicit assignment | |||
| * for that value, rather than an "unassignment" in that column; so | |||
| * {@code Assignment.of(c, c.defaultValue())} is not equal to {@code unassign(c)}, even though | |||
| * they may have the same effect when applied to a range table, and may even have the same | |||
| * {@link #toString()} representation (in the case of String columns). | |||
| */ | |||
| public static <T extends Comparable<T>> Assignment<T> of(Column<T> c, Object v) { | |||
| return new AutoValue_Assignment<>(c, Optional.of(c.cast(v))); | |||
| } | |||
| @SuppressWarnings("unchecked") | |||
| public static <T extends Comparable<T>> Assignment<T> ofOptional(Column<T> c, Optional<?> v) { | |||
| // Casting the value makes the optional cast below safe. | |||
| v.ifPresent(c::cast); | |||
| return new AutoValue_Assignment<>(c, (Optional<T>) v); | |||
| } | |||
| /** | |||
| * Returns an unassignment in the given column. The {@link #value()} of this assignment is empty. | |||
| */ | |||
| public static <T extends Comparable<T>> Assignment<T> unassign(Column<T> c) { | |||
| return new AutoValue_Assignment<>(c, Optional.empty()); | |||
| } | |||
| /** The column in which the assignment applies. */ | |||
| public abstract Column<T> column(); | |||
| /** The value in the column, or empty to signify unassignment. */ | |||
| public abstract Optional<T> value(); | |||
| @Override | |||
| public final String toString() { | |||
| return String.format("%s=%s", column().getName(), value().map(Object::toString).orElse("")); | |||
| } | |||
| } | |||
| @ -0,0 +1,131 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import java.util.Arrays; | |||
| import java.util.LinkedHashMap; | |||
| import java.util.Map; | |||
| import java.util.Optional; | |||
| /** | |||
| * A change which can be applied to a range table. Changes are applied sequentially to build a | |||
| * range table and new changes overwrite existing mappings. Changes are additive, and cannot be | |||
| * used to remove ranges from a table (but they can unassign previous assignments). | |||
| */ | |||
| @AutoValue | |||
| public abstract class Change { | |||
| private static final Change EMPTY = of(RangeTree.empty(), ImmutableList.of()); | |||
| /** A builder for changes that supports assigning and unassigning column values for a range. */ | |||
| public static final class Builder { | |||
| private final RangeTree ranges; | |||
| private final Map<Column<?>, Assignment<?>> assignments = new LinkedHashMap<>(); | |||
| private Builder(RangeTree ranges) { | |||
| this.ranges = checkNotNull(ranges); | |||
| } | |||
| /** | |||
| * Assigns the optional value in the given column for the ranges of this builder (an empty | |||
| * value has the effect of unassigning the value in the table that this change is applied to). | |||
| */ | |||
| public Builder assign(Assignment<?> assignment) { | |||
| checkArgument(assignments.put(assignment.column(), assignment) == null, | |||
| "Column already assigned: %s", assignment.column()); | |||
| return this; | |||
| } | |||
| /** Assigns the non-null value in the given column for the ranges of this builder. */ | |||
| public Builder assign(Column<?> column, Object value) { | |||
| return assign(Assignment.of(column, value)); | |||
| } | |||
| /** Unassigns any values in the given column for the ranges of this builder. */ | |||
| public Builder unassign(Column<?> column) { | |||
| return assign(Assignment.unassign(column)); | |||
| } | |||
| /** Builds an immutable change from the current state of this builder. */ | |||
| public Change build() { | |||
| return Change.of(ranges, assignments.values()); | |||
| } | |||
| } | |||
| public static Builder builder(RangeTree ranges) { | |||
| return new Builder(ranges); | |||
| } | |||
| /** Returns the empty change which has no effect when applied to any table. */ | |||
| public static Change empty() { | |||
| return EMPTY; | |||
| } | |||
| /** Builds a change from a set of assignments (columns must be unique). */ | |||
| public static Change of(RangeTree ranges, Iterable<Assignment<?>> assignments) { | |||
| ImmutableList<Assignment<?>> a = ImmutableList.copyOf(assignments); | |||
| checkArgument(a.size() == a.stream().map(Assignment::column).distinct().count(), | |||
| "cannot supply different assignments for the same column: %s", a); | |||
| return new AutoValue_Change(ranges, a); | |||
| } | |||
| /** | |||
| * Returns the ranges affected by this change. These ranges are added to the table and | |||
| * optionally assigned category values according to {@link #getAssignments()}. No other ranges | |||
| * will be affected by this change. | |||
| */ | |||
| public abstract RangeTree getRanges(); | |||
| /** | |||
| * Returns a list of assignments to be applied for this change. Note that the set of columns for | |||
| * these assignments is itself also a set (i.e. no two assignments in a change ever share the | |||
| * same column). | |||
| */ | |||
| public abstract ImmutableList<Assignment<?>> getAssignments(); | |||
| /** Returns whether this change contains any of the specified values in a given column. */ | |||
| @SafeVarargs | |||
| public final <T extends Comparable<T>> boolean hasAssignment(Column<T> column, T... values) { | |||
| for (Assignment<?> a : getAssignments()) { | |||
| if (column.equals(a.column())) { | |||
| return a.value().map(v -> Arrays.asList(values).contains(column.cast(v))).orElse(false); | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| /** | |||
| * Returns the value of the column in this change (or empty if there was not value or the value | |||
| * was empty. This because it conflates "no value" and "explicitly empty value", this method | |||
| * might not be suitable for Changes that unassign values. | |||
| */ | |||
| public final <T extends Comparable<T>> Optional<T> getAssignment(Column<T> column) { | |||
| for (Assignment<?> a : getAssignments()) { | |||
| if (column.equals(a.column())) { | |||
| return a.value().map(column::cast); | |||
| } | |||
| } | |||
| return Optional.empty(); | |||
| } | |||
| // Visible for AutoValue. | |||
| Change() {} | |||
| } | |||
| @ -0,0 +1,217 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.CharMatcher.inRange; | |||
| import static com.google.common.base.CharMatcher.whitespace; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static java.lang.Boolean.FALSE; | |||
| import static java.lang.Boolean.TRUE; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.base.CaseFormat; | |||
| import com.google.common.base.CharMatcher; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import java.util.function.Function; | |||
| import javax.annotation.Nullable; | |||
| /** | |||
| * A column specifier which holds a set of values that are allowed with a column. | |||
| */ | |||
| @AutoValue | |||
| public abstract class Column<T extends Comparable<T>> { | |||
| private static final ImmutableMap<String, Boolean> BOOLEAN_MAP = | |||
| ImmutableMap.of("true", TRUE, "TRUE", TRUE, "false", FALSE, "FALSE", false); | |||
| private static final CharMatcher ASCII_LETTER_OR_DIGIT = | |||
| inRange('a', 'z').or(inRange('A', 'Z')).or(inRange('0', '9')); | |||
| private static final CharMatcher LOWER_ASCII_LETTER_OR_DIGIT = | |||
| inRange('a', 'z').or(inRange('0', '9')); | |||
| private static final CharMatcher LOWER_UNDERSCORE = | |||
| CharMatcher.is('_').or(LOWER_ASCII_LETTER_OR_DIGIT); | |||
| /** | |||
| * Returns a column for the specified type with a given parsing function. Use alternate helper | |||
| * methods for creating columns of common types. | |||
| */ | |||
| public static <T extends Comparable<T>> Column<T> create( | |||
| Class<T> clazz, String name, T defaultValue, Function<String, T> parseFn) { | |||
| return new AutoValue_Column<>( | |||
| checkName(name), clazz, parseFn, String::valueOf, defaultValue, null); | |||
| } | |||
| /** | |||
| * Returns a column for the specified enum type. The string representation of a value in this | |||
| * column is just the {@code toString()} value of the enum. | |||
| */ | |||
| public static <T extends Enum<T>> Column<T> of(Class<T> clazz, String name, T defaultValue) { | |||
| return create(clazz, name, defaultValue, s -> Enum.valueOf(clazz, toEnumName(s))); | |||
| } | |||
| /** | |||
| * Returns a column for strings. In there serialized form, strings do not preserve leading or | |||
| * trailing whitespace, unless surrounded by double-quotes (e.g. {@code " foo "}). The quotes are | |||
| * stripped on parsing and added back for any String value with leading/trailing whitespace. The | |||
| * default value is the empty string. | |||
| */ | |||
| public static Column<String> ofString(String name) { | |||
| return new AutoValue_Column<>( | |||
| checkName(name), String.class, Column::trimOrUnquote, Column::maybeQuote, "", null); | |||
| } | |||
| /** | |||
| * Returns a column for unsigned integers. The string representation of a value in this column | |||
| * matches the {@link Integer#toString(int)} value. The default value is {@code 0}. | |||
| */ | |||
| public static Column<Integer> ofUnsignedInteger(String name) { | |||
| return create(Integer.class, name, 0, Integer::parseUnsignedInt); | |||
| } | |||
| /** | |||
| * Returns a column for booleans. The string representation of a value in this column can be any | |||
| * of "true", "false", "TRUE", "FALSE" (but not things like "True", "T" or "YES"). The default | |||
| * value is {@code false}. | |||
| */ | |||
| public static Column<Boolean> ofBoolean(String name) { | |||
| return create(Boolean.class, name, false, BOOLEAN_MAP::get); | |||
| } | |||
| private static String checkName(String name) { | |||
| checkArgument(name.indexOf(':') == -1, "invalid column name: %s", name); | |||
| return name; | |||
| } | |||
| // Converts to UPPER_UNDERSCORE naming for enums. | |||
| private static String toEnumName(String name) { | |||
| // Allow conversion for lower_underscore and lowerCamel, since UPPER_UNDERSCORE is so "LOUD". | |||
| // We can be sloppy with respect to errors here since all runtime exceptions are handled. | |||
| if (LOWER_ASCII_LETTER_OR_DIGIT.matches(name.charAt(0))) { | |||
| if (LOWER_UNDERSCORE.matchesAllOf(name)) { | |||
| name = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.UPPER_UNDERSCORE, name); | |||
| } else if (ASCII_LETTER_OR_DIGIT.matchesAllOf(name)) { | |||
| name = CaseFormat.LOWER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, name); | |||
| } else { | |||
| // Message/type not important here since all exceptions are replaced anyway. | |||
| throw new IllegalArgumentException(); | |||
| } | |||
| } | |||
| return name; | |||
| } | |||
| // Trims whitespace from a serialize string, unless the value is surrounded by double-quotes (in | |||
| // which case the quotes are removed). This is done to permit the rare use of leading/trailing | |||
| // whitespace in data in a visually distinct and deliberate way. | |||
| private static String trimOrUnquote(String s) { | |||
| if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { | |||
| return s.substring(1, s.length() - 1); | |||
| } | |||
| return whitespace().trimFrom(s); | |||
| } | |||
| // Surrounds any string with whitespace at either end with double quotes. | |||
| private static String maybeQuote(String s) { | |||
| if (s.length() > 0 | |||
| && (whitespace().matches(s.charAt(0)) || whitespace().matches(s.charAt(s.length() - 1)))) { | |||
| return '"' + s + '"'; | |||
| } | |||
| return s; | |||
| } | |||
| /** Returns the column name (which can be used as a human readable title if needed). */ | |||
| public abstract String getName(); | |||
| abstract Class<T> type(); | |||
| // The parsing function from a string to a value. | |||
| abstract Function<String, T> parseFn(); | |||
| // The serialization function from a value to a String. This must be the inverse of the parseFn. | |||
| abstract Function<T, String> serializeFn(); | |||
| /** Default value for this column (inferred for unassigned ranges when a snapshot is built). */ | |||
| public abstract T defaultValue(); | |||
| // This is very private and should only be used in this class. | |||
| @Nullable abstract Column<T> owningGroup(); | |||
| /** Attempts to cast the given instance to the runtime type of this column. */ | |||
| @Nullable public final T cast(@Nullable Object value) { | |||
| return type().cast(value); | |||
| } | |||
| /** | |||
| * Returns the value of this column based on its serialized representation (which is not | |||
| * necessarily its {@code toString()} representation). | |||
| */ | |||
| @Nullable public final T parse(String id) { | |||
| if (id.isEmpty()) { | |||
| return null; | |||
| } | |||
| try { | |||
| // TODO: Technically wrong, since for String columns this will unquote strings. | |||
| // Hopefully this won't be an issue, since quoting is really only likely to be used for | |||
| // preserving whitespace (which i | |||
| T value = parseFn().apply(id); | |||
| if (value != null) { | |||
| return value; | |||
| } | |||
| } catch (RuntimeException e) { | |||
| // fall through | |||
| } | |||
| throw new IllegalArgumentException( | |||
| String.format("unknown value '%s' in column '%s'", id, getName())); | |||
| } | |||
| /** | |||
| * Returns the serialized representation of a value in this column. This is the stored | |||
| * representation of the value, not the value itself. | |||
| */ | |||
| public final String serialize(@Nullable Object value) { | |||
| return (value != null) ? serializeFn().apply(cast(value)) : ""; | |||
| } | |||
| // Only to be called by ColumnGroup. | |||
| final Column<T> fromPrototype(String suffix) { | |||
| String name = getName() + ":" + checkName(suffix); | |||
| return new AutoValue_Column<T>(name, type(), parseFn(), serializeFn(), defaultValue(), this); | |||
| } | |||
| final boolean isIn(ColumnGroup<?, ?> group) { | |||
| return group.prototype().equals(owningGroup()); | |||
| } | |||
| @Override | |||
| public final String toString() { | |||
| return "Column{'" + getName() + "'}"; | |||
| } | |||
| @Override | |||
| public final boolean equals(Object obj) { | |||
| if (!(obj instanceof Column<?>)) { | |||
| return false; | |||
| } | |||
| Column<?> c = (Column<?>) obj; | |||
| return c.getName().equals(getName()) && c.type().equals(type()); | |||
| } | |||
| @Override | |||
| public final int hashCode() { | |||
| return getName().hashCode() ^ type().hashCode(); | |||
| } | |||
| // Visible only for AutoValue | |||
| Column() {} | |||
| } | |||
| @ -0,0 +1,100 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap; | |||
| import static java.util.function.Function.identity; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableBiMap; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag; | |||
| import java.util.Set; | |||
| import java.util.function.Function; | |||
| /** A group of {@link RangeTable} columns. */ | |||
| @AutoValue | |||
| public abstract class ColumnGroup<K, T extends Comparable<T>> { | |||
| /** | |||
| * Returns a group for columns with the same type as the given "prototype" column and which has a | |||
| * a prefix that's the name of the prototype. Suffix values are parsed using the given function. | |||
| */ | |||
| public static <K, T extends Comparable<T>> ColumnGroup<K, T> of( | |||
| Column<T> prototype, Function<String, K> parseFn) { | |||
| return new AutoValue_ColumnGroup<>(prototype, parseFn); | |||
| } | |||
| /** Returns a group for the specified prototype column keyed by {@link PhoneRegion}. */ | |||
| public static <T extends Comparable<T>> ColumnGroup<PhoneRegion, T> byRegion( | |||
| Column<T> prototype) { | |||
| return of(prototype, PhoneRegion::of); | |||
| } | |||
| /** Returns a group for the specified prototype column keyed by {@link SimpleLanguageTag}. */ | |||
| public static <T extends Comparable<T>> ColumnGroup<SimpleLanguageTag, T> byLanguage( | |||
| Column<T> prototype) { | |||
| return of(prototype, SimpleLanguageTag::of); | |||
| } | |||
| // Internal use only. | |||
| abstract Column<T> prototype(); | |||
| abstract Function<String, K> parseFn(); | |||
| /** Returns the column for a specified key. */ | |||
| public Column<T> getColumn(K key) { | |||
| // The reason this does not just call "prototype().fromPrototype(...)" is that the key may not | |||
| // be parsable by the function just because it's the "right" type. This allows people to pass | |||
| // in a function that limits columns to some subset of the domain (e.g. a subset of region | |||
| // codes). | |||
| return getColumnFromId(key.toString()); | |||
| } | |||
| /** Returns the column for a specified ID string. */ | |||
| public Column<T> getColumnFromId(String id) { | |||
| try { | |||
| Object unused = parseFn().apply(id); | |||
| } catch (RuntimeException e) { | |||
| throw new IllegalArgumentException( | |||
| String.format("invalid column %s, not in group: %s", id, this), e); | |||
| } | |||
| return prototype().fromPrototype(id); | |||
| } | |||
| /** Returns the key of a column in this group. */ | |||
| @SuppressWarnings("unchecked") | |||
| public K getKey(Column<?> c) { | |||
| checkArgument(c.isIn(this), "column %s in not group %s", c, this); | |||
| // Cast is safe since any column in this group is a Column<T>. | |||
| return extractKey((Column<T>) c); | |||
| } | |||
| /** Returns a bidirectional mapping from group key to column, for columns in this group. */ | |||
| @SuppressWarnings("unchecked") | |||
| public ImmutableBiMap<K, Column<T>> extractGroupColumns(Set<Column<?>> columns) { | |||
| return columns.stream() | |||
| .filter(c -> c.isIn(this)) | |||
| // Cast is safe since any column in this group is a Column<T>. | |||
| .map(c -> (Column<T>) c) | |||
| .collect(toImmutableBiMap(this::extractKey, identity())); | |||
| } | |||
| // Assumes we've already verified that the column is in this group. | |||
| private K extractKey(Column<T> column) { | |||
| String name = column.getName(); | |||
| return parseFn().apply(name.substring(name.lastIndexOf(':') + 1)); | |||
| } | |||
| } | |||
| @ -0,0 +1,74 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import static java.util.Comparator.naturalOrder; | |||
| import com.google.common.collect.ImmutableList; | |||
| import java.util.Comparator; | |||
| import java.util.List; | |||
| import java.util.Optional; | |||
| import java.util.function.Function; | |||
| import java.util.stream.Stream; | |||
| /** Marshaller to handle key serialization and ordering in {@code CsvTable}. */ | |||
| public final class CsvKeyMarshaller<K> { | |||
| private final Function<K, Stream<String>> serialize; | |||
| private final Function<List<String>, K> deserialize; | |||
| private final Optional<Comparator<K>> ordering; | |||
| private final ImmutableList<String> columns; | |||
| public static CsvKeyMarshaller<String> ofSortedString(String columnName) { | |||
| return new CsvKeyMarshaller<String>( | |||
| Stream::of, p -> p.get(0), Optional.of(naturalOrder()), columnName); | |||
| } | |||
| public CsvKeyMarshaller( | |||
| Function<K, Stream<String>> serialize, | |||
| Function<List<String>, K> deserialize, | |||
| Optional<Comparator<K>> ordering, | |||
| String... columns) { | |||
| this(serialize, deserialize, ordering, ImmutableList.copyOf(columns)); | |||
| } | |||
| public CsvKeyMarshaller( | |||
| Function<K, Stream<String>> serialize, | |||
| Function<List<String>, K> deserialize, | |||
| Optional<Comparator<K>> ordering, | |||
| List<String> columns) { | |||
| this.serialize = checkNotNull(serialize); | |||
| this.deserialize = checkNotNull(deserialize); | |||
| this.ordering = checkNotNull(ordering); | |||
| this.columns = ImmutableList.copyOf(columns); | |||
| } | |||
| public ImmutableList<String> getColumns() { | |||
| return columns; | |||
| } | |||
| Stream<String> serialize(K key) { | |||
| return serialize.apply(key); | |||
| } | |||
| K deserialize(List<String> keyParts) { | |||
| return deserialize.apply(keyParts); | |||
| } | |||
| Optional<Comparator<K>> ordering() { | |||
| return ordering; | |||
| } | |||
| } | |||
| @ -0,0 +1,241 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.CharMatcher.isNot; | |||
| import static com.google.common.base.CharMatcher.javaIsoControl; | |||
| import static com.google.common.base.CharMatcher.whitespace; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import com.google.common.base.CharMatcher; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.Streams; | |||
| import java.util.ArrayList; | |||
| import java.util.Iterator; | |||
| import java.util.List; | |||
| import java.util.function.Consumer; | |||
| import java.util.stream.Stream; | |||
| import javax.annotation.Nullable; | |||
| /** | |||
| * An efficient, fluent CSV parser which operates on a {@link Stream} of lines. It handles quoting | |||
| * of values, whitespace trimming and mapping values via a "schema" row. | |||
| * | |||
| * <p>This class is sadly necessary since the one in {@code com.google.common.text} doesn't support | |||
| * ignoring whitespace (and making it do so would take longer than writing this). | |||
| * | |||
| * <p>This class is immutable and thread-safe. | |||
| */ | |||
| // TODO: Investigate other "standard" CSV parsers such as org.apache.commons.csv. | |||
| public final class CsvParser { | |||
| /** | |||
| * A consumer for CSV rows which can automatically map values according to a header row. | |||
| * | |||
| * <p>This class is immutable and thread-safe (but the consumers returned by {@link #mapTo} are | |||
| * stateful, since they remember the header row they were given). | |||
| */ | |||
| public static final class RowMapper { | |||
| // Optional callback invoked once with the header row (null if the caller does not need it). | |||
| @Nullable private final Consumer<ImmutableList<String>> headerHandler; | |||
| private RowMapper(@Nullable Consumer<ImmutableList<String>> headerHandler) { | |||
| this.headerHandler = headerHandler; | |||
| } | |||
| /** | |||
| * Returns a row callback which treats the first row it is given as the header, and passes each | |||
| * subsequent row to {@code handler} as a map from header name to value. Empty values are | |||
| * omitted from the map. | |||
| * | |||
| * @throws IllegalArgumentException (from the returned consumer) if the header contains | |||
| *     duplicate names or a row has more values than the header. | |||
| */ | |||
| public Consumer<Stream<String>> mapTo(Consumer<ImmutableMap<String, String>> handler) { | |||
| return new Consumer<Stream<String>>() { | |||
| private ImmutableList<String> header = null; | |||
| @Override | |||
| public void accept(Stream<String> row) { | |||
| if (header == null) { | |||
| // Can contain duplicates (but that's bad for mapping). | |||
| header = row.collect(toImmutableList()); | |||
| checkArgument( | |||
| header.size() == header.stream().distinct().count(), | |||
| "duplicate values in CSV header: %s", | |||
| header); | |||
| if (headerHandler != null) { | |||
| headerHandler.accept(header); | |||
| } | |||
| } else { | |||
| ImmutableMap.Builder<String, String> map = ImmutableMap.builder(); | |||
| int i = 0; | |||
| for (String v : Streams.iterating(row)) { | |||
| checkArgument(i < header.size(), | |||
| "too many columns (expected %s): %s", header.size(), map); | |||
| if (!v.isEmpty()) { | |||
| map.put(header.get(i), v); | |||
| } | |||
| // An empty value is omitted from the map but still occupies a column, so the index must | |||
| // advance for every value (otherwise later values are mapped to the wrong header). | |||
| i++; | |||
| } | |||
| handler.accept(map.build()); | |||
| } | |||
| } | |||
| }; | |||
| } | |||
| } | |||
| private static final CharMatcher NON_WHITESPACE = CharMatcher.whitespace().negate(); | |||
| private static final char QUOTE = '"'; | |||
| // A delimiter is any non-whitespace, non-control, non-quote character, or a space or tab. | |||
| private static final CharMatcher VALID_DELIMITER_CHAR = | |||
| NON_WHITESPACE.and(javaIsoControl().negate()).and(isNot(QUOTE)).or(CharMatcher.anyOf(" \t")); | |||
| /** Returns a parser for the given delimiter which neither trims whitespace nor allows multi-line values. */ | |||
| public static CsvParser withSeparator(char delimiter) { | |||
| return new CsvParser(delimiter, false, false); | |||
| } | |||
| /** Returns a parser using {@code ','} as the delimiter. */ | |||
| public static CsvParser commaSeparated() { | |||
| return withSeparator(','); | |||
| } | |||
| /** Returns a parser using the tab character as the delimiter. */ | |||
| public static CsvParser tabSeparated() { | |||
| return withSeparator('\t'); | |||
| } | |||
| /** Returns a row mapper with no header callback. */ | |||
| public static RowMapper rowMapper() { | |||
| return new RowMapper(null); | |||
| } | |||
| /** Returns a row mapper which passes the header row to the given handler when it is read. */ | |||
| public static RowMapper rowMapper(Consumer<ImmutableList<String>> headerHandler) { | |||
| return new RowMapper(headerHandler); | |||
| } | |||
| private final char delimiter; | |||
| private final boolean trimWhitespace; | |||
| private final boolean allowMultiline; | |||
| private CsvParser(char delimiter, boolean trimWhitespace, boolean allowMultiline) { | |||
| checkArgument(VALID_DELIMITER_CHAR.matches(delimiter), | |||
| "invalid delimiter: %s", delimiter); | |||
| this.delimiter = delimiter; | |||
| this.trimWhitespace = trimWhitespace; | |||
| this.allowMultiline = allowMultiline; | |||
| } | |||
| /** | |||
| * Returns a copy of this parser which ignores whitespace around values. | |||
| * | |||
| * @throws IllegalArgumentException if the delimiter is itself a whitespace character. | |||
| */ | |||
| public CsvParser trimWhitespace() { | |||
| checkArgument(NON_WHITESPACE.matches(delimiter), | |||
| "cannot trim whitespace if delimiter is whitespace"); | |||
| return new CsvParser(delimiter, true, allowMultiline); | |||
| } | |||
| /** Returns a copy of this parser which lets quoted values continue over several lines. */ | |||
| public CsvParser allowMultiline() { | |||
| return new CsvParser(delimiter, trimWhitespace, true); | |||
| } | |||
| /** | |||
| * Parses the given lines, invoking {@code rowCallback} once per parsed row. | |||
| * | |||
| * <p>The stream passed to the callback is backed by a list which is cleared and reused as soon | |||
| * as the callback returns, so the callback must consume the stream before returning. | |||
| * | |||
| * @throws IllegalArgumentException if the input is not well formed. | |||
| */ | |||
| public void parse(Stream<String> lines, Consumer<Stream<String>> rowCallback) { | |||
| // The row list and value buffer are shared between rows to avoid reallocating them. | |||
| List<String> row = new ArrayList<>(); | |||
| StringBuilder buffer = new StringBuilder(); | |||
| Iterator<String> it = lines.iterator(); | |||
| while (parseRow(it, row, buffer)) { | |||
| rowCallback.accept(row.stream()); | |||
| row.clear(); | |||
| } | |||
| } | |||
| /** | |||
| * Parses one row (one line, or several if a quoted value spans lines) into {@code row}. | |||
| * | |||
| * @param lines the remaining input lines. | |||
| * @param row the (empty) list to which parsed values are added. | |||
| * @param buffer an empty scratch buffer used to assemble quoted values (left empty on return). | |||
| * @return false if there were no more lines to read, true if a row was parsed. | |||
| */ | |||
| private boolean parseRow(Iterator<String> lines, List<String> row, StringBuilder buffer) { | |||
| if (!lines.hasNext()) { | |||
| return false; | |||
| } | |||
| // First line of potentially several which make up this row. | |||
| String line = lines.next(); | |||
| int start = maybeTrimWhitespace(line, 0); | |||
| while (start < line.length()) { | |||
| // "start" is the start of the next part and must be a valid index into current "line". | |||
| // Could be high or low surrogate if badly formed string, or just point at the delimiter. | |||
| char c = line.charAt(start); | |||
| int pos; | |||
| if (c == QUOTE) { | |||
| // Quoted value, maybe parse and unescape multiple lines here. | |||
| pos = ++start; | |||
| while (true) { | |||
| // Loop (rather than test once) so that empty continuation lines inside a quoted value are | |||
| // consumed as well, instead of falling through to charAt() on an empty line. | |||
| while (pos == line.length()) { | |||
| buffer.append(line, start, pos); | |||
| checkArgument(allowMultiline && lines.hasNext(), | |||
| "unterminated quoted value: %s", buffer); | |||
| buffer.append('\n'); | |||
| line = lines.next(); | |||
| start = 0; | |||
| pos = 0; | |||
| } | |||
| c = line.charAt(pos); | |||
| if (c == QUOTE) { | |||
| buffer.append(line, start, pos++); | |||
| if (pos == line.length()) { | |||
| break; | |||
| } | |||
| if (line.charAt(pos) != QUOTE) { | |||
| // Closing quote: only (optionally trimmed) whitespace may precede the next delimiter. | |||
| pos = maybeTrimWhitespace(line, pos); | |||
| checkArgument(pos == line.length() || line.codePointAt(pos) == delimiter, | |||
| "unexpected character (expected delimiter) in: %s", line); | |||
| break; | |||
| } | |||
| // "Double double quotes, what does it mean?" (oh yeah, a single double quote). | |||
| buffer.append(QUOTE); | |||
| start = pos + 1; | |||
| } | |||
| pos++; | |||
| } | |||
| row.add(buffer.toString()); | |||
| buffer.setLength(0); | |||
| } else if (c == delimiter) { | |||
| // Empty unquoted empty value (e.g. "foo,,bar"). | |||
| row.add(""); | |||
| pos = start; | |||
| } else { | |||
| // Non-empty unquoted value. | |||
| pos = line.indexOf(delimiter, start + 1); | |||
| if (pos == -1) { | |||
| pos = line.length(); | |||
| } | |||
| String value = line.substring(start, maybeTrimTrailingWhitespace(line, pos)); | |||
| checkArgument(value.indexOf(QUOTE) == -1, | |||
| "quotes cannot appear in unquoted values: %s", value); | |||
| row.add(value); | |||
| } | |||
| if (pos == line.length()) { | |||
| // We hit end-of-line at the end of a value, so just return (no trailing empty value). | |||
| return true; | |||
| } | |||
| // If not end-of-line, "pos" points at the last delimiter, so we can find the next start. | |||
| start = maybeTrimWhitespace(line, pos + 1); | |||
| } | |||
| // We hit end-of-line either immediately, or after a delimiter. Either way we always need to | |||
| // add a trailing empty value for consistency. | |||
| row.add(""); | |||
| return true; | |||
| } | |||
| /** | |||
| * Returns the index of the first non-whitespace character at or after {@code i} (or the length | |||
| * of {@code s} if there is none) when trimming is enabled, otherwise returns {@code i}. | |||
| */ | |||
| private int maybeTrimWhitespace(String s, int i) { | |||
| if (trimWhitespace) { | |||
| i = NON_WHITESPACE.indexIn(s, i); | |||
| if (i == -1) { | |||
| i = s.length(); | |||
| } | |||
| } | |||
| return i; | |||
| } | |||
| /** | |||
| * Returns the given (exclusive) end index moved back past any whitespace which precedes it when | |||
| * trimming is enabled, otherwise returns {@code i}. | |||
| */ | |||
| private int maybeTrimTrailingWhitespace(String s, int i) { | |||
| if (trimWhitespace) { | |||
| // There is no "lastIndexIn(String, int)" sadly. | |||
| while (i > 0 && whitespace().matches(s.charAt(i - 1))) { | |||
| i--; | |||
| } | |||
| } | |||
| return i; | |||
| } | |||
| } | |||
| @ -0,0 +1,108 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableList; | |||
| import java.io.IOException; | |||
| import java.io.Reader; | |||
| import java.nio.file.Files; | |||
| import java.nio.file.Path; | |||
| import java.util.ArrayList; | |||
| import java.util.Comparator; | |||
| import java.util.List; | |||
| import java.util.Optional; | |||
| import java.util.function.BiConsumer; | |||
| /** | |||
| * A CSV schema is a combination of a key marshaller and table columns. A CSV schema defines a | |||
| * CSV table with key columns, followed by non-key columns. | |||
| */ | |||
| @AutoValue | |||
| public abstract class CsvSchema<K> { | |||
| /** | |||
| * Returns a schema for a CSV file using the given marshaller to define key columns, and a table | |||
| * schema to define any additional columns in a row. | |||
| */ | |||
| public static <K> CsvSchema<K> of(CsvKeyMarshaller<K> marshaller, Schema columns) { | |||
| return new AutoValue_CsvSchema<>(marshaller, columns); | |||
| } | |||
| /** The marshaller defining table keys and how they are serialized in CSV. */ | |||
| public abstract CsvKeyMarshaller<K> keyMarshaller(); | |||
| /** The table schema defining non-key columns in the table. */ | |||
| public abstract Schema columns(); | |||
| /** Returns the ordering for keys in the CSV table, as defined by the key marshaller. */ | |||
| public Optional<Comparator<K>> rowOrdering() { | |||
| return keyMarshaller().ordering(); | |||
| } | |||
| /** | |||
| * Returns the ordering for additional non-key columns in the CSV table as defined by the table | |||
| * schema. | |||
| */ | |||
| public Comparator<Column<?>> columnOrdering() { | |||
| return columns().ordering(); | |||
| } | |||
| /** | |||
| * Extracts the non-key columns of a table from the header row. The header row is expected to | |||
| * contain the names of all columns (including key columns) in the CSV table and this method | |||
| * verifies that the key columns are present as expected before resolving the non-key columns | |||
| * in order. | |||
| * | |||
| * @throws IllegalArgumentException if the header does not start with the key column names. | |||
| */ | |||
| public ImmutableList<Column<?>> parseHeader(List<String> header) { | |||
| int hsize = keyMarshaller().getColumns().size(); | |||
| checkArgument(header.size() >= hsize, "CSV header too short: %s", header); | |||
| checkArgument(header.subList(0, hsize).equals(keyMarshaller().getColumns()), | |||
| "Invalid CSV header: %s", header); | |||
| ImmutableList.Builder<Column<?>> columns = ImmutableList.builder(); | |||
| header.subList(hsize, header.size()).forEach(s -> columns.add(columns().getColumn(s))); | |||
| return columns.build(); | |||
| } | |||
| /** | |||
| * Parses a row from a CSV table containing unescaped values. The leading values are | |||
| * deserialized into the row key, and each remaining value is parsed by the column at the same | |||
| * position in {@code columns}. The key and resulting assignments are then passed to {@code fn}. | |||
| * | |||
| * @param columns the non-key columns, as returned by {@link #parseHeader(List)}. | |||
| * @param row the key values followed by the non-key values (trailing values may be missing). | |||
| * @param fn the callback which receives the key and column assignments for the row. | |||
| * @throws IllegalArgumentException if the row has too few values for the key, or more values | |||
| *     than there are columns. | |||
| */ | |||
| public void parseRow( | |||
| ImmutableList<Column<?>> columns, List<String> row, BiConsumer<K, List<Assignment<?>>> fn) { | |||
| int hsize = keyMarshaller().getColumns().size(); | |||
| checkArgument(row.size() >= hsize, "CSV row too short: %s", row); | |||
| // Fail with a descriptive error rather than an IndexOutOfBoundsException from columns.get(). | |||
| checkArgument(row.size() - hsize <= columns.size(), | |||
| "CSV row too long (expected at most %s values): %s", hsize + columns.size(), row); | |||
| K key = keyMarshaller().deserialize(row.subList(0, hsize)); | |||
| List<Assignment<?>> rowAssignments = new ArrayList<>(); | |||
| for (int n = 0; n < row.size() - hsize; n++) { | |||
| Column<?> c = columns.get(n); | |||
| rowAssignments.add( | |||
| Assignment.ofOptional(c, Optional.ofNullable(c.parse(row.get(n + hsize))))); | |||
| } | |||
| fn.accept(key, rowAssignments); | |||
| } | |||
| /** | |||
| * Loads a CSV table with this schema from the given file, returning an empty table if the file | |||
| * does not exist. | |||
| */ | |||
| public CsvTable<K> load(Path file) throws IOException { | |||
| if (!Files.exists(file)) { | |||
| return CsvTable.builder(this).build(); | |||
| } | |||
| try (Reader csv = Files.newBufferedReader(file)) { | |||
| return CsvTable.importCsv(this, csv); | |||
| } | |||
| } | |||
| /** Loads a CSV table with this schema from the given reader. */ | |||
| public CsvTable<K> load(Reader reader) throws IOException { | |||
| return CsvTable.importCsv(this, reader); | |||
| } | |||
| } | |||
| @ -0,0 +1,589 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||
| import static com.google.common.collect.ImmutableSortedSet.toImmutableSortedSet; | |||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.LHS_CHANGED; | |||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.LHS_ONLY; | |||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.RHS_CHANGED; | |||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.RHS_ONLY; | |||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.UNCHANGED; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.base.CharMatcher; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.ImmutableSortedSet; | |||
| import com.google.common.collect.Maps; | |||
| import com.google.common.collect.Ordering; | |||
| import com.google.common.collect.Sets; | |||
| import com.google.common.collect.Table; | |||
| import com.google.common.collect.Tables; | |||
| import com.google.common.collect.TreeBasedTable; | |||
| import com.google.common.escape.CharEscaperBuilder; | |||
| import com.google.common.escape.Escaper; | |||
| import java.io.BufferedReader; | |||
| import java.io.IOException; | |||
| import java.io.Reader; | |||
| import java.io.StringWriter; | |||
| import java.io.Writer; | |||
| import java.util.Arrays; | |||
| import java.util.Collections; | |||
| import java.util.LinkedHashMap; | |||
| import java.util.List; | |||
| import java.util.Map; | |||
| import java.util.Objects; | |||
| import java.util.Optional; | |||
| import java.util.Set; | |||
| import java.util.TreeMap; | |||
| import java.util.function.Consumer; | |||
| import java.util.function.Predicate; | |||
| import java.util.function.Supplier; | |||
| import java.util.stream.Stream; | |||
| import javax.annotation.Nullable; | |||
| /** | |||
| * A general tabular representation of {@link Column} based data, which can include range data | |||
| * (via {@link RangeTable}) or other tabular data using a specified row key implementation. | |||
| * | |||
| * @param <K> the row key type. | |||
| */ | |||
| @AutoValue | |||
| public abstract class CsvTable<K> { | |||
| // Separator used by the default parser below (note that the constant name is spelled "DELIMETER"). | |||
| public static final String DEFAULT_DELIMETER = ";"; | |||
| // Trim whitespace (since CSV files may be textually aligned) but don't allow multiline values | |||
| // (we handle that by JSON style escaping to keep the "one row per line" assumption true). | |||
| private static final CsvParser CSV_PARSER = | |||
| CsvParser.withSeparator(DEFAULT_DELIMETER.charAt(0)).trimWhitespace(); | |||
| /** | |||
| * Mode to control how diffs are generated. In a diff table, rows have an additional | |||
| * {@code Status} applied to describe whether they are unchanged, modified or exclusive (i.e. | |||
| * exist only in one of the source tables). | |||
| */ | |||
| public enum DiffMode { | |||
| /** Include all rows in the "diff table" (unchanged, modified or exclusive). */ | |||
| ALL, | |||
| /** Include only changed rows in the "diff table" (modified or exclusive), from both sides. */ | |||
| CHANGES, | |||
| /** Include only left-hand-side rows in the "diff table" (unchanged, modified or exclusive). */ | |||
| LHS, | |||
| /** Include only right-hand-side rows in the "diff table" (unchanged, modified or exclusive). */ | |||
| RHS, | |||
| } | |||
| /** A simple builder for programmatic generation of CSV tables. */ | |||
| public static final class Builder<T> { | |||
| private final CsvSchema<T> schema; | |||
| private final Table<T, Column<?>, Object> table; | |||
| private Builder(CsvSchema<T> schema) { | |||
| this.schema = checkNotNull(schema); | |||
| // Rows keep insertion order unless the schema defines a row ordering, in which case they are | |||
| // sorted. Columns are always sorted by the schema's column ordering. | |||
| if (!schema.rowOrdering().isPresent()) { | |||
| this.table = Tables.newCustomTable( | |||
| new LinkedHashMap<>(), | |||
| () -> new TreeMap<>(schema.columnOrdering())); | |||
| } else { | |||
| this.table = TreeBasedTable.create(schema.rowOrdering().get(), schema.columnOrdering()); | |||
| } | |||
| } | |||
| /** | |||
| * Sets the row for the given key to exactly the given mappings, replacing any row which was | |||
| * already present for that key. | |||
| */ | |||
| public Builder<T> putRow(T key, Map<Column<?>, ?> row) { | |||
| // Drop any existing row first so that the "not already added" check in addRow() passes. | |||
| table.rowMap().remove(key); | |||
| addRow(key, row); | |||
| return this; | |||
| } | |||
| /** | |||
| * Adds a row with the given mappings to the table. | |||
| * | |||
| * @throws IllegalArgumentException if a row with the same key is already present. | |||
| */ | |||
| public Builder<T> addRow(T key, Map<Column<?>, ?> row) { | |||
| checkArgument(!table.containsRow(key), "row '%s' already added\n%s", key, this); | |||
| for (Map.Entry<Column<?>, ?> cell : row.entrySet()) { | |||
| table.put(key, cell.getKey(), cell.getValue()); | |||
| } | |||
| return this; | |||
| } | |||
| /** | |||
| * Adds a row with the given column assignments to the table. | |||
| * | |||
| * @throws IllegalArgumentException if a row with the same key is already present. | |||
| */ | |||
| public Builder<T> addRow(T key, List<Assignment<?>> row) { | |||
| checkArgument(!table.containsRow(key), "row '%s' already added\n%s", key, this); | |||
| put(key, row); | |||
| return this; | |||
| } | |||
| /** Sets a single value in the table, or removes it if the given value is null. */ | |||
| public <V extends Comparable<V>> Builder<T> put(T key, Column<V> c, @Nullable V v) { | |||
| schema.columns().checkColumn(c); | |||
| if (v == null) { | |||
| table.remove(key, c); | |||
| return this; | |||
| } | |||
| table.put(key, c, c.cast(v)); | |||
| return this; | |||
| } | |||
| /** Applies each assignment to the row, removing the value when an assignment is empty. */ | |||
| public Builder<T> put(T key, Iterable<Assignment<?>> assign) { | |||
| for (Assignment<?> a : assign) { | |||
| if (!a.value().isPresent()) { | |||
| table.remove(key, a.column()); | |||
| } else { | |||
| table.put(key, a.column(), a.value().get()); | |||
| } | |||
| } | |||
| return this; | |||
| } | |||
| /** Applies each assignment to the row, removing the value when an assignment is empty. */ | |||
| public Builder<T> put(T key, Assignment<?>... assign) { | |||
| return put(key, Arrays.asList(assign)); | |||
| } | |||
| /** Returns an unmodifiable view of the row keys currently in the table. */ | |||
| public Set<T> getKeys() { | |||
| Set<T> liveKeys = table.rowKeySet(); | |||
| return Collections.unmodifiableSet(liveKeys); | |||
| } | |||
| /** Returns the value for the given row and column (or null if there is none). */ | |||
| public <V extends Comparable<V>> V get(T key, Column<V> c) { | |||
| Object raw = table.get(key, c); | |||
| return c.cast(raw); | |||
| } | |||
| /** Removes an entire row from the table (does nothing if the row did not exist). */ | |||
| public Builder<T> removeRow(T key) { | |||
| table.rowKeySet().remove(key); | |||
| return this; | |||
| } | |||
| /** Removes every row whose key does not match the given predicate. */ | |||
| public Builder<T> filterRows(Predicate<T> predicate) { | |||
| Set<T> liveKeys = table.rowKeySet(); | |||
| // Iterate over a snapshot, since removing from the live view during iteration would fail. | |||
| for (T key : ImmutableSet.copyOf(liveKeys)) { | |||
| if (predicate.test(key)) { | |||
| continue; | |||
| } | |||
| liveKeys.remove(key); | |||
| } | |||
| return this; | |||
| } | |||
| /** Removes every column which does not match the given predicate. */ | |||
| public Builder<T> filterColumns(Predicate<Column<?>> predicate) { | |||
| Predicate<Column<?>> rejected = predicate.negate(); | |||
| Set<Column<?>> unwanted = | |||
| table.columnKeySet().stream().filter(rejected).collect(toImmutableSet()); | |||
| table.columnKeySet().removeAll(unwanted); | |||
| return this; | |||
| } | |||
| /** Returns an immutable CSV table holding the current contents of this builder. */ | |||
| public CsvTable<T> build() { | |||
| return from(schema, table); | |||
| } | |||
| /** Returns the string form of the table which would currently be built. */ | |||
| @Override | |||
| public String toString() { | |||
| CsvTable<T> snapshot = build(); | |||
| return snapshot.toString(); | |||
| } | |||
| } | |||
| /** | |||
| * Returns a new, empty builder for a CSV table with the key and column semantics of the given | |||
| * schema. | |||
| */ | |||
| public static <K> Builder<K> builder(CsvSchema<K> schema) { | |||
| return new Builder<>(schema); | |||
| } | |||
| /** Returns a CSV table based on the given table with the expected key and column semantics. */ | |||
| public static <K> CsvTable<K> from(CsvSchema<K> schema, Table<K, Column<?>, Object> table) { | |||
| ImmutableSet<Column<?>> columns = table.columnKeySet().stream() | |||
| .sorted(schema.columnOrdering()) | |||
| .collect(toImmutableSet()); | |||
| columns.forEach(schema.columns()::checkColumn); | |||
| return new AutoValue_CsvTable<>( | |||
| schema, | |||
| ImmutableMap.copyOf(Maps.transformValues(table.rowMap(), ImmutableMap::copyOf)), | |||
| columns); | |||
| } | |||
| /** | |||
| * Imports a semicolon separated CSV file. The CSV file needs to have the following layout: | |||
| * <pre> | |||
| * Key1 ; Key2 ; Column1 ; Column2 ; Column3 | |||
| * k1 ; k2 ; OTHER ; "Text" ; true | |||
| * ... | |||
| * </pre> | |||
| * Where the first {@code N} columns represent the row key (as encapsulated by the key | |||
| * {@link CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema} | |||
| * via the column names. | |||
| * <p> | |||
| * Column values are represented in a semi-typed fashion according to the associated column (some | |||
| * columns require values to be escaped, others do not). Note that it's the column that defines | |||
| * whether the value needs escaping, not the content of the value itself (all values in a String | |||
| * column are required to be quoted). | |||
| */ | |||
| public static <K> CsvTable<K> importCsv(CsvSchema<K> schema, Reader csv) throws IOException { | |||
| // Use the default parser (DEFAULT_DELIMETER separated, with whitespace trimming). | |||
| return importCsv(schema, csv, CSV_PARSER); | |||
| } | |||
| /** Imports a CSV file using a specified parser. */ | |||
| public static <K> CsvTable<K> importCsv(CsvSchema<K> schema, Reader csv, CsvParser csvParser) | |||
| throws IOException { | |||
| TableParser<K> tableParser = new TableParser<>(schema); | |||
| // Every value of a parsed row is unescaped before the row is passed to the table parser. | |||
| Consumer<Stream<String>> rowHandler = | |||
| values -> { | |||
| ImmutableList<String> unescaped = | |||
| values.map(CsvTable::unescapeSingleLineCsvText).collect(toImmutableList()); | |||
| tableParser.accept(unescaped); | |||
| }; | |||
| // Closing the buffered reader also closes the given reader. | |||
| try (BufferedReader lineReader = new BufferedReader(csv)) { | |||
| csvParser.parse(lineReader.lines(), rowHandler); | |||
| } | |||
| return tableParser.done(); | |||
| } | |||
| /** | |||
| * Imports a sequence of rows to create a CSV table. The values in the rows are unescaped and | |||
| * require no explicit parsing. Rows are read from the supplier until it returns {@code null}, | |||
| * and the first row is treated as the header. | |||
| */ | |||
| public static <K> CsvTable<K> importRows(CsvSchema<K> schema, Supplier<List<String>> rows) { | |||
| TableParser<K> tableParser = new TableParser<>(schema); | |||
| for (List<String> next = rows.get(); next != null; next = rows.get()) { | |||
| tableParser.accept(next); | |||
| } | |||
| return tableParser.done(); | |||
| } | |||
| /** | |||
| * Creates a "diff table" based on the given left and right table inputs. The resulting table | |||
| * has a new key column which indicates (via the {@code Status} enum) how rows difference between | |||
| * the left and right tables. | |||
| */ | |||
| public static <K> CsvTable<DiffKey<K>> diff(CsvTable<K> lhs, CsvTable<K> rhs, DiffMode mode) { | |||
| checkArgument(lhs.getSchema().equals(rhs.getSchema()), "Cannot diff with different schemas"); | |||
| checkNotNull(mode, "Must specify a diff mode"); | |||
| CsvKeyMarshaller<DiffKey<K>> marshaller = DiffKey.wrap(lhs.getSchema().keyMarshaller()); | |||
| CsvSchema<DiffKey<K>> diffSchema = CsvSchema.of(marshaller, lhs.getSchema().columns()); | |||
| Builder<DiffKey<K>> diff = CsvTable.builder(diffSchema); | |||
| if (mode != DiffMode.RHS) { | |||
| Sets.difference(lhs.getKeys(), rhs.getKeys()) | |||
| .forEach(k -> diff.addRow(DiffKey.of(LHS_ONLY, k), lhs.getRow(k))); | |||
| } | |||
| if (mode != DiffMode.LHS) { | |||
| Sets.difference(rhs.getKeys(), lhs.getKeys()) | |||
| .forEach(k -> diff.addRow(DiffKey.of(RHS_ONLY, k), rhs.getRow(k))); | |||
| } | |||
| for (K key : Sets.intersection(lhs.getKeys(), rhs.getKeys())) { | |||
| Map<Column<?>, Object> lhsRow = lhs.getRow(key); | |||
| Map<Column<?>, Object> rhsRow = rhs.getRow(key); | |||
| if (lhsRow.equals(rhsRow)) { | |||
| if (mode != DiffMode.CHANGES) { | |||
| diff.addRow(DiffKey.of(UNCHANGED, key), lhsRow); | |||
| } | |||
| } else { | |||
| if (mode != DiffMode.RHS) { | |||
| diff.addRow(DiffKey.of(LHS_CHANGED, key), lhsRow); | |||
| } | |||
| if (mode != DiffMode.LHS) { | |||
| diff.addRow(DiffKey.of(RHS_CHANGED, key), rhsRow); | |||
| } | |||
| } | |||
| } | |||
| return diff.build(); | |||
| } | |||
| /** Returns the schema (key marshaller and non-key columns) for this table. */ | |||
| public abstract CsvSchema<K> getSchema(); | |||
| /** Returns the rows of the table (not public, to avoid exposing the untyped cell values). */ | |||
| // Note that this cannot easily be replaced by ImmutableTable (as of Jan 2019) because | |||
| // ImmutableTable has severe limitations on how row/column ordering is handled that make the | |||
| // row/column ordering required in CsvTable currently impossible. | |||
| abstract ImmutableMap<K, ImmutableMap<Column<?>, Object>> getRows(); | |||
| /** | |||
| * Returns the set of columns for the table (excluding the synthetic key columns, which are | |||
| * handled by the marshaller). Tables created via {@code from()} have their columns sorted by | |||
| * the schema's column ordering. | |||
| */ | |||
| public abstract ImmutableSet<Column<?>> getColumns(); | |||
| /** Returns whether the table is empty (i.e. it contains no rows). */ | |||
| public boolean isEmpty() { | |||
| return getRows().isEmpty(); | |||
| } | |||
| /** Returns the set of row keys for the table (this is the key set of the row map). */ | |||
| public ImmutableSet<K> getKeys() { | |||
| return getRows().keySet(); | |||
| } | |||
| /** | |||
| * Returns a single row as a map of column assignments, or an empty map if the table has no row | |||
| * for the given key. | |||
| */ | |||
| public ImmutableMap<Column<?>, Object> getRow(K rowKey) { | |||
| ImmutableMap<Column<?>, Object> found = getRows().get(rowKey); | |||
| if (found == null) { | |||
| return ImmutableMap.of(); | |||
| } | |||
| return found; | |||
| } | |||
| /** Returns whether the table contains a row with the given key. */ | |||
| public boolean containsRow(K rowKey) { | |||
| return getKeys().contains(rowKey); | |||
| } | |||
| /** Returns a new builder with this table's schema, pre-populated with all of its rows. */ | |||
| public Builder<K> toBuilder() { | |||
| Builder<K> copy = builder(getSchema()); | |||
| for (Map.Entry<K, ImmutableMap<Column<?>, Object>> row : getRows().entrySet()) { | |||
| copy.putRow(row.getKey(), row.getValue()); | |||
| } | |||
| return copy; | |||
| } | |||
| /** Returns the key column names followed by the names of the table's non-key columns. */ | |||
| public Stream<String> getCsvHeader() { | |||
| Stream<String> keyNames = getSchema().keyMarshaller().getColumns().stream(); | |||
| Stream<String> columnNames = getColumns().stream().map(Column::getName); | |||
| return Stream.concat(keyNames, columnNames); | |||
| } | |||
| /** | |||
| * Returns the unescaped CSV values for the specified row, in order (serialized key values | |||
| * followed by one serialized value per table column). | |||
| * | |||
| * @throws IllegalArgumentException if the table has no row for the given key. | |||
| */ | |||
| public Stream<String> getCsvRow(K key) { | |||
| checkArgument(getKeys().contains(key), "no such row: %s", key); | |||
| // The raw value (possibly null) is passed to serialize() so that missing values and default | |||
| // values are not conflated. | |||
| Stream<String> values = getColumns().stream().map(c -> c.serialize(getOrNull(key, c))); | |||
| return Stream.concat(getSchema().keyMarshaller().serialize(key), values); | |||
| } | |||
| /** | |||
| * Exports the given table by writing its values as semicolon separated "CSV", with or without | |||
| * alignment. For example (with alignment): | |||
| * | |||
| * <pre> | |||
| * Key1 ; Key2 ; Column1 ; Column2 ; Column3 | |||
| * k1 ; k2 ; OTHER ; "Text" ; true | |||
| * ... | |||
| * </pre> | |||
| * | |||
| * Where the first {@code N} columns represent the row key (as encapsulated by the key {@link | |||
| * CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema} via the | |||
| * column names. | |||
| */ | |||
| public boolean exportCsv(Writer writer, boolean align) { | |||
| // Export only the columns present in this table; returns false (writing nothing) if the table | |||
| // is empty. | |||
| return exportCsvHelper(writer, align, getColumns()); | |||
| } | |||
| /** | |||
| * Exports the given table by writing its values as semicolon separated "CSV", with or without | |||
| * alignment. For example (with alignment): | |||
| * | |||
| * <pre> | |||
| * Key1 ; Key2 ; Column1 ; Column2 ; Column3 | |||
| * k1 ; k2 ; OTHER ; "Text" ; true | |||
| * ... | |||
| * </pre> | |||
| * | |||
| * Where the first {@code N} columns represent the row key (as encapsulated by the key {@link | |||
| * CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema} via the | |||
| * column names. This will add columns that are part of the schema for the given table but have no | |||
| * assigned values. | |||
| */ | |||
| public boolean exportCsvWithEmptyColumnsPresent(Writer writer, boolean align) { | |||
| // Schema columns come first, followed by any table columns not already seen (collecting into | |||
| // a set drops the duplicates). | |||
| ImmutableSet<Column<?>> allColumns = | |||
| Stream.concat(getSchema().columns().getColumns().stream(), getColumns().stream()) | |||
| .collect(ImmutableSet.toImmutableSet()); | |||
| return exportCsvHelper(writer, align, allColumns); | |||
| } | |||
| /** | |||
| * Writes the header and every row of this table to the given writer, using the given columns. | |||
| * | |||
| * @return false (having written nothing) if the table is empty, otherwise true. | |||
| */ | |||
| private boolean exportCsvHelper( | |||
| Writer writer, boolean align, ImmutableSet<Column<?>> columnsToExport) { | |||
| // Exit for empty tables (CSV file is truncated). The caller may then delete the empty file. | |||
| if (isEmpty()) { | |||
| return false; | |||
| } | |||
| CsvTableCollector out = new CsvTableCollector(align); | |||
| // Header row: key column names followed by the exported column names. | |||
| out.accept( | |||
| Stream.concat( | |||
| getSchema().keyMarshaller().getColumns().stream(), | |||
| columnsToExport.stream().map(Column::getName)) | |||
| .distinct()); | |||
| // Data rows: raw values (possibly null) are formatted to avoid default values everywhere. | |||
| for (K rowKey : getKeys()) { | |||
| out.accept( | |||
| Stream.concat( | |||
| getSchema().keyMarshaller().serialize(rowKey), | |||
| columnsToExport.stream().map(c -> formatValue(c, getOrNull(rowKey, c))))); | |||
| } | |||
| out.writeCsv(writer); | |||
| return true; | |||
| } | |||
| // Returns the value for the given row and column, or null if the row or value is missing. | |||
| @Nullable private <T extends Comparable<T>> T getOrNull(K rowKey, Column<T> column) { | |||
| return column.cast(getRow(rowKey).get(column)); | |||
| } | |||
| /** | |||
| * Returns the value from the underlying table for the given row and column if present, or an | |||
| * empty {@code Optional} if there is no such value (the column default is not substituted). | |||
| */ | |||
| public <T extends Comparable<T>> Optional<T> get(K rowKey, Column<T> column) { | |||
| return Optional.ofNullable(getOrNull(rowKey, column)); | |||
| } | |||
| /** | |||
| * Returns the value from the underlying table for the given row and column, falling back to | |||
| * the column's default value when the table has no value there. | |||
| */ | |||
| public <T extends Comparable<T>> T getOrDefault(K rowKey, Column<T> column) { | |||
| T assigned = getOrNull(rowKey, column); | |||
| if (assigned == null) { | |||
| return column.defaultValue(); | |||
| } | |||
| return assigned; | |||
| } | |||
| /** | |||
| * Returns the set of unique values in the given column. Note that if some rows do not have a | |||
| * value, then this will NOT result in the column default value being in the returned set. An | |||
| * empty column will result in an empty set being returned here. | |||
| */ | |||
| public <T extends Comparable<T>> ImmutableSortedSet<T> getValues(Column<T> column) { | |||
| return getKeys().stream() | |||
| .map(k -> getOrNull(k, column)) | |||
| .filter(Objects::nonNull) | |||
| .collect(toImmutableSortedSet(Ordering.natural())); | |||
| } | |||
| @Override | |||
| public final String toString() { | |||
| StringWriter w = new StringWriter(); | |||
| exportCsv(w, true); | |||
| return w.toString(); | |||
| } | |||
| /** | |||
| * Parses CSV data on a per-row basis, deserializing keys and adding values to a table. The | |||
| * first row accepted is taken to be the header. | |||
| */ | |||
| static class TableParser<K> implements Consumer<List<String>> { | |||
| private final Builder<K> table; | |||
| // Null until the header row has been processed. | |||
| private ImmutableList<Column<?>> columns = null; | |||
| TableParser(CsvSchema<K> schema) { | |||
| this.table = builder(schema); | |||
| } | |||
| @Override | |||
| public void accept(List<String> row) { | |||
| if (columns != null) { | |||
| // Header already seen, so this is a data row. | |||
| table.schema.parseRow(columns, row, table::addRow); | |||
| return; | |||
| } | |||
| columns = table.schema.parseHeader(row); | |||
| } | |||
| /** Returns the table built from the rows accepted so far. */ | |||
| public CsvTable<K> done() { | |||
| return table.build(); | |||
| } | |||
| } | |||
| // Newlines can, in theory, be emitted "raw" in the CSV output inside a quoted string, but | |||
| // this breaks all sorts of nice properties of CSV files, since there's no longer one row per | |||
| // line. This export process escapes literal newlines and other control characters into Json | |||
| // like escape sequences ('\n', '\t', '\\' etc...). Unlike Json however, any double-quotes are | |||
| // _not_ escaped via '\' since the CSV way to escape those is via doubling. We leave other | |||
| // non-ASCII characters as-is, since this is meant to be as human readable as possible. | |||
| private static final Escaper ESCAPER = new CharEscaperBuilder() | |||
| .addEscape('\n', "\\n") | |||
| .addEscape('\r', "\\r") | |||
| .addEscape('\t', "\\t") | |||
| .addEscape('\\', "\\\\") | |||
| // This is a special case only required when writing CSV file (since the parser handles | |||
| // unescaping quotes when they are read back in). In theory it should be part of a separate | |||
| // step during CSV writing, but it's not worth splitting it out. This is not considered an | |||
| // unsafe char (since it definitely does appear). | |||
| .addEscape('"', "\"\"") | |||
| .toEscaper(); | |||
| // The characters which ESCAPER turns into backslash escape sequences (the quote is excluded). | |||
| private static final CharMatcher ESCAPED_CHARS = CharMatcher.anyOf("\n\r\t\\"); | |||
| // ISO control characters which have no escape sequence above, and so are rejected outright. | |||
| private static final CharMatcher UNSAFE_CHARS = | |||
| CharMatcher.javaIsoControl().and(ESCAPED_CHARS.negate()); | |||
| private static String formatValue(Column<?> column, @Nullable Object value) { | |||
| String unescaped = column.serialize(value); | |||
| if (unescaped.isEmpty()) { | |||
| return unescaped; | |||
| } | |||
| // Slightly risky with enums, since an enum could have ';' in its toString() representation. | |||
| // However since columns and their semantics are tightly controlled, this should never happen. | |||
| if (Number.class.isAssignableFrom(column.type()) | |||
| || column.type() == Boolean.class | |||
| || column.type().isEnum()) { | |||
| checkArgument(ESCAPED_CHARS.matchesNoneOf(unescaped), "Bad 'safe' value: %s", unescaped); | |||
| return unescaped; | |||
| } | |||
| return escapeForSingleLineCsv(unescaped); | |||
| } | |||
| /** | |||
| * Escapes and quotes an arbitrary text string, ensuring it is safe for use as a single-line CSV | |||
| * value. Newlines, carriage returns and tabs are backslash escaped (as is backslash itself) and | |||
| * other ISO control characters are not permitted. | |||
| * | |||
| * <p>The purpose of this method is to make arbitrary Unicode text readable in a single line of | |||
| * a CSV file so that we can rely on per-line processing tools, such as "grep" or "sed" if needed | |||
| * without requiring expensive conversion to/from a spreadsheet. | |||
| */ | |||
| public static String escapeForSingleLineCsv(String unescaped) { | |||
| checkArgument(UNSAFE_CHARS.matchesNoneOf(unescaped), "Bad string value: %s", unescaped); | |||
| return '"' + ESCAPER.escape(unescaped) + '"'; | |||
| } | |||
| /** | |||
| * Unescapes a line of text escaped by {@link #escapeForSingleLineCsv(String)} to restore literal | |||
| * newlines and other backslash-escaped characters. Note that if the given string already has | |||
| * newlines present, they are preserved but will then be escaped if the text is re-escaped later. | |||
| */ | |||
| public static String unescapeSingleLineCsvText(String s) { | |||
| int i = s.indexOf('\\'); | |||
| if (i == -1) { | |||
| return s; | |||
| } | |||
| StringBuilder out = new StringBuilder(); | |||
| int start = 0; | |||
| do { | |||
| out.append(s, start, i); | |||
| char c = s.charAt(++i); | |||
| out.append(checkNotNull(UNESCAPE.get(c), "invalid escape sequence: \\%s", c)); | |||
| start = i + 1; | |||
| i = s.indexOf('\\', start); | |||
| } while (i != -1); | |||
| return out.append(s, start, s.length()).toString(); | |||
| } | |||
| private static final ImmutableMap<Character, Character> UNESCAPE = | |||
| ImmutableMap.<Character, Character>builder() | |||
| .put('n', '\n') | |||
| .put('r', '\r') | |||
| .put('t', '\t') | |||
| .put('\\', '\\') | |||
| .build(); | |||
// Visible for AutoValue only (it has no access modifier, so it is package-private).
CsvTable() {}
| } | |||
| @ -0,0 +1,99 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static java.util.stream.Collectors.joining; | |||
| import com.google.common.collect.ImmutableList; | |||
| import java.io.PrintWriter; | |||
| import java.io.Writer; | |||
| import java.util.ArrayList; | |||
| import java.util.List; | |||
| import java.util.NavigableMap; | |||
| import java.util.TreeMap; | |||
| import java.util.function.Consumer; | |||
| import java.util.stream.IntStream; | |||
| import java.util.stream.Stream; | |||
| /** Collects cell values and tracks maximum cell width to make it easy to output aligned CSV. */ | |||
| public final class CsvTableCollector implements Consumer<Stream<String>> { | |||
| private final NavigableMap<Integer, Integer> maxLengths = new TreeMap<>(); | |||
| private final List<List<String>> cells = new ArrayList<>(); | |||
| private final boolean align; | |||
| public CsvTableCollector(boolean align) { | |||
| this.align = align; | |||
| } | |||
| /** | |||
| * Writes the contents of this table, with optional alignment, as a CSV table. Returns whether | |||
| * anything was written. | |||
| */ | |||
| public void writeCsv(Writer writer) { | |||
| try (PrintWriter out = new PrintWriter(writer)) { | |||
| // Pad elements with whitespace when aligning (since we've gone to all the effort of padding | |||
| // everything else). | |||
| String joiner = align ? " ; " : ";"; | |||
| for (int rowIndex = 0; rowIndex < cells.size(); rowIndex++) { | |||
| // No need to use CharMatcher to trim "properly" since only ASCII space is possible. | |||
| out.println(getRow(rowIndex).collect(joining(joiner)).trim()); | |||
| } | |||
| } | |||
| } | |||
| /** | |||
| * Accepts the next row in the CSV table. Note that the first consumer returned is expected to | |||
| * have the title row written to it. | |||
| * | |||
| * <p>Values passed into the accept method of the returned consumer are expected to have already | |||
| * been escaped if necessary. The caller must call the {@link Consumer#accept(Object)} method for | |||
| * every column of the table, even if only to pass an empty string to indicate an empty cell. | |||
| */ | |||
| @Override | |||
| public void accept(Stream<String> row) { | |||
| ImmutableList<String> rowValues = row.collect(toImmutableList()); | |||
| for (int i = 0; i < rowValues.size(); i++) { | |||
| updateMaxLength(rowValues.get(i), i); | |||
| } | |||
| cells.add(rowValues); | |||
| } | |||
| private Stream<String> getRow(int index) { | |||
| List<String> row = cells.get(index); | |||
| int length = row.size(); | |||
| while (length > 0 && row.get(length - 1).isEmpty()) { | |||
| length--; | |||
| } | |||
| if (align) { | |||
| return IntStream.range(0, length).mapToObj(n -> pad(row.get(n), maxLength(n))); | |||
| } | |||
| return row.stream().limit(length); | |||
| } | |||
| private static String pad(String s, int len) { | |||
| return len > 0 ? String.format("%-" + len + "s", s) : ""; | |||
| } | |||
| private int maxLength(int index) { | |||
| return maxLengths.getOrDefault(index, 0); | |||
| } | |||
| private void updateMaxLength(String s, int index) { | |||
| // Note: This isn't Unicode aware, but in reality it's not that important. | |||
| maxLengths.put(index, Math.max(s.length(), maxLength(index))); | |||
| } | |||
| } | |||
| @ -0,0 +1,100 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.Maps; | |||
| import java.util.ArrayList; | |||
| import java.util.Comparator; | |||
| import java.util.EnumSet; | |||
| import java.util.List; | |||
| import java.util.Optional; | |||
| import java.util.function.Function; | |||
| import java.util.stream.Stream; | |||
| /** Key for use in "diff" tables, allowing rows to be marked with a diff status. */ | |||
| @AutoValue | |||
| public abstract class DiffKey<K> { | |||
| /** | |||
| * Status for rows in a "diff table". Every row in a diff table has a {@code DiffKey}, with a | |||
| * status. Modified rows appear twice in the diff table, once for the left-side row, and once for | |||
| * the right-side row. | |||
| */ | |||
| public enum Status { | |||
| /** A row which appears exclusively in the left-hand-side of the diff. */ | |||
| LHS_ONLY("----"), | |||
| /** A row which appears exclusively in the right-hand-side of the diff. */ | |||
| RHS_ONLY("++++"), | |||
| /** The left-hand-side row which was modified by the diff. */ | |||
| LHS_CHANGED("<<<<"), | |||
| /** The right-hand-side row which was modified by the diff. */ | |||
| RHS_CHANGED(">>>>"), | |||
| /** A row unchanged by the diff. */ | |||
| UNCHANGED("===="); | |||
| private static final ImmutableMap<String, Status> MAP = | |||
| Maps.uniqueIndex(EnumSet.allOf(Status.class), Status::getLabel); | |||
| private final String label; | |||
| Status(String label) { | |||
| this.label = label; | |||
| } | |||
| String getLabel() { | |||
| return label; | |||
| } | |||
| static Status parse(String s) { | |||
| return MAP.get(s); | |||
| } | |||
| } | |||
| static <K> CsvKeyMarshaller<DiffKey<K>> wrap(CsvKeyMarshaller<K> keyMarshaller) { | |||
| List<String> keyColumns = new ArrayList<>(); | |||
| keyColumns.add("Diff"); | |||
| keyColumns.addAll(keyMarshaller.getColumns()); | |||
| return new CsvKeyMarshaller<>( | |||
| serialize(keyMarshaller), deserialize(keyMarshaller), ordering(keyMarshaller), keyColumns); | |||
| } | |||
| static <K> DiffKey<K> of(Status status, K key) { | |||
| return new AutoValue_DiffKey<>(status, key); | |||
| } | |||
| public abstract Status getStatus(); | |||
| public abstract K getOriginalKey(); | |||
| private static <T> Function<DiffKey<T>, Stream<String>> serialize(CsvKeyMarshaller<T> m) { | |||
| return k -> Stream.concat(Stream.of(k.getStatus().getLabel()), m.serialize(k.getOriginalKey())); | |||
| } | |||
| private static <T> Function<List<String>, DiffKey<T>> deserialize(CsvKeyMarshaller<T> m) { | |||
| return r -> | |||
| new AutoValue_DiffKey<>(Status.parse(r.get(0)), m.deserialize(r.subList(1, r.size()))); | |||
| } | |||
| private static <T> Optional<Comparator<DiffKey<T>>> ordering(CsvKeyMarshaller<T> m) { | |||
| return m.ordering().map(o -> { | |||
| // Weird bug (possibly IntelliJ) means it really doesn't do well inferring types over lambdas | |||
| // for this sort of chained API call. Pulling into separate variables works fine. | |||
| Comparator<DiffKey<T>> keyFn = Comparator.comparing(DiffKey::getOriginalKey, o); | |||
| return keyFn.thenComparing(DiffKey::getStatus); | |||
| }); | |||
| } | |||
| } | |||
| @ -0,0 +1,186 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import static com.google.common.collect.Maps.filterValues; | |||
| import static com.google.common.collect.Maps.transformValues; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.ImmutableSortedMap; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||
| import java.util.Map.Entry; | |||
| import java.util.SortedMap; | |||
| import java.util.TreeMap; | |||
| import javax.annotation.Nullable; | |||
| /** | |||
| * A mapping from category values to a set of disjoint ranges. This is used only by the RangeTable | |||
| * class to represent a column of values. | |||
| */ | |||
| final class DisjointRangeMap<T extends Comparable<T>> { | |||
| static final class Builder<T extends Comparable<T>> { | |||
| private final Column<T> column; | |||
| private final SortedMap<T, RangeTree> map = new TreeMap<>(); | |||
| // Cache of all assigned ranges, used repeatedly by RangeTable . This could be recalculated | |||
| // every time it's needed, but it's just as easy to keep it cached here. | |||
| private RangeTree assignedRanges = RangeTree.empty(); | |||
| Builder(Column<T> column) { | |||
| this.column = checkNotNull(column); | |||
| } | |||
| /** | |||
| * Returns the ranges assigned to the given value (returns the empty range if the given value | |||
| * is unassigned in this column). Note that unlike table operations, it makes no sense to allow | |||
| * {@code null} to be used to determine the unassigned ranges, since calculating that requires | |||
| * knowledge of the table in which this column exists. | |||
| */ | |||
| RangeTree getRanges(Object value) { | |||
| T checkedValue = column.cast(checkNotNull(value)); | |||
| return map.getOrDefault(checkedValue, RangeTree.empty()); | |||
| } | |||
| /** Returns the currently assigned ranges for this column. */ | |||
| RangeTree getAssignedRanges() { | |||
| return assignedRanges; | |||
| } | |||
| /** | |||
| * Checks whether the "proposed" assignment would succeed with the specified overwrite mode | |||
| * (assignments always succeed if the mode is {@link OverwriteMode#ALWAYS} ALWAYS). If the | |||
| * given value is {@code null} and the mode is not {@code ALWAYS}, this method ensures that | |||
| * none of the given ranges are assigned to any value in this column. | |||
| * <p> | |||
| * This is useful as a separate method when multiple changes are to be made which cannot be | |||
| * allowed to fail halfway through. | |||
| * | |||
| * @throws IllegalArgumentException if the value cannot be added to the column. | |||
| * @throws RangeException if the write is not possible with the given mode. | |||
| */ | |||
| T checkAssign(@Nullable Object value, RangeTree ranges, OverwriteMode mode) { | |||
| // Always check the proposed value (for consistency). | |||
| T checkedValue = column.cast(value); | |||
| if (mode != OverwriteMode.ALWAYS) { | |||
| checkArgument(checkedValue != null, | |||
| "Assigning a null value (unassignment) with mode other than ALWAYS makes no sense: %s", | |||
| mode); | |||
| if (mode == OverwriteMode.SAME) { | |||
| // Don't care about ranges that are already in the map. | |||
| ranges = ranges.subtract(map.getOrDefault(checkedValue, RangeTree.empty())); | |||
| } | |||
| RangeException.checkDisjoint(column, checkedValue, assignedRanges, ranges, mode); | |||
| } | |||
| return checkedValue; | |||
| } | |||
| /** | |||
| * Assigns the given ranges to the specified value in this column. After a call to | |||
| * {@code assign()} with a non-null value it is true that: | |||
| * <ul> | |||
| * <li>The result of {@code getRanges(value)} will contain at least the given ranges. | |||
| * <li>No ranges assigned to any other category value will intersect with the given ranges. | |||
| * </ul> | |||
| * If ranges are "assigned" to {@code null}, it has the effect of unassigning them. | |||
| * | |||
| * @param value the category value to assign ranges to, or {@code null} to unassign. | |||
| * @param ranges the ranges to assign to the category value with ID {@code id}. | |||
| * @param mode the overwrite mode describing how to handle existing assignments. | |||
| * @throws IllegalArgumentException if the assignment violates the given {@link OverwriteMode}. | |||
| */ | |||
| void assign(@Nullable Object value, RangeTree ranges, OverwriteMode mode) { | |||
| T checkedValue = checkAssign(value, ranges, mode); | |||
| // Now unassign the ranges for all other values (only necessary if mode is "ALWAYS" since in | |||
| // other modes we've already ensured there's no intersection). | |||
| if (mode == OverwriteMode.ALWAYS) { | |||
| RangeTree overlap = assignedRanges.intersect(ranges); | |||
| if (!overlap.isEmpty()) { | |||
| for (Entry<T, RangeTree> e : map.entrySet()) { | |||
| // Skip needless extra work for the value we are about to assign. | |||
| if (!e.getKey().equals(checkedValue)) { | |||
| e.setValue(e.getValue().subtract(overlap)); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| if (checkedValue != null) { | |||
| map.put(checkedValue, ranges.union(map.getOrDefault(checkedValue, RangeTree.empty()))); | |||
| assignedRanges = assignedRanges.union(ranges); | |||
| } else { | |||
| assignedRanges = assignedRanges.subtract(ranges); | |||
| } | |||
| } | |||
| /** Builds the range map. */ | |||
| DisjointRangeMap<T> build() { | |||
| return new DisjointRangeMap<T>(column, map, assignedRanges); | |||
| } | |||
| } | |||
| private final Column<T> column; | |||
| private final ImmutableSortedMap<T, RangeTree> map; | |||
| private final RangeTree assignedRanges; | |||
| private DisjointRangeMap( | |||
| Column<T> column, SortedMap<T, RangeTree> map, RangeTree assignedRanges) { | |||
| this.column = checkNotNull(column); | |||
| this.map = ImmutableSortedMap.copyOfSorted(filterValues(map, r -> !r.isEmpty())); | |||
| this.assignedRanges = assignedRanges; | |||
| } | |||
| /** | |||
| * Returns the ranges assigned to the given value. | |||
| * | |||
| * @throws IllegalArgumentException if {@code value} is not a value in this category. | |||
| */ | |||
| RangeTree getRanges(Object value) { | |||
| return map.get(column.cast(value)); | |||
| } | |||
| /** Returns all values assigned to non-empty ranges in this column. */ | |||
| ImmutableSet<T> getAssignedValues() { | |||
| return map.keySet(); | |||
| } | |||
| /** Returns the union of all assigned ranges in this column. */ | |||
| RangeTree getAssignedRanges() { | |||
| return assignedRanges; | |||
| } | |||
| /** Intersects this column with the given bounds. */ | |||
| DisjointRangeMap<T> intersect(RangeTree bounds) { | |||
| return new DisjointRangeMap<T>( | |||
| column, transformValues(map, r -> r.intersect(bounds)), assignedRanges.intersect(bounds)); | |||
| } | |||
| @Override | |||
| public boolean equals(Object obj) { | |||
| if (!(obj instanceof DisjointRangeMap<?>)) { | |||
| return false; | |||
| } | |||
| // No need to check "assignedRanges" since it's just a cache of other values anyway. | |||
| DisjointRangeMap<?> other = (DisjointRangeMap<?>) obj; | |||
| return this == other || (column.equals(other.column) && map.equals(other.map)); | |||
| } | |||
| @Override | |||
| public int hashCode() { | |||
| return column.hashCode() ^ map.hashCode(); | |||
| } | |||
| } | |||
| @ -0,0 +1,116 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.CharMatcher.whitespace; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import com.google.common.base.Joiner; | |||
| import com.google.common.base.Splitter; | |||
| import com.google.common.collect.Comparators; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.ImmutableSortedSet; | |||
| import java.util.Comparator; | |||
| import java.util.function.Function; | |||
| /** | |||
| * A wrapper to permit sets of values to be specified as a single "cell" in a CsvTable or | |||
| * RangeTable. Currently only sets of values are permitted (not lists) so duplicate elements are | |||
| * not allowed. This is easy to change in future, but the real data suggests no use case for that. | |||
| * | |||
| * <p>The expectation of this class is that specific, non-generic subclasses will be made to | |||
| * "solidify" the choice of value type, separator and value ordering. This is why those specific | |||
| * attributes are not tested in the equals()/hashCode() methods, since they are expected to be | |||
| * constant for a given implementation. Subclasses should be final, and look something like: | |||
| * <pre> {@code | |||
| * public static final class Foos extends MultiValue<Foo, Foos> { | |||
| * private static final Foos EMPTY = new Foos(ImmutableSet.of()); | |||
| * | |||
| * public static Column<Foos> column(String name) { | |||
| * return Column.create(Foos.class, name, EMPTY, Foos::new); | |||
| * } | |||
| * | |||
| * public static Foos of(Iterable<Foo> foos) { | |||
| * return new Foos(foos); | |||
| * } | |||
| * | |||
| * private Foos(Iterable<Foo> foos) { super(foos, <separator>, <ordering>, <sorted>); } | |||
| * private Foos(String s) { super(s, <parseFn>, <separator>, <ordering>, <sorted>); } | |||
| * } | |||
| * }</pre> | |||
| * where {@code <separator>}, {@code <ordering>} and {@code <sorted>} are the same constants in | |||
| * both places. | |||
| */ | |||
| public abstract class MultiValue<T, M extends MultiValue<T, M>> | |||
| implements Comparable<M> { | |||
| private final ImmutableSet<T> values; | |||
| private final char separator; | |||
| private final Comparator<Iterable<T>> comparator; | |||
| protected MultiValue( | |||
| String s, Function<String, T> fn, char separator, Comparator<T> comparator, boolean sorted) { | |||
| this(parse(s, fn, separator), separator, comparator, sorted); | |||
| } | |||
| protected MultiValue( | |||
| Iterable<T> values, char separator, Comparator<T> comparator, boolean sorted) { | |||
| this.separator = separator; | |||
| this.values = | |||
| sorted ? ImmutableSortedSet.copyOf(comparator, values) : ImmutableSet.copyOf(values); | |||
| this.comparator = Comparators.lexicographical(comparator); | |||
| } | |||
| private static <T> ImmutableList<T> parse(String s, Function<String, T> fn, char separator) { | |||
| Splitter splitter = Splitter.on(separator).omitEmptyStrings().trimResults(whitespace()); | |||
| return splitter.splitToList(s).stream().map(fn).collect(toImmutableList()); | |||
| } | |||
| public final ImmutableSet<T> getValues() { | |||
| return values; | |||
| } | |||
| public final char separator() { | |||
| return separator; | |||
| } | |||
| @Override | |||
| public final int compareTo(M that) { | |||
| // The separator doesn't factor in here since it's always the same. | |||
| return comparator.compare(this.getValues(), that.getValues()); | |||
| } | |||
| @Override | |||
| @SuppressWarnings({"unchecked", "EqualsGetClass"}) | |||
| public final boolean equals(Object obj) { | |||
| // Check exact subclass, since we expect separators and ordering to always be the same. | |||
| if (obj == null || obj.getClass() != getClass()) { | |||
| return false; | |||
| } | |||
| return getValues().equals(((MultiValue<T, M>) obj).getValues()); | |||
| } | |||
| @Override | |||
| public final int hashCode() { | |||
| return getValues().hashCode(); | |||
| } | |||
| @Override | |||
| public final String toString() { | |||
| return Joiner.on(separator()).join(getValues()); | |||
| } | |||
| } | |||
| @ -0,0 +1,74 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import static java.util.stream.Collectors.joining; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||
| import javax.annotation.Nullable; | |||
| /** A structured exception which should be used whenever structural errors occur in table data. */ | |||
| public final class RangeException extends IllegalArgumentException { | |||
| // Called when assigning ranges, depending on the overwrite mode. As more cases are added, | |||
| // consider refactoring and subclassing for clean semantics. | |||
| static <T extends Comparable<T>> void checkDisjoint( | |||
| Column<T> column, T value, RangeTree existing, RangeTree ranges, OverwriteMode mode) { | |||
| RangeTree intersection = existing.intersect(ranges); | |||
| if (!intersection.isEmpty()) { | |||
| // A non-empty intersection implies both inputs are also non-empty. | |||
| throw new RangeException(column, value, existing, ranges, intersection, mode); | |||
| } | |||
| } | |||
| RangeException(Column<?> column, | |||
| @Nullable Object value, | |||
| RangeTree existing, | |||
| RangeTree ranges, | |||
| RangeTree intersection, | |||
| OverwriteMode mode) { | |||
| super(explain(checkNotNull(column), value, existing, ranges, intersection, checkNotNull(mode))); | |||
| } | |||
| private static String explain( | |||
| Column<?> column, | |||
| @Nullable Object value, | |||
| RangeTree existing, | |||
| RangeTree ranges, | |||
| RangeTree intersection, | |||
| OverwriteMode mode) { | |||
| return String.format( | |||
| "cannot assign non-disjoint ranges for value '%s' in column '%s' using overwrite mode: %s\n" | |||
| + "overlapping ranges:\n%s" | |||
| + "existing ranges:\n%s" | |||
| + "new ranges:\n%s", | |||
| value, column, mode, toLines(intersection), toLines(existing), toLines(ranges)); | |||
| } | |||
| private static String toLines(RangeTree ranges) { | |||
| checkArgument(!ranges.isEmpty()); | |||
| return ranges.asRangeSpecifications().stream().map(s -> " " + s + "\n").collect(joining()); | |||
| } | |||
| // We suppress stack traces for "semantic" exceptions, since these aren't intended to indicate | |||
| // bugs, but rather user error (for which a stack trace is not very useful). | |||
| @Override | |||
| public synchronized Throwable fillInStackTrace() { | |||
| return this; | |||
| } | |||
| } | |||
| @ -0,0 +1,215 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import static com.google.common.base.Preconditions.checkState; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK; | |||
| import static java.lang.Integer.numberOfTrailingZeros; | |||
| import static java.util.Comparator.comparing; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableSortedSet; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy; | |||
| import java.util.ArrayList; | |||
| import java.util.Comparator; | |||
| import java.util.List; | |||
| import java.util.NavigableSet; | |||
| import java.util.Set; | |||
| /** | |||
| * A range key is somewhat similar to a {@link RangeSpecification}, except that it can encode | |||
| * multiple possible lengths for a single range prefix. Range keys are particularly useful as | |||
| * unique "row keys" when representing range trees as tabular data. | |||
| */ | |||
| @AutoValue | |||
| public abstract class RangeKey { | |||
/**
 * Order by prefix first and then minimum length. For row keys representing disjoint ranges, this
 * will be a total ordering (since the comparison is really with the "shortest" digit sequence in
 * the ranges, which must be distinct for disjoint ranges).
 */
public static final Comparator<RangeKey> ORDERING =
    // Prefixes are compared via the string form of their minimum digit sequence.
    comparing(RangeKey::getPrefix, comparing(s -> s.min().toString()))
        // Ties are broken by comparing the smallest length of each key.
        .thenComparing(RangeKey::getLengths, comparing(NavigableSet::first));
| /** | |||
| * Creates a range key representing ranges with a prefix of some set of lengths. The prefix must | |||
| * not be longer than the possible lengths and cannot end with an "any" edge (i.e. "x"). | |||
| */ | |||
| public static RangeKey create(RangeSpecification prefix, Set<Integer> lengths) { | |||
| checkArgument(prefix.length() == 0 || prefix.getBitmask(prefix.length() - 1) != ALL_DIGITS_MASK, | |||
| "prefix cannot end with an 'any' edge: %s", prefix); | |||
| ImmutableSortedSet<Integer> sorted = ImmutableSortedSet.copyOf(lengths); | |||
| checkArgument(sorted.first() >= prefix.length(), | |||
| "lengths cannot be shorter than the prefix: %s - %s", prefix, lengths); | |||
| return new AutoValue_RangeKey(prefix, sorted); | |||
| } | |||
| /** | |||
| * Decomposes the given range tree into a sorted sequence of keys, representing the same digit | |||
| * sequences. The resulting keys form a disjoint covering of the original range set, and no | |||
| * two keys will contain the same prefix (but prefixes of keys may overlap, even if the ranges | |||
| * they ultimately represent do not). The resulting sequence is ordered by {@link #ORDERING}. | |||
| */ | |||
| public static ImmutableList<RangeKey> decompose(RangeTree tree) { | |||
| List<RangeKey> keys = new ArrayList<>(); | |||
| // The ALLOW_EDGE_SPLITTING strategy works best for the case of generating row keys because it | |||
| // helps avoid having the same sequence appear in multiple rows. Note however than even this | |||
| // strategy isn't perfect, and partially overlapping ranges with different lengths can still | |||
| // cause issues. For example, 851 appears as a prefix for 2 rows in the following (real world) | |||
| // example. | |||
| // prefix=85[1-9], length=10 | |||
| // prefix=8[57]1, length=11 | |||
| // However a given digit sequence will still only appear in (at most) one range key based on | |||
| // its length. | |||
| for (RangeTree f : RangeTreeFactorizer.factor(tree, MergeStrategy.ALLOW_EDGE_SPLITTING)) { | |||
| KeyVisitor.visit(f, keys); | |||
| } | |||
| return ImmutableList.sortedCopyOf(ORDERING, keys); | |||
| } | |||
| // A recursive descent visitor that splits range keys from the visited tree on the upward phase | |||
| // of visitation. After finding the terminal node, the visitor tries to strip as much of the | |||
| // trailing "any" path as possible, to leave the prefix. Note that the visitor can never start | |||
| // another downward visitation while its processing the "any" paths, because if it walks up | |||
| // through an "any" path, the node it reaches cannot have any other edges coming from it (the | |||
| // "any" path is all the possible edges). | |||
| private static class KeyVisitor implements DfaVisitor { | |||
| private static void visit(RangeTree tree, List<RangeKey> keys) { | |||
| KeyVisitor v = new KeyVisitor(keys); | |||
| tree.accept(v); | |||
| // We may still need to emit a key for ranges with "any" paths that reach the root node. | |||
| int lengthMask = v.lengthMask; | |||
| // Shouldn't happen for phone numbers, since it implies the existence of "zero length" digit | |||
| // sequences. | |||
| if (tree.getInitial().canTerminate()) { | |||
| lengthMask |= 1; | |||
| } | |||
| if (lengthMask != 0) { | |||
| // Use the empty specification as a prefix since the ranges are defined purely by length. | |||
| keys.add(new AutoValue_RangeKey(RangeSpecification.empty(), buildLengths(lengthMask))); | |||
| } | |||
| } | |||
| // Collection of extracted keys. | |||
| private final List<RangeKey> keys; | |||
| // Current path from the root of the tree being visited. | |||
| private RangeSpecification path = RangeSpecification.empty(); | |||
| // Non-zero when we are in the "upward" phase of visitation, processing trailing "any" paths. | |||
| // When zero we are either in a "downward" phase or traversing up without stripping paths. | |||
| private int lengthMask = 0; | |||
| private KeyVisitor(List<RangeKey> keys) { | |||
| this.keys = checkNotNull(keys); | |||
| } | |||
| @Override | |||
| public void visit(DfaNode source, DfaEdge edge, DfaNode target) { | |||
| checkState(lengthMask == 0, | |||
| "during downward tree traversal, length mask should be zero (was %s)", lengthMask); | |||
| RangeSpecification oldPath = path; | |||
| path = path.extendByMask(edge.getDigitMask()); | |||
| if (target.equals(RangeTree.getTerminal())) { | |||
| lengthMask = (1 << path.length()); | |||
| // We might emit the key immediately for ranges without trailing paths (e.g. "1234"). | |||
| maybeEmitKey(); | |||
| } else { | |||
| target.accept(this); | |||
| // If we see a terminating node, we are either adding a new possible length to an existing | |||
| // key or starting to process a new key (we don't know and it doesn't matter providing we | |||
| // capture the current length in the mask). | |||
| if (target.canTerminate()) { | |||
| lengthMask |= (1 << path.length()); | |||
| } | |||
| maybeEmitKey(); | |||
| } | |||
| path = oldPath; | |||
| } | |||
| // Conditionally emits a key for the current path prefix and possible lengths if we've found | |||
| // the "end" of an "any" path (e.g. we have possible lengths and the edge above us is not an | |||
| // "any" path). | |||
| private void maybeEmitKey() { | |||
| if (lengthMask != 0 && path.getBitmask(path.length() - 1) != ALL_DIGITS_MASK) { | |||
| keys.add(new AutoValue_RangeKey(path, buildLengths(lengthMask))); | |||
| lengthMask = 0; | |||
| } | |||
| } | |||
| } | |||
| /** | |||
| * Returns the prefix for this range key. All digit sequences matches by this key are of the | |||
| * form {@code "<prefix>xxxx"} for some number of "any" edges. This prefix can be "empty" for | |||
| * ranges such as {@code "xxxx"}. | |||
| */ | |||
| public abstract RangeSpecification getPrefix(); | |||
| /** | |||
| * Returns the possible lengths for digit sequences matched by this key. The returned set is | |||
| * never empty. | |||
| */ | |||
| public abstract ImmutableSortedSet<Integer> getLengths(); | |||
| /** | |||
| * Converts the range key into a sequence of range specifications, ordered by length. The | |||
| * returned set is never empty. | |||
| */ | |||
| public final ImmutableList<RangeSpecification> asRangeSpecifications() { | |||
| RangeSpecification s = getPrefix(); | |||
| return getLengths().stream() | |||
| .map(n -> s.extendByLength(n - s.length())) | |||
| .collect(toImmutableList()); | |||
| } | |||
| public final RangeTree asRangeTree() { | |||
| RangeSpecification s = getPrefix(); | |||
| return RangeTree.from(getLengths().stream().map(n -> s.extendByLength(n - s.length()))); | |||
| } | |||
| /* | |||
| * Checks if the RangeKey contains a range represented by the given prefix and length. | |||
| */ | |||
| public boolean contains(DigitSequence prefix, Integer length) { | |||
| return asRangeSpecifications().stream() | |||
| .anyMatch( | |||
| specification -> | |||
| specification.matches( | |||
| prefix.extendBy(DigitSequence.zeros(length - prefix.length())))); | |||
| } | |||
| private static ImmutableSortedSet<Integer> buildLengths(int lengthMask) { | |||
| checkArgument(lengthMask != 0); | |||
| ImmutableSortedSet.Builder<Integer> lengths = ImmutableSortedSet.naturalOrder(); | |||
| do { | |||
| int length = numberOfTrailingZeros(lengthMask); | |||
| lengths.add(length); | |||
| // Clear each bit as we go. | |||
| lengthMask &= ~(1 << length); | |||
| } while (lengthMask != 0); | |||
| return lengths.build(); | |||
| } | |||
| } | |||
| @ -0,0 +1,951 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | |||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||
| import static com.google.common.collect.Iterables.transform; | |||
| import static com.google.common.collect.Maps.immutableEntry; | |||
| import static java.util.Comparator.comparing; | |||
| import static java.util.Map.Entry.comparingByKey; | |||
| import static java.util.stream.Collectors.joining; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.HashBasedTable; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.ImmutableTable; | |||
| import com.google.common.collect.Iterables; | |||
| import com.google.common.collect.Sets; | |||
| import com.google.common.collect.Table; | |||
| import com.google.common.collect.TreeBasedTable; | |||
| import com.google.common.collect.UnmodifiableIterator; | |||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import java.util.ArrayList; | |||
| import java.util.Collection; | |||
| import java.util.Comparator; | |||
| import java.util.Iterator; | |||
| import java.util.List; | |||
| import java.util.Map; | |||
| import java.util.Map.Entry; | |||
| import java.util.NoSuchElementException; | |||
| import java.util.Optional; | |||
| import java.util.Set; | |||
| import java.util.SortedMap; | |||
| import java.util.TreeMap; | |||
| import java.util.function.Function; | |||
| import javax.annotation.Nullable; | |||
| /** | |||
| * A tabular representation of attributes, assigned to number ranges. | |||
| * <p> | |||
| * A {@code RangeTable} is equivalent to {@code Table<RangeSpecification, Column, Value>}, | |||
| * but is expressed as a mapping of {@code (Column, Value) -> RangeTree} (since {@code RangeTree} | |||
| * is not a good key). To keep the data structurally equivalent to its tabular form, it's important | |||
| * that within a column, all assigned ranges are mutually disjoint (and thus a digit sequence can | |||
| * have at most one value assigned in any column). | |||
| * | |||
| * <h3>Table Schemas</h3> | |||
| * A table requires a {@link Schema}, which defines the columns which can be present and their | |||
| * order. Column ordering is important since it relates to how rules are applied (see below). | |||
| * | |||
| * <h3>Columns and Column Groups</h3> | |||
| * A {@link Column} defines a category of values of a particular type (e.g. String, Boolean, | |||
| * Integer or user specified enums) and a default value. New columns can be implemented easily and | |||
| * can choose to limit their values to some known set. | |||
| * <p> | |||
| * A {@link ColumnGroup} defines a related set of columns of the same type. The exact set of | |||
| * columns available in a group is not necessarily known in advance. A good example of a column | |||
| * group is having columns for names in different languages. A column group of "Name" could define | |||
| * columns such as "Name:en", "Name:fr", "Name:ja" etc. which contain the various translations of | |||
| * the value. The first time a value is added for a column inferred by a column group, that column | |||
| * is created. | |||
| * <p> | |||
| * An {@link Assignment} is a useful way to encapsulate "a value in a column" and can be used to | |||
| * assign or unassign values to ranges, or query for the ranges which have that assignment. | |||
| * | |||
| * <h3>Builders and Unassigned Values</h3> | |||
| * To allow a {@code RangeTable} to fully represent data in a tabular way, it must be possible to | |||
| * have rows in a table for which no value is assigned in any column. Unassigned ranges can be | |||
| * added to a builder via the {@link Builder#add(RangeTree)} method, and these "empty rows" are | |||
| * preserved in the final table. | |||
| * <p> | |||
| * This is useful since it allows a {@link Change} to affect no columns, but still have an effect | |||
| * on the final table. It's also useful when applying rules to infer values and fill-in column | |||
| * defaults. | |||
| */ | |||
| public final class RangeTable { | |||
/** Policies controlling whether an assignment may modify existing range categorization. */
public enum OverwriteMode {
  /** Assign only those ranges which had no previous assignment. */
  NEVER,

  /** Assign only those ranges which were unassigned or already had the same value. */
  SAME,

  /** Assign ranges in all cases (unassigning them from other values in the same category). */
  ALWAYS
}
| /** A builder for an immutable range table to which changes and rules can be applied. */ | |||
| public static final class Builder { | |||
| // The schema for the table to be built. | |||
| private final Schema schema; | |||
| // The map of per-column ranges. | |||
| private final SortedMap<Column<?>, DisjointRangeMap.Builder<?>> columnRanges; | |||
| // The union of all ranges added to the builder (either by assignment or range addition). | |||
| // This is not just a cache of all the assigned ranges, since assigning and unassigning a range | |||
| // will not cause it to be removed from the table altogether (even if it is no longer assigned | |||
| // in any column). | |||
| private RangeTree allRanges = RangeTree.empty(); | |||
| private Builder(Schema schema) { | |||
| this.schema = checkNotNull(schema); | |||
| this.columnRanges = new TreeMap<>(schema.ordering()); | |||
| } | |||
| // Helper to return an on-demand builder for a column. | |||
| private <T extends Comparable<T>> DisjointRangeMap.Builder<T> getOrAddRangeMap(Column<T> c) { | |||
| // The generic type of the builder is defined by the column it's building for, and the map | |||
| // just uses that column as its key. Thus, if the given column is recognized by the schema, | |||
| // the returned builder must be of the same type. | |||
| @SuppressWarnings("unchecked") | |||
| DisjointRangeMap.Builder<T> ranges = (DisjointRangeMap.Builder<T>) | |||
| columnRanges.computeIfAbsent(schema.checkColumn(c), DisjointRangeMap.Builder::new); | |||
| return ranges; | |||
| } | |||
| // ---- Read-only API ---- | |||
| /** Returns the schema for this builder. */ | |||
| public Schema getSchema() { | |||
| return schema; | |||
| } | |||
| /** | |||
| * Returns ranges for the given assignment. If the value is {@code empty}, then the unassigned | |||
| * ranges in the column are returned. | |||
| */ | |||
| public RangeTree getRanges(Assignment<?> assignment) { | |||
| return getRanges(assignment.column(), assignment.value().orElse(null)); | |||
| } | |||
| /** | |||
| * Returns ranges for the given value in the specified column. If the value is {@code null}, | |||
| * then the unassigned ranges in the column are returned. If the column has no values assigned, | |||
| * then the empty range is returned (or, if {@code value == null}, all ranges in the table). | |||
| */ | |||
| public RangeTree getRanges(Column<?> column, @Nullable Object value) { | |||
| getSchema().checkColumn(column); | |||
| DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column); | |||
| if (value != null) { | |||
| return rangeMap != null ? rangeMap.getRanges(value) : RangeTree.empty(); | |||
| } else { | |||
| RangeTree all = getAllRanges(); | |||
| return rangeMap != null ? all.subtract(rangeMap.getAssignedRanges()) : all; | |||
| } | |||
| } | |||
| /** | |||
| * Returns all assigned ranges in the specified column. If the column doesn't exist in the | |||
| * table, the empty range is returned). | |||
| */ | |||
| public RangeTree getAssignedRanges(Column<?> column) { | |||
| getSchema().checkColumn(column); | |||
| DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column); | |||
| return rangeMap != null ? rangeMap.getAssignedRanges() : RangeTree.empty(); | |||
| } | |||
| /** | |||
| * Returns ranges which were added to this builder, either directly via {@link #add(RangeTree)} | |||
| * or indirectly via assignment. | |||
| */ | |||
| public RangeTree getAllRanges() { | |||
| return allRanges; | |||
| } | |||
| /** Returns all ranges present in this table which are not assigned in any column. */ | |||
| public RangeTree getUnassignedRanges() { | |||
| RangeTree allAssigned = columnRanges.values().stream() | |||
| .map(DisjointRangeMap.Builder::getAssignedRanges) | |||
| .reduce(RangeTree.empty(), RangeTree::union); | |||
| return allRanges.subtract(allAssigned); | |||
| } | |||
| /** | |||
| * Returns a snapshot of the columns in schema order (including empty columns which may have | |||
| * been added explicitly or exist due to values being unassigned). | |||
| */ | |||
| public ImmutableSet<Column<?>> getColumns() { | |||
| return columnRanges.entrySet().stream() | |||
| .map(Entry::getKey) | |||
| .collect(toImmutableSet()); | |||
| } | |||
| // ---- Range assignment/addition/removal ---- | |||
| /** | |||
| * Assigns the specified ranges to the given assignment. If the value is {@code empty}, then | |||
| * this has the effect of unassigning the given ranges, but does not remove them from the | |||
| * table. If {@code ranges} is empty, this method has no effect. | |||
| * | |||
| * @throws RangeException if assignment cannot be performed according to the overwrite mode | |||
| * (no change will have occurred in the table if this occurs). | |||
| */ | |||
| public Builder assign(Assignment<?> assignment, RangeTree ranges, OverwriteMode mode) { | |||
| assign(assignment.column(), assignment.value().orElse(null), ranges, mode); | |||
| return this; | |||
| } | |||
| /** | |||
| * Assigns the specified ranges to a value within a column (other columns unaffected). If the | |||
| * value is {@code null}, then this has the effect of unassigning the given ranges, but does | |||
| * not remove them from the table. If {@code ranges} is empty, this method has no effect. | |||
| * | |||
| * @throws RangeException if assignment cannot be performed according to the overwrite mode | |||
| * (no change will have occurred in the table if this occurs). | |||
| */ | |||
| public Builder assign( | |||
| Column<?> column, @Nullable Object value, RangeTree ranges, OverwriteMode mode) { | |||
| if (!ranges.isEmpty()) { | |||
| getOrAddRangeMap(column).assign(value, ranges, mode); | |||
| allRanges = allRanges.union(ranges); | |||
| } | |||
| return this; | |||
| } | |||
| /** | |||
| * Unconditionally assigns all values, ranges and columns in the given table. This does not | |||
| * clear any already assigned ranges. | |||
| */ | |||
| public Builder add(RangeTable table) { | |||
| add(table.getAllRanges()); | |||
| add(table.getColumns()); | |||
| for (Column<?> column : table.getColumns()) { | |||
| for (Object value : table.getAssignedValues(column)) { | |||
| assign(column, value, table.getRanges(column, value), OverwriteMode.ALWAYS); | |||
| } | |||
| } | |||
| return this; | |||
| } | |||
| /** | |||
| * Ensures that the given ranges exist in the table, even if no assignments are ever made in | |||
| * any columns. | |||
| */ | |||
| public Builder add(RangeTree ranges) { | |||
| allRanges = allRanges.union(ranges); | |||
| return this; | |||
| } | |||
| /** Ensures that the given column exists in the table (even if there are no assignments). */ | |||
| public Builder add(Column<?> column) { | |||
| getOrAddRangeMap(checkNotNull(column)); | |||
| return this; | |||
| } | |||
| /** Ensures that the given columns exist in the table (even if there are no assignments). */ | |||
| public Builder add(Collection<Column<?>> columns) { | |||
| columns.forEach(this::add); | |||
| return this; | |||
| } | |||
| /** Removes the given ranges from the table, including all assignments in all columns. */ | |||
| public Builder remove(RangeTree ranges) { | |||
| for (DisjointRangeMap.Builder<?> rangeMap : columnRanges.values()) { | |||
| rangeMap.assign(null, ranges, OverwriteMode.ALWAYS); | |||
| } | |||
| allRanges = allRanges.subtract(ranges); | |||
| return this; | |||
| } | |||
| /** Removes the given column from the table (has no effect if the column is not present). */ | |||
| public Builder remove(Column<?> column) { | |||
| columnRanges.remove(checkNotNull(column)); | |||
| return this; | |||
| } | |||
| /** Removes the given columns from the table (has no effect if columns are not present). */ | |||
| public Builder remove(Collection<Column<?>> columns) { | |||
| columns.forEach(this::remove); | |||
| return this; | |||
| } | |||
| /** Copies the assigned, non-default, values of the specified column. */ | |||
| public <T extends Comparable<T>> Builder copyNonDefaultValues( | |||
| Column<T> column, RangeTable src, OverwriteMode mode) { | |||
| for (T v : src.getAssignedValues(column)) { | |||
| if (!column.defaultValue().equals(v)) { | |||
| assign(column, v, src.getRanges(column, v), mode); | |||
| } | |||
| } | |||
| return this; | |||
| } | |||
| // ---- Applying changes ---- | |||
| /** | |||
| * Unconditionally applies the given change to this range table. Unlike | |||
| * {@link #apply(Change, OverwriteMode)}, this method cannot fail, since changes are applied | |||
| * unconditionally. | |||
| */ | |||
| public Builder apply(Change change) { | |||
| return apply(change, OverwriteMode.ALWAYS); | |||
| } | |||
| /** | |||
| * Applies the given change to this range table. A change adds ranges to the table, optionally | |||
| * assigning them specific category values within columns. | |||
| * | |||
| * @throws RangeException if the overwrite mode prohibits the modification in this change (the | |||
| * builder remains unchanged). | |||
| */ | |||
| public Builder apply(Change change, OverwriteMode mode) { | |||
| RangeTree ranges = change.getRanges(); | |||
| if (!ranges.isEmpty()) { | |||
| // Check first that the assignments will succeed before attempting them (so as not to | |||
| // leave the builder in an inconsistent state if it fails). | |||
| if (mode != OverwriteMode.ALWAYS) { | |||
| for (Assignment<?> a : change.getAssignments()) { | |||
| getOrAddRangeMap(a.column()).checkAssign(a.value().orElse(null), ranges, mode); | |||
| } | |||
| } | |||
| for (Assignment<?> a : change.getAssignments()) { | |||
| getOrAddRangeMap(a.column()).assign(a.value().orElse(null), ranges, mode); | |||
| } | |||
| allRanges = allRanges.union(ranges); | |||
| } | |||
| return this; | |||
| } | |||
| // ---- Builder related methods ---- | |||
| /** Builds the range table from the current state of the builder. */ | |||
| public RangeTable build() { | |||
| ImmutableMap<Column<?>, DisjointRangeMap<?>> columnMap = columnRanges.entrySet().stream() | |||
| .map(e -> immutableEntry(e.getKey(), e.getValue().build())) | |||
| .sorted(comparingByKey(schema.ordering())) | |||
| .collect(toImmutableMap(Entry::getKey, Entry::getValue)); | |||
| return new RangeTable(schema, columnMap, allRanges, getUnassignedRanges()); | |||
| } | |||
| /** | |||
| * Returns a new builder with the same state as the current builder. This is useful when state | |||
| * is being built up incrementally. | |||
| */ | |||
| public Builder copy() { | |||
| // Can be made more efficient if necessary... | |||
| return build().toBuilder(); | |||
| } | |||
| /** Builds a minimal version of this table in which empty columns are no longer present. */ | |||
| public RangeTable buildMinimal() { | |||
| ImmutableSet<Column<?>> empty = columnRanges.entrySet().stream() | |||
| .filter(e -> e.getValue().getAssignedRanges().isEmpty()) | |||
| .map(Entry::getKey) | |||
| .collect(toImmutableSet()); | |||
| remove(empty); | |||
| return build(); | |||
| } | |||
| @Override | |||
| public final String toString() { | |||
| return build().toString(); | |||
| } | |||
| } | |||
| /** Returns a builder for a range table with the specified column mapping. */ | |||
| public static Builder builder(Schema schema) { | |||
| return new Builder(schema); | |||
| } | |||
| public static RangeTable from( | |||
| Schema schema, Table<RangeSpecification, Column<?>, Optional<?>> t) { | |||
| Builder table = builder(schema); | |||
| for (Entry<RangeSpecification, Map<Column<?>, Optional<?>>> row : t.rowMap().entrySet()) { | |||
| List<Assignment<?>> assignments = row.getValue().entrySet().stream() | |||
| .map(e -> Assignment.ofOptional(e.getKey(), e.getValue())) | |||
| .collect(toImmutableList()); | |||
| table.apply(Change.of(RangeTree.from(row.getKey()), assignments)); | |||
| } | |||
| return table.build(); | |||
| } | |||
// The schema defining the columns of this table.
private final Schema schema;
// Per-column mapping to the ranges assigned within that column.
private final ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges;
// Every range in this table (may be larger than the union of ranges assigned in the columns).
private final RangeTree allRanges;
// The ranges which are not assigned in any column (a subset of, or equal to, allRanges).
private final RangeTree unassigned;

// Creates a table from already computed parts; every argument must be non-null.
private RangeTable(
    Schema schema,
    ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges,
    RangeTree allRanges,
    RangeTree unassigned) {
  this.schema = checkNotNull(schema);
  this.columnRanges = checkNotNull(columnRanges);
  this.allRanges = checkNotNull(allRanges);
  this.unassigned = checkNotNull(unassigned);
}
| /** Returns a builder initialized to the ranges and assignements in this table. */ | |||
| public Builder toBuilder() { | |||
| // Any mode would work here (the builder is empty) but the "always overwrite" mode is fastest. | |||
| return new Builder(schema).add(this); | |||
| } | |||
| private Optional<DisjointRangeMap<?>> getRangeMap(Column<?> column) { | |||
| return Optional.ofNullable(columnRanges.get(schema.checkColumn(column))); | |||
| } | |||
| public Schema getSchema() { | |||
| return schema; | |||
| } | |||
| public ImmutableSet<Column<?>> getColumns() { | |||
| return columnRanges.keySet(); | |||
| } | |||
| /** | |||
| * Returns the set of values with assigned ranges in the given column. | |||
| * | |||
| * @throws IllegalArgumentException if the specified column does not exist in this table. | |||
| */ | |||
| public <T extends Comparable<T>> ImmutableSet<T> getAssignedValues(Column<T> column) { | |||
| getSchema().checkColumn(column); | |||
| // Safe since if the column is in the schema the values must have been checked when added. | |||
| @SuppressWarnings("unchecked") | |||
| DisjointRangeMap<T> rangeMap = | |||
| (DisjointRangeMap<T>) columnRanges.get(schema.checkColumn(column)); | |||
| return rangeMap != null ? rangeMap.getAssignedValues() : ImmutableSet.of(); | |||
| } | |||
| /** Returns all assigned ranges in the specified column. */ | |||
| public RangeTree getAssignedRanges(Column<?> column) { | |||
| return getRangeMap(column).map(DisjointRangeMap::getAssignedRanges).orElse(RangeTree.empty()); | |||
| } | |||
| /** | |||
| * Returns ranges for the given assignment. If the value is {@code empty}, then the unassigned | |||
| * ranges in the column are returned. | |||
| */ | |||
| public RangeTree getRanges(Assignment<?> assignment) { | |||
| return getRanges(assignment.column(), assignment.value().orElse(null)); | |||
| } | |||
| /** | |||
| * Returns ranges for the given value in the specified column. If the value is {@code null}, then | |||
| * the unassigned ranges in the column are returned. | |||
| */ | |||
| public RangeTree getRanges(Column<?> column, @Nullable Object value) { | |||
| getSchema().checkColumn(column); | |||
| if (value == null) { | |||
| return getAllRanges().subtract(getAssignedRanges(column)); | |||
| } else { | |||
| return getRangeMap(column).map(m -> m.getRanges(value)).orElse(RangeTree.empty()); | |||
| } | |||
| } | |||
| /** Returns all ranges present in this table. */ | |||
| public RangeTree getAllRanges() { | |||
| return allRanges; | |||
| } | |||
| /** Returns all ranges present in this table which are not assigned in any column. */ | |||
| public RangeTree getUnassignedRanges() { | |||
| return unassigned; | |||
| } | |||
| /** | |||
| * Returns whether this table contains no ranges (assigned or unassigned). Note that not all | |||
| * empty tables are equal, since they may still differ by the columns they have. | |||
| */ | |||
| public boolean isEmpty() { | |||
| return allRanges.isEmpty(); | |||
| } | |||
| /** | |||
| * Returns a sub-table with rows and columns limited by the specified bounds. The schema of the | |||
| * returned table is the same as this table. | |||
| */ | |||
| public RangeTable subTable(RangeTree bounds, Set<Column<?>> columns) { | |||
| // Columns must be a subset of what's allowed in this schema. | |||
| columns.forEach(getSchema()::checkColumn); | |||
| return subTable(bounds, getSchema(), columns); | |||
| } | |||
| /** | |||
| * Returns a sub-table with rows and columns limited by the specified bounds. The schema of the | |||
| * returned table is the same as this table. | |||
| */ | |||
| public RangeTable subTable(RangeTree bounds, Column<?> first, Column<?>... rest) { | |||
| return subTable(bounds, ImmutableSet.<Column<?>>builder().add(first).add(rest).build()); | |||
| } | |||
| /** | |||
| * Returns a table with rows and columns limited by the specified bounds. The schema of the | |||
| * returned table is the given sub-schema. | |||
| */ | |||
| public RangeTable subTable(RangeTree bounds, Schema subSchema) { | |||
| checkArgument(subSchema.isSubSchemaOf(getSchema()), | |||
| "expected sub-schema of %s, got %s", getSchema(), subSchema); | |||
| return subTable(bounds, subSchema, Sets.filter(getColumns(), subSchema::isValidColumn)); | |||
| } | |||
| // Callers MUST validate that the given set of columns are all valid in the subSchema. | |||
| private RangeTable subTable(RangeTree bounds, Schema subSchema, Set<Column<?>> columns) { | |||
| ImmutableMap<Column<?>, DisjointRangeMap<?>> columnMap = columns.stream() | |||
| // Bound the given columns which exist in this table. | |||
| .map(c -> immutableEntry(c, getRangeMap(c).map(r -> r.intersect(bounds)))) | |||
| // Reject columns we didn't already have (but allow empty columns if they exist). | |||
| .filter(e -> e.getValue().isPresent()) | |||
| // Sort to our schema (since the given set of columns is not required to be sorted). | |||
| .sorted(comparingByKey(schema.ordering())) | |||
| .collect(toImmutableMap(Entry::getKey, e -> e.getValue().get())); | |||
| return new RangeTable( | |||
| subSchema, columnMap, allRanges.intersect(bounds), unassigned.intersect(bounds)); | |||
| } | |||
| /** | |||
| * Returns the assigned rows of a RangeTable as a minimal list of disjoint changes, which can | |||
| * be applied to an empty table to recreate this table. No two changes affect the same columns | |||
| * in the same way and changes are ordered by the minimal values of their ranges. This is | |||
| * essentially the same information as returned in {@link #toImmutableTable()} but does not | |||
| * decompose ranges into range specifications, and it thus more amenable to compact | |||
| * serialization. | |||
| */ | |||
| // Note that the minimal nature of the returned changes is essential for some algorithms that | |||
| // operate on tables and this must not be changed. | |||
| public ImmutableList<Change> toChanges() { | |||
| Table<Column<?>, Optional<?>, RangeTree> table = HashBasedTable.create(); | |||
| for (Column<?> c : getColumns()) { | |||
| for (Object v : getAssignedValues(c)) { | |||
| table.put(c, Optional.of(v), getRanges(c, v)); | |||
| } | |||
| } | |||
| return toChanges(schema, table, getAllRanges()); | |||
| } | |||
| /** | |||
| * Computes a minimum set of changes from a table of assignments (column plus value). This is | |||
| * not expected to be used often (since RangeTable is usually a better representation of the | |||
| * data) but can be useful in representing things like updates and patches in which only some | |||
| * rows or columns are represented. | |||
| * | |||
| * @param schema a schema for the columns in the given Table (used to determine column order). | |||
| * @param table the table of assignments to assigned ranges. | |||
| * @param allRanges the set of all ranges affected by the changes (this might include ranges not | |||
| * present anywhere in the table, which correspond to empty rows). | |||
| * @return one change for each row derived from the given table, in row order. | |||
| */ | |||
| public static ImmutableList<Change> toChanges( | |||
| Schema schema, Table<Column<?>, Optional<?>, RangeTree> table, RangeTree allRanges) { | |||
| ImmutableList<Row> rows = toRows(table, allRanges, schema.ordering()); | |||
| return ImmutableList.copyOf(transform(rows, Row::toChange)); | |||
| } | |||
| /** | |||
| * Returns the data in this table as an {@link ImmutableTable} whose row keys are disjoint range | |||
| * specifications (in order). The returned table has the smallest number of rows necessary to | |||
| * represent the data in this range table. This is useful as a human readable serialized form | |||
| * since any digit sequence in the table is contained in a unique row. | |||
| */ | |||
| public ImmutableTable<RangeSpecification, Column<?>, Optional<?>> toImmutableTable() { | |||
| Table<Column<?>, Optional<?>, RangeTree> cells = HashBasedTable.create(); | |||
| for (Column<?> column : getColumns()) { | |||
| for (Object value : getAssignedValues(column)) { | |||
| cells.put(column, Optional.of(value), getRanges(column, value)); | |||
| } | |||
| // Ranges in the table for which this column has no value are recorded as Optional.empty(). | |||
| RangeTree unset = getAllRanges().subtract(getAssignedRanges(column)); | |||
| if (unset.isEmpty()) { | |||
| continue; | |||
| } | |||
| cells.put(column, Optional.empty(), unset); | |||
| } | |||
| // Unique changes contain disjoint ranges, each associated with a unique combination of | |||
| // assignments. | |||
| TreeBasedTable<RangeSpecification, Column<?>, Optional<?>> sorted = | |||
| TreeBasedTable.create(comparing(RangeSpecification::min), schema.ordering()); | |||
| for (Change change : toChanges(schema, cells, getAllRanges())) { | |||
| List<RangeSpecification> specs = change.getRanges().asRangeSpecifications(); | |||
| for (Assignment<?> assignment : change.getAssignments()) { | |||
| Column<?> column = assignment.column(); | |||
| Optional<?> value = assignment.value(); | |||
| for (RangeSpecification spec : specs) { | |||
| sorted.put(spec, column, value); | |||
| } | |||
| } | |||
| } | |||
| return ImmutableTable.copyOf(sorted); | |||
| } | |||
| /** | |||
| * Extracts a map for a single column in this table containing the minimal prefix tree for each | |||
| * of the assigned values. The returned prefixes are the shortest prefixes possible for | |||
| * distinguishing each value in the column. This method is especially useful for categorizing | |||
| * partial digit sequences efficiently (i.e. prefix matching). | |||
| * | |||
| * <p>A minimal length can be specified to avoid creating prefixes that are "too short" for some | |||
| * circumstances. Returned prefixes are never zero length, so {@code 1} is the lowest meaningful | |||
| * value (although zero is still accepted to imply "no length restriction"). | |||
| * | |||
| * <p>For some table data it is technically impossible to obtain perfect prefix information, and | |||
| * in cases where overlap occurs this method returns the shortest prefixes. This means that for | |||
| * some valid inputs more than one prefix might match, and it is up to the caller to determine a | |||
| * "best order" for testing the prefixes if this matters. See | |||
| * {@link PrefixTree#minimal(RangeTree, RangeTree, int)} for more information. | |||
| * | |||
| * <p>An example of an "impossible" prefix would be if "123" has value A, "1234" has value B and | |||
| * "12345" has value A again. In this case there is no prefix which can distinguish A and B | |||
| * (the calculated map would be { "123" => A, "1234" => B }). In this situation, testing for the | |||
| * longer prefix would help preserve as much of the original mapping as possible, but it would | |||
| * never be possible to correctly distinguish all inputs. | |||
| */ | |||
| public <T extends Comparable<T>> ImmutableMap<T, PrefixTree> getPrefixMap( | |||
| Column<T> column, int minPrefixLength) { | |||
| // Important: Exclusions are taken from the assigned ranges of the entire table, not just the | |||
| // assigned ranges in the column. This ensures unassigned ranges in the column are not | |||
| // accidentally captured by any of the generated prefixes. | |||
| RangeTree everything = getAllRanges(); | |||
| ImmutableMap.Builder<T, PrefixTree> prefixes = ImmutableMap.builder(); | |||
| for (T value : getAssignedValues(column)) { | |||
| RangeTree include = getRanges(column, value); | |||
| RangeTree exclude = everything.subtract(include); | |||
| prefixes.put(value, PrefixTree.minimal(include, exclude, minPrefixLength)); | |||
| } | |||
| return prefixes.build(); | |||
| } | |||
| // Constants for the simplification routine below (a single "index" column and its schema). | |||
| // Use -1 for unassigned rows (these are the "overlap" ranges and they don't have an index). | |||
| private static final Column<Integer> INDEX = | |||
| Column.create(Integer.class, "Change Index", -1, Integer::parseInt); | |||
| private static final Schema INDEX_SCHEMA = Schema.builder().add(INDEX).build(); | |||
| /** | |||
| * Applies a simplification function to the rows defined by the given columns of this table. The | |||
| * returned table will only have (at most) the specified columns present. | |||
| * | |||
| * <p>The simplification function is used to produce ranges which satisfy some business logic | |||
| * criteria (such as having at most N significant digits, or merging lengths). Range | |||
| * simplification enables easier comparison between data sources of differing precision, and | |||
| * helps to reduce unnecessary complexity in generated regular expressions. | |||
| * | |||
| * <p>The simplification function should return a range that's at least as large as the input | |||
| * range. This ensures that simplification cannot unassign ranges, even accidentally. The | |||
| * returned range is automatically restricted to preserve disjoint ranges in the final table. | |||
| * | |||
| * <p>Because it is passed a {@link Change} rather than just a {@link RangeTree}, the | |||
| * simplification function has access to the row assignments for the range it is simplifying. | |||
| * This allows it to select different strategies according to the values in specific columns | |||
| * (e.g. area code length). | |||
| * | |||
| * <p>Note that unassigned ranges in the original table will be preserved and simplified ranges | |||
| * will not overwrite them. This can be useful for defining "no go" ranges which should be left | |||
| * alone. | |||
| */ | |||
| public RangeTable simplify( | |||
| Function<Change, RangeTree> simplifyFn, | |||
| int minPrefixLength, | |||
| Column<?> first, | |||
| Column<?>... rest) { | |||
| // Build the single column "index" table (one index for each change) and simplify its ranges. | |||
| // This relies on "toChanges()" producing the minimal set of changes, in which each unique | |||
| // combination of assignments appears only once. | |||
| ImmutableList<Change> changes = subTable(getAllRanges(), first, rest).toChanges(); | |||
| RangeTable indexed = simplifyIndexTable(changes, simplifyFn, minPrefixLength); | |||
| // Rebuild the output table: each index in the simplified index table selects the original | |||
| // change whose assignments are applied to the simplified ranges for that index. | |||
| Builder result = RangeTable.builder(getSchema()).add(indexed.getAllRanges()); | |||
| for (int index : indexed.getAssignedValues(INDEX)) { | |||
| RangeTree ranges = indexed.getRanges(INDEX, index); | |||
| Change source = changes.get(index); | |||
| for (Assignment<?> assignment : source.getAssignments()) { | |||
| result.assign(assignment, ranges, OverwriteMode.NEVER); | |||
| } | |||
| } | |||
| return result.build(); | |||
| } | |||
| /** | |||
| * Helper function to simplify an index table based on the given rows. The resulting table will | |||
| * have a single "index" column with simplified ranges, where the index value {@code N} | |||
| * references the Nth row in the given list of disjoint changes. This is a 3 stage process: | |||
| * <ol> | |||
| * <li>Step 1: Determine which ranges can overlap with respect to the set of range prefixes. | |||
| * <li>Step 2: Do simplification on the non-overlapping "prefix disjoint" ranges in the table, | |||
| * which are then re-partitioned by the disjoint prefixes. | |||
| * <li>Step 3: Copy over any overlapping ranges from the original table (these don't get | |||
| * simplified since it's not possible to easily re-partition them). | |||
| * </ol> | |||
| * | |||
| * @param rows the disjoint changes to index; the Nth change is given the index value {@code N}. | |||
| * @param simplifyFn the function applied to each "prefix disjoint" range; it must return a | |||
| * superset of the range it is given, otherwise IllegalArgumentException is thrown. | |||
| * @param minPrefixLength the minimum prefix length used when determining overlapping ranges. | |||
| * @return a table with the single index column, containing the simplified ranges. | |||
| */ | |||
| private static RangeTable simplifyIndexTable( | |||
| ImmutableList<Change> rows, Function<Change, RangeTree> simplifyFn, int minPrefixLength) { | |||
| RangeTable indexTable = makeIndexTable(rows); | |||
| // Step 1: Determine overlapping ranges from the index table, retaining minimum prefix length. | |||
| ImmutableMap<Integer, PrefixTree> nonDisjointPrefixes = | |||
| indexTable.getPrefixMap(INDEX, minPrefixLength); | |||
| // Don't just use the assigned ranges (we need to account for valid but unassigned ranges when | |||
| // determining overlaps). | |||
| RangeTree allRanges = indexTable.getAllRanges(); | |||
| RangeTree overlaps = RangeTree.empty(); | |||
| for (int n : indexTable.getAssignedValues(INDEX)) { | |||
| RangeTree otherRanges = allRanges.subtract(indexTable.getRanges(INDEX, n)); | |||
| overlaps = overlaps.union(nonDisjointPrefixes.get(n).retainFrom(otherRanges)); | |||
| } | |||
| // Step 2: Determine the "prefix disjoint" ranges in a new table and simplify it. | |||
| // | |||
| // Before getting the new set of prefixes, add the overlapping ranges back to the table, but | |||
| // without assigning them to anything. This keeps the generated prefixes as long as necessary | |||
| // to avoid creating conflicting assignments for different values. Essentially we're trying to | |||
| // keep ranges "away from" any overlaps. Note however that it is still possible for simplified | |||
| // ranges to encroach on the overlapping areas, so we must still forcibly overwrite the original | |||
| // overlapping values after simplification. Consider: | |||
| // A = { "12x", "12xxx" }, B = { "123x" } | |||
| // where the simplification function just creates an "any" range for all lengths between the | |||
| // minimum and maximum range lengths (e.g. { "123", "45678" } ==> { "xxx", "xxxx", "xxxxx" }). | |||
| // | |||
| // The (non disjoint) prefix table is Pre(A) => { "12" }, Pre(B) => { "123" } and this | |||
| // captures the overlaps: | |||
| // Pre(A).retainFrom(B) = { "123x" } = B | |||
| // Pre(B).retainFrom(A) = { "123xx" } | |||
| // | |||
| // Since all of "B" is entirely contained by the overlap, it is not simplified, but A is | |||
| // simplified to: | |||
| // { "xxx", "xxxx", "xxxxx" } | |||
| // and then re-captured by the "disjoint" prefix (which is still just "12") to: | |||
| // { "12x", "12xx", "12xxx" } | |||
| // | |||
| // However now, when the original overlaps are added back at the end (in step 3) we find that | |||
| // both "123xx" already exists (with the same index) and "123x" exists with a different index. | |||
| // The resolution is to just overwrite all overlaps back into the table, since these represent | |||
| // the original (unsimplified) values. | |||
| // | |||
| // Thus in this case, the simplified table is: | |||
| // Sim(A) = { "12x", "12[0-24-9]x", "12xxx" }, Sim(B) = { "123x" } | |||
| // | |||
| // And it is still true that: Sim(A).containsAll(A) and Sim(B).containsAll(B) | |||
| RangeTable prefixDisjointTable = indexTable | |||
| .subTable(allRanges.subtract(overlaps), INDEX) | |||
| .toBuilder() | |||
| .add(overlaps) | |||
| .build(); | |||
| // NOTE: Another way to do this would be to implement an "exclusive prefix" method which could | |||
| // be used to immediately return a set of truly "disjoint" prefixes (although this would change | |||
| // the algorithm's behaviour since more ranges would be considered "overlapping" than now). | |||
| // TODO: Experiment with an alternate "exclusive" prefix function. | |||
| ImmutableMap<Integer, PrefixTree> disjointPrefixes = prefixDisjointTable.getPrefixMap(INDEX, 1); | |||
| // Not all values from the original table need be present in the derived table (since some | |||
| // overlaps account for all the ranges of a value). | |||
| Builder simplified = RangeTable.builder(INDEX_SCHEMA); | |||
| for (int n : prefixDisjointTable.getAssignedValues(INDEX)) { | |||
| RangeTree disjointRange = prefixDisjointTable.getRanges(INDEX, n); | |||
| // Pass just the assignments, not the whole row (Change) because that also contains a range, | |||
| // which might not be the same as the disjoint range (so it could be rather confusing). | |||
| PrefixTree disjointPrefix = disjointPrefixes.get(n); | |||
| RangeTree simplifiedRange = | |||
| simplifyFn.apply(Change.of(disjointRange, rows.get(n).getAssignments())); | |||
| // Technically this check is not strictly required, but there's probably no good use-case in | |||
| // which you'd want to remove assignments via the simplification process. | |||
| checkArgument(simplifiedRange.containsAll(disjointRange), | |||
| "simplification should return a superset of the given range\n" | |||
| + "input: %s\n" | |||
| + "output: %s\n" | |||
| + "missing: %s", | |||
| disjointRange, simplifiedRange, disjointRange.subtract(simplifiedRange)); | |||
| // Repartition the simplified ranges by the "disjoint" prefixes to restore most of the | |||
| // simplified ranges. These ranges should never overlap with each other. | |||
| RangeTree repartitionedRange = disjointPrefix.retainFrom(simplifiedRange); | |||
| simplified.assign(INDEX, n, repartitionedRange, OverwriteMode.NEVER); | |||
| } | |||
| // Step 3: Copy remaining overlapping ranges from the original table back into the result. | |||
| // Note that we may end up overwriting values here, but that's correct since it restores | |||
| // original "unsimplifiable" ranges. | |||
| for (int n : indexTable.getAssignedValues(INDEX)) { | |||
| simplified.assign( | |||
| INDEX, n, indexTable.getRanges(INDEX, n).intersect(overlaps), OverwriteMode.ALWAYS); | |||
| } | |||
| return simplified.build(); | |||
| } | |||
| // Helper to make a table with a single column that references a list of disjoint changes by | |||
| // index (against the range of that change). | |||
| private static RangeTable makeIndexTable(ImmutableList<Change> rows) { | |||
| Builder indexed = RangeTable.builder(INDEX_SCHEMA); | |||
| int index = 0; | |||
| for (Change row : rows) { | |||
| RangeTree ranges = row.getRanges(); | |||
| if (!row.getAssignments().isEmpty()) { | |||
| indexed.assign(INDEX, index, ranges, OverwriteMode.NEVER); | |||
| } else { | |||
| // Empty rows are added to the table, but not assigned an index. Their existence in the | |||
| // index table prevents over simplification from affecting unassigned rows of the original | |||
| // table. | |||
| indexed.add(ranges); | |||
| } | |||
| index++; | |||
| } | |||
| return indexed.build(); | |||
| } | |||
| @Override | |||
| public boolean equals(Object obj) { | |||
| if (this == obj) { | |||
| return true; | |||
| } | |||
| if (!(obj instanceof RangeTable)) { | |||
| return false; | |||
| } | |||
| RangeTable that = (RangeTable) obj; | |||
| // Column ranges are compared by their values only, taken in iteration order. | |||
| return schema.equals(that.schema) | |||
| && allRanges.equals(that.allRanges) | |||
| && columnRanges.values().asList().equals(that.columnRanges.values().asList()); | |||
| } | |||
| @Override | |||
| public int hashCode() { | |||
| // This could be memoized if it turns out to be slow. | |||
| return schema.hashCode() ^ columnRanges.hashCode() ^ allRanges.hashCode(); | |||
| } | |||
| // TODO: Prettier format for toString(). | |||
| @Override | |||
| public final String toString() { | |||
| // One line per row of the immutable table form: the range specification, then its values. | |||
| return toImmutableTable().rowMap().entrySet().stream() | |||
| .map(row -> String.format("%s, %s", row.getKey(), rowToString(row.getValue()))) | |||
| .collect(joining("\n")); | |||
| } | |||
| // Formats the values of a single row as a comma separated list, using "UNSET" for absent values. | |||
| private static String rowToString(Map<Column<?>, Optional<?>> r) { | |||
| return r.values().stream() | |||
| .map(cell -> cell.map(Object::toString)) | |||
| .map(text -> text.orElse("UNSET")) | |||
| .collect(joining(", ")); | |||
| } | |||
| // Helper method to convert a table of values into a minimal set of changes. This is used to | |||
| // turn a single RangeTable into an ImmutableTable, but also to convert a Patch into a minimal | |||
| // sequence of Changes. Each returned "row" defines a range, and a unique sequence of assignments | |||
| // over that range (i.e. no two rows have the same assignments). The assignments are ordered | |||
| // in column order within each row, and the rows are ordered by the minimum digit sequence in | |||
| // each range and the ranges form a disjoint covering of the ranges in the original table. | |||
| // | |||
| // See go/phonenumber-v2-data-structure for more details. | |||
| private static ImmutableList<Row> toRows( | |||
| Table<Column<?>, Optional<?>, RangeTree> src, | |||
| RangeTree allRanges, | |||
| Comparator<Column<?>> columnOrdering) { | |||
| // Get the non-empty columns in _reverse_ iteration order. We build up rows as a linked list | |||
| // structure, started from the "right hand side". This avoids a lot of copying as new columns | |||
| // are processed. | |||
| ImmutableList<Column<?>> reversedColumns = src.rowMap().entrySet().stream() | |||
| .filter(e -> !e.getValue().isEmpty()) | |||
| .map(Entry::getKey) | |||
| .sorted(columnOrdering.reversed()) | |||
| .collect(toImmutableList()); | |||
| List<Row> uniqueRows = new ArrayList<>(); | |||
| uniqueRows.add(Row.empty(allRanges)); | |||
| for (Column<?> col : reversedColumns) { | |||
| // Loop backward so rows can be (a) removed in place and (b) appended without being revisited. | |||
| for (int i = uniqueRows.size() - 1; i >= 0; i--) { | |||
| Row row = uniqueRows.get(i); | |||
| // Track the unprocessed range for each row as we extend it. | |||
| RangeTree remainder = row.getRanges(); | |||
| for (Entry<Optional<?>, RangeTree> e : src.row(col).entrySet()) { | |||
| RangeTree overlap = e.getValue().intersect(remainder); | |||
| if (overlap.isEmpty()) { | |||
| continue; | |||
| } | |||
| // Extend the existing row by the current column value and reduce the remaining ranges. | |||
| uniqueRows.add(Row.of(overlap, col, e.getKey(), row)); | |||
| remainder = remainder.subtract(overlap); | |||
| if (remainder.isEmpty()) { | |||
| // We've accounted for all of the existing row in the new column, so remove it. | |||
| uniqueRows.remove(i); | |||
| break; | |||
| } | |||
| } | |||
| if (!remainder.isEmpty()) { | |||
| // The existing row is not completely covered by the new column, so retain what's left. | |||
| uniqueRows.set(i, row.bound(remainder)); | |||
| } | |||
| } | |||
| } | |||
| return ImmutableList.sortedCopyOf(comparing(r -> r.getRanges().first()), uniqueRows); | |||
| } | |||
| /** | |||
| * A notional "row" with some set of assignments in a range table or table like structure. Note | |||
| * that a Row can represent unassignment as well as assignment, and not all rows need to contain | |||
| * all columns. Rows are used for representing values in a table, but also changes between tables. | |||
| * | |||
| * <p>The assignments of a row are held as a singly linked list of {@link Cell}s starting at | |||
| * {@link #head()} (which is null for a row with no assignments). Extending a row with another | |||
| * column creates a new head cell that links to the existing cells rather than copying them, and | |||
| * iterating a row visits its assignments from the head cell onwards. | |||
| */ | |||
| @AutoValue | |||
| abstract static class Row implements Iterable<Assignment<?>> { | |||
| private static Row empty(RangeTree row) { | |||
| return new AutoValue_RangeTable_Row(row, null); | |||
| } | |||
| private static Row of(RangeTree row, Column<?> col, Optional<?> val, Row next) { | |||
| checkArgument(!row.isEmpty(), "empty ranges not permitted (col=%s, val=%s)", col, val); | |||
| return new AutoValue_RangeTable_Row( | |||
| row, new AutoValue_RangeTable_Cell(Assignment.ofOptional(col, val), next.head())); | |||
| } | |||
| public abstract RangeTree getRanges(); | |||
| @Nullable abstract Cell head(); | |||
| Change toChange() { | |||
| return Change.of(getRanges(), this); | |||
| } | |||
| private Row bound(RangeTree ranges) { | |||
| return new AutoValue_RangeTable_Row(getRanges().intersect(ranges), head()); | |||
| } | |||
| @Override | |||
| public Iterator<Assignment<?>> iterator() { | |||
| return new UnmodifiableIterator<Assignment<?>>() { | |||
| @Nullable private Cell cur = Row.this.head(); | |||
| @Override | |||
| public boolean hasNext() { | |||
| return cur != null; | |||
| } | |||
| @Override | |||
| public Assignment<?> next() { | |||
| Cell c = cur; | |||
| if (c == null) { | |||
| throw new NoSuchElementException(); | |||
| } | |||
| cur = cur.next(); | |||
| return c.assignment(); | |||
| } | |||
| }; | |||
| } | |||
| @Override | |||
| public final String toString() { | |||
| return "Row{" + getRanges() + " >> " + Iterables.toString(this) + "}"; | |||
| } | |||
| } | |||
| /** | |||
| * A single assignment in the linked list of assignments held by a {@link Row}, together with a | |||
| * link to the next cell in the list (null for the last cell). | |||
| */ | |||
| @AutoValue | |||
| abstract static class Cell { | |||
| abstract Assignment<?> assignment(); | |||
| @Nullable abstract Cell next(); | |||
| } | |||
| } | |||
| @ -0,0 +1,169 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.collect.ImmutableCollection; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.Ordering; | |||
| import java.util.Comparator; | |||
| /** | |||
| * Representation of ordered {@link Column}s in a table. Schemas define columns in both | |||
| * {@code RangeTable} and {@code CsvTable}. | |||
| */ | |||
| @AutoValue | |||
| public abstract class Schema { | |||
| /** | |||
| * Builder for a table schema. Columns are ordered in the order in which they, or their owning | |||
| * group is added to the schema. | |||
| */ | |||
| public static final class Builder { | |||
| private final ImmutableSet.Builder<String> names = ImmutableSet.builder(); | |||
| private final ImmutableMap.Builder<String, Column<?>> columns = ImmutableMap.builder(); | |||
| private final ImmutableMap.Builder<String, ColumnGroup<?, ?>> groups = ImmutableMap.builder(); | |||
| /** Adds the given column to the schema. */ | |||
| public Builder add(Column<?> column) { | |||
| names.add(column.getName()); | |||
| columns.put(column.getName(), column); | |||
| return this; | |||
| } | |||
| /** Adds the given column group to the schema. */ | |||
| public Builder add(ColumnGroup<?, ?> group) { | |||
| names.add(group.prototype().getName()); | |||
| groups.put(group.prototype().getName(), group); | |||
| return this; | |||
| } | |||
| public Schema build() { | |||
| return new AutoValue_Schema(names.build(), columns.build(), groups.build()); | |||
| } | |||
| } | |||
| private static final Schema EMPTY = builder().build(); | |||
| /** Returns an empty schema with no assigned columns. */ | |||
| public static Schema empty() { | |||
| return EMPTY; | |||
| } | |||
| /** Returns a new schema builder. */ | |||
| public static Builder builder() { | |||
| return new Builder(); | |||
| } | |||
| // Visible for AutoValue only. | |||
| Schema() {} | |||
| // List of column/group names used to determine column order: | |||
| // E.g. if "names" is: ["col1", "grp1", "col2", "col3"] | |||
| // You can have the table <<"col1", "grp1:xx", "grp1:yy", "col3">> | |||
| // Not all columns need to be present and groups are ordered contiguously as the group prefix | |||
| // appears in the names list. | |||
| abstract ImmutableSet<String> names(); | |||
| abstract ImmutableMap<String, Column<?>> columns(); | |||
| abstract ImmutableMap<String, ColumnGroup<?, ?>> groups(); | |||
| /** | |||
| * Returns the column for the specified key string. For "plain" columns (not in groups) the key | |||
| * is just the column name. For group columns, the key takes the form "prefix:suffix", where the | |||
| * prefix is the name of the "prototype" column, and the "suffix" is an ID of a value within the | |||
| * group. For example: | |||
| * <p> {@oode | |||
| * // Schema has a plain column called "Type" in it. | |||
| * typeCol = table.getColumn("Type"); | |||
| * | |||
| * // Schema has a group called "Region" in it which can parse RegionCodes. | |||
| * usRegionCol = table.getColumn("Region:US"); | |||
| * }</p> | |||
| */ | |||
| public Column<?> getColumn(String key) { | |||
| int split = key.indexOf(':'); | |||
| Column<?> column; | |||
| if (split == -1) { | |||
| column = columns().get(key); | |||
| } else { | |||
| ColumnGroup<?, ?> group = groups().get(key.substring(0, split)); | |||
| checkArgument(group != null, "invalid column %s, not in schema: %s", key, this); | |||
| column = group.getColumnFromId(key.substring(split + 1)); | |||
| } | |||
| checkArgument(column != null, "invalid column %s, not in schema: %s", key, this); | |||
| return column; | |||
| } | |||
| /** Returns whether the given column is valid within this schema. */ | |||
| public <T extends Comparable<T>> boolean isValidColumn(Column<T> column) { | |||
| int split = column.getName().indexOf(':'); | |||
| if (split == -1) { | |||
| return columns().containsValue(column); | |||
| } else { | |||
| ColumnGroup<?, ?> group = groups().get(column.getName().substring(0, split)); | |||
| return group != null && column.isIn(group); | |||
| } | |||
| } | |||
| /** | |||
| * Checks whether the given column is valid within this schema, otherwise throws | |||
| * IllegalArgumentException. This is expected to be internal use only, since table users are | |||
| * meant to always know which columns are valid. | |||
| */ | |||
| <T extends Comparable<T>> Column<T> checkColumn(Column<T> column) { | |||
| checkArgument(isValidColumn(column), "invalid column %s, not in schema: %s", column, this); | |||
| return column; | |||
| } | |||
| /** | |||
| * Returns whether the this schema has a subset of columns/groups, in the same order as the | |||
| * given schema. | |||
| */ | |||
| public boolean isSubSchemaOf(Schema schema) { | |||
| return schema.columns().values().containsAll(columns().values()) | |||
| && schema.groups().entrySet().containsAll(groups().entrySet()) | |||
| && names().asList().equals( | |||
| schema.names().stream().filter(names()::contains).collect(toImmutableList())); | |||
| } | |||
| /** Returns an ordering for all columns in this schema. */ | |||
| public Comparator<Column<?>> ordering() { | |||
| return Comparator | |||
| .comparing(Schema::getPrefix, Ordering.explicit(names().asList())) | |||
| .thenComparing(Schema::getSuffix); | |||
| } | |||
| public ImmutableSet<String> getNames() { | |||
| return names(); | |||
| } | |||
| public ImmutableCollection<Column<?>> getColumns() { | |||
| return columns().values(); | |||
| } | |||
| private static String getPrefix(Column<?> column) { | |||
| int split = column.getName().indexOf(':'); | |||
| return split != -1 ? column.getName().substring(0, split) : column.getName(); | |||
| } | |||
| private static String getSuffix(Column<?> column) { | |||
| int split = column.getName().indexOf(':'); | |||
| return split == -1 ? "" : column.getName().substring(split + 1); | |||
| } | |||
| } | |||
| @ -0,0 +1,69 @@ | |||
| // Copyright (C) 2017 The Libphonenumber Authors. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| syntax = "proto3"; | |||
| package i18n.phonenumbers.metadata; | |||
| option java_package = "com.google.i18n.phonenumbers.metadata.proto"; | |||
| // The possible provenance which can be assigned to a range. | |||
| // This enum is NOT stable and must only be stored in text based protocol | |||
| // buffers. | |||
| enum Provenance { | |||
| // Having a distinct default/unknown enum with a zero value is a proto3 thing. | |||
| // No data should actually ever have this value. | |||
| UNKNOWN = 0; | |||
| // Indicates that the ranges were defined in an official ITU document. The | |||
| // comment associated with this range should contain a link to the document. | |||
| // This is the most trusted form of evidence and will usually replace any | |||
| // previous "lower" provenance (though this is not always true for some | |||
| // countries). | |||
| ITU = 10; | |||
| // Indicates that the ranges were defined in an official IR21 document. The | |||
| // comment associated with this range should contain a link to the document. | |||
| // This is the most trusted form of evidence and will usually replace any | |||
| // previous "lower" provenance (though this is not always true for some | |||
| // countries). | |||
| IR21 = 20; | |||
| // Indicates that evidence for a range was found in a website belonging to | |||
| // an official, government endorsed entity (e.g. national telecoms operator), | |||
| // but not part of either an official ITU or IR21 document. | |||
| // The comment associated with this range should contain a URL to the | |||
| // appropriate page where the evidence was found. | |||
| GOVERNMENT = 30; | |||
| // Indicates that evidence for a range was found in a website belonging to a | |||
| // telecoms operator (mobile carrier, MVNO etc...). The comment associated | |||
| // with this range should contain a URL to the appropriate page where the | |||
| // evidence was found. | |||
| TELECOMS = 40; | |||
| // Indicates that evidence for a range was found in an unofficial website | |||
| // (e.g. Facebook or a general company home page). The comment associated | |||
| // with this range should contain a URL to the appropriate page where the | |||
| // evidence was found. | |||
| WEB = 50; | |||
| // Used to indicate special situations in which a number is accepted as | |||
| // valid, despite no citable evidence. When this provenance is used, the | |||
| // comment text should indicate some bug report or internal reasoning as to | |||
| // why this range should be accepted. This provenance should be used only in | |||
| // exceptional circumstances and the comment may be scrubbed from externally | |||
| // published versions of the range data. | |||
| INTERNAL = 100; | |||
| } | |||
| @ -0,0 +1,82 @@ | |||
| // Copyright (C) 2017 The Libphonenumber Authors. | |||
| // | |||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||
| // you may not use this file except in compliance with the License. | |||
| // You may obtain a copy of the License at | |||
| // | |||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||
| // | |||
| // Unless required by applicable law or agreed to in writing, software | |||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| // See the License for the specific language governing permissions and | |||
| // limitations under the License. | |||
| syntax = "proto3"; | |||
| package i18n.phonenumbers.metadata; | |||
| option java_package = "com.google.i18n.phonenumbers.metadata.proto"; | |||
| // Enum names must match the element names in the XML metadata modulo casing. | |||
| enum XmlNumberType { | |||
| // proto3 requires the first enum value to be zero, so a distinct | |||
| // default/unknown value is defined. No data should ever have this value. | |||
| XML_UNKNOWN = 0; | |||
| XML_NO_INTERNATIONAL_DIALLING = 1; | |||
| XML_FIXED_LINE = 2; | |||
| XML_MOBILE = 3; | |||
| XML_PAGER = 4; | |||
| XML_TOLL_FREE = 5; | |||
| XML_PREMIUM_RATE = 6; | |||
| XML_SHARED_COST = 7; | |||
| XML_PERSONAL_NUMBER = 8; | |||
| XML_VOIP = 9; | |||
| XML_UAN = 10; | |||
| XML_VOICEMAIL = 11; | |||
| } | |||
| // Validation types for phone number ranges. Each valid range is categorized as | |||
| // exactly one of these types. This does not include NO_INTERNATIONAL_DIALLING | |||
| // since it is an attribute of ranges rather than their fundamental type. | |||
| enum ValidNumberType { | |||
| // proto3 requires the first enum value to be zero and uses it as the default, so a distinct | |||
| // "unknown" value is declared here. No data should actually ever have this value. | |||
| UNKNOWN = 0; | |||
| FIXED_LINE = 1; | |||
| MOBILE = 2; | |||
| FIXED_LINE_OR_MOBILE = 3; | |||
| PAGER = 4; | |||
| TOLL_FREE = 5; | |||
| PREMIUM_RATE = 6; | |||
| SHARED_COST = 7; | |||
| PERSONAL_NUMBER = 8; | |||
| VOIP = 9; | |||
| UAN = 10; | |||
| VOICEMAIL = 11; | |||
| } | |||
| // Enum names must match the element names in the XML metadata modulo casing. | |||
| // Unlike the main metadata types, these types are not required to be mutually exclusive for a number. | |||
| enum XmlShortcodeType { | |||
| // proto3 requires the first enum value to be zero and uses it as the default, so a distinct | |||
| // "unknown" value is declared here. No data should actually ever have this value. | |||
| SC_UNKNOWN = 0; | |||
| // General short codes without a more specific representation (unlike | |||
| // generalDesc, which can just be the leading digits, this must be precise). | |||
| SC_SHORT_CODE = 1; | |||
| // Mutually exclusive sub-set of types for tariff. | |||
| SC_TOLL_FREE = 2; | |||
| SC_STANDARD_RATE = 3; | |||
| SC_PREMIUM_RATE = 4; | |||
| // Use-case types. | |||
| SC_CARRIER_SPECIFIC = 5; | |||
| SC_EMERGENCY = 6; | |||
| SC_EXPANDED_EMERGENCY = 7; | |||
| SC_SMS_SERVICES = 8; | |||
| } | |||
| @ -0,0 +1,134 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain; | |||
| import static org.junit.Assert.assertThrows; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| /** Unit tests for {@link DigitSequence}. */ | |||
| @RunWith(JUnit4.class) | |||
| public class DigitSequenceTest { | |||
| /** The empty sequence is a shared instance with length zero, no digits and an empty string form. */ | |||
| @Test | |||
| public void testEmpty() { | |||
| Object e = DigitSequence.of(""); | |||
| assertThat(e).isSameInstanceAs(DigitSequence.empty()); | |||
| assertThat(DigitSequence.empty().length()).isEqualTo(0); | |||
| assertThrows(IndexOutOfBoundsException.class, () -> DigitSequence.empty().getDigit(0)); | |||
| assertThat(DigitSequence.empty().toString()).isEqualTo(""); | |||
| } | |||
| /** Sequences built from the same digits are equal; sequences built from different digits are not. */ | |||
| @Test | |||
| public void testCreate() { | |||
| DigitSequence s = DigitSequence.of("0123456789"); | |||
| assertThat(s).isEqualTo(DigitSequence.of("0123456789")); | |||
| assertThat(s).isNotEqualTo(DigitSequence.of("1111111111")); | |||
| } | |||
| /** Each digit is returned at its index and the string form round-trips. */ | |||
| @Test | |||
| public void testGetDigit() { | |||
| DigitSequence s = DigitSequence.of("0123456789"); | |||
| assertThat(s.length()).isEqualTo(10); | |||
| for (int n = 0; n < s.length(); n++) { | |||
| assertThat(s.getDigit(n)).isEqualTo(n); | |||
| } | |||
| assertThat(s.toString()).isEqualTo("0123456789"); | |||
| } | |||
| /** Null, non-digit and over-long inputs are rejected. */ | |||
| @Test | |||
| public void testBadArguments() { | |||
| assertThrows(NullPointerException.class, () -> DigitSequence.of(null)); | |||
| assertThrows(IllegalArgumentException.class, () -> DigitSequence.of("123X")); | |||
| // Too long (19 digits). | |||
| assertThrows(IllegalArgumentException.class, () -> DigitSequence.of("1234567890123456789")); | |||
| } | |||
| /** The empty sequence is the domain minimum: it has a successor but no predecessor. */ | |||
| @Test | |||
| public void testMin() { | |||
| assertThat(domain().minValue()).isEqualTo(DigitSequence.empty()); | |||
| assertThat(domain().next(DigitSequence.empty())).isNotNull(); | |||
| assertThat(domain().previous(DigitSequence.empty())).isNull(); | |||
| } | |||
| /** Eighteen nines is the domain maximum: it has a predecessor but no successor. */ | |||
| @Test | |||
| public void testMax() { | |||
| DigitSequence max = DigitSequence.of("999999999999999999"); | |||
| assertThat(domain().maxValue()).isEqualTo(max); | |||
| assertThat(domain().previous(max)).isNotNull(); | |||
| assertThat(domain().next(max)).isNull(); | |||
| } | |||
| /** Distance is signed: it is negative when the second sequence precedes the first. */ | |||
| @Test | |||
| public void testDistance() { | |||
| assertThat(domain().distance(DigitSequence.empty(), DigitSequence.of("0"))) | |||
| .isEqualTo(1); | |||
| assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("1"))) | |||
| .isEqualTo(1); | |||
| assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("00"))) | |||
| .isEqualTo(10); | |||
| assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("10"))) | |||
| .isEqualTo(20); | |||
| assertThat(domain().distance(DigitSequence.of("10"), DigitSequence.of("0"))) | |||
| .isEqualTo(-20); | |||
| assertThat(domain().distance(DigitSequence.empty(), DigitSequence.of("000000"))) | |||
| .isEqualTo(111111); | |||
| assertThat(domain().distance(DigitSequence.of("000"), DigitSequence.of("000000"))) | |||
| .isEqualTo(111000); | |||
| // Max distance is one less than the total number of digit sequences. | |||
| assertThat(domain().distance(domain().minValue(), domain().maxValue())) | |||
| .isEqualTo(1111111111111111110L); | |||
| } | |||
| @Test | |||
| public void testLexicographicalOrdering() { | |||
| // Items are listed in the expected ascending order: shorter sequences come before longer ones | |||
| // (e.g. "9" before "00") and sequences of equal length are ordered by their digits. | |||
| testComparator( | |||
| DigitSequence.empty(), | |||
| DigitSequence.of("0"), | |||
| DigitSequence.of("1"), | |||
| DigitSequence.of("9"), | |||
| DigitSequence.of("00"), | |||
| DigitSequence.of("01"), | |||
| DigitSequence.of("10"), | |||
| DigitSequence.of("99"), | |||
| DigitSequence.of("000"), | |||
| DigitSequence.of("123"), | |||
| DigitSequence.of("124"), | |||
| DigitSequence.of("999")); | |||
| } | |||
| /** Sequences can be extended by a single digit or by another sequence. */ | |||
| @Test | |||
| public void testExtend() { | |||
| assertThat(DigitSequence.empty().extendBy(0)).isEqualTo(DigitSequence.of("0")); | |||
| assertThat(DigitSequence.of("1234").extendBy(DigitSequence.of("5678"))) | |||
| .isEqualTo(DigitSequence.of("12345678")); | |||
| } | |||
| /** | |||
| * Asserts that {@code items} are given in strictly ascending order: every item is equal and | |||
| * compare-equivalent to itself, and is unequal to and less than every item listed after it. | |||
| */ | |||
| // The varargs array is only read, never stored or exposed, so generic array creation is safe. | |||
| @SafeVarargs | |||
| private static <T extends Comparable<T>> void testComparator(T... items) { | |||
| for (int i = 0; i < items.length; i++) { | |||
| assertThat(items[i]).isEqualTo(items[i]); | |||
| assertThat(items[i]).isEquivalentAccordingToCompareTo(items[i]); | |||
| for (int j = i + 1; j < items.length; j++) { | |||
| assertThat(items[i]).isNotEqualTo(items[j]); | |||
| assertThat(items[i]).isLessThan(items[j]); | |||
| assertThat(items[j]).isGreaterThan(items[i]); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @ -0,0 +1,213 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeTree.empty; | |||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat; | |||
| import java.util.Arrays; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class PrefixTreeTest { | |||
| @Test | |||
| public void testNewInstancesNormalized() { | |||
| assertThat(prefixes("123", "1234")).containsExactly("123"); | |||
| assertThat(prefixes("70x", "7[1-9]")).containsExactly("7"); | |||
| // Regression test for b/68707522 | |||
| assertThat(prefixes("123xxx", "123x_xxx", "567xxx", "567x_xxx")).containsExactly("123", "567"); | |||
| } | |||
| @Test | |||
| public void testRetainFrom() { | |||
| PrefixTree prefix = prefixes("123", "124", "126", "555"); | |||
| RangeTree ranges = ranges("1xxxxxx", "5xxxxxx", "6xxxxxx"); | |||
| assertThat(prefix.retainFrom(ranges)).containsExactly("12[346]xxxx", "555xxxx"); | |||
| } | |||
| @Test | |||
| public void testPrefixes() { | |||
| PrefixTree prefix = prefixes("123", "124", "126", "555"); | |||
| assertThat(prefix.prefixes(seq("1230000"))).isTrue(); | |||
| assertThat(prefix.prefixes(seq("555000"))).isTrue(); | |||
| assertThat(prefix.prefixes(seq("12"))).isFalse(); | |||
| assertThat(prefix.prefixes(seq("120000"))).isFalse(); | |||
| } | |||
| @Test | |||
| public void testEmptyVsZeroLength() { | |||
| PrefixTree empty = PrefixTree.from(empty()); | |||
| PrefixTree zeroLength = prefixes("xxx"); | |||
| assertThat(empty).isEmpty(); | |||
| assertThat(zeroLength).isNotEmpty(); | |||
| assertThat(zeroLength).hasSize(1); | |||
| assertThat(zeroLength).containsExactly(RangeSpecification.empty()); | |||
| // While the empty prefix tree filters out everything, the zero length tree allows everything | |||
| // to pass. This is because the zero length prefix tree represents a single prefix of length | |||
| // zero and all digit sequences start with a zero length sub-sequence. | |||
| RangeTree ranges = ranges("12x", "3xx", "456"); | |||
| assertThat(empty.retainFrom(ranges)).isEqualTo(empty()); | |||
| assertThat(zeroLength.retainFrom(ranges)).isEqualTo(ranges); | |||
| } | |||
| @Test | |||
| public void testNoTrailingAnyPath() { | |||
| assertThat(prefixes("123xxx", "456xx", "789x")).containsExactly("123", "456", "789"); | |||
| } | |||
| @Test | |||
| public void testRangeAndPrefixSameLength() { | |||
| PrefixTree prefix = prefixes("1234"); | |||
| RangeTree ranges = ranges("xxxx"); | |||
| assertThat(prefix.retainFrom(ranges)).containsExactly("1234"); | |||
| } | |||
| @Test | |||
| public void testRangeShorterThanPrefix() { | |||
| PrefixTree prefix = prefixes("1234"); | |||
| RangeTree ranges = ranges("xxx"); | |||
| assertThat(prefix.retainFrom(ranges)).isEmpty(); | |||
| } | |||
| @Test | |||
| public void testComplex() { | |||
| PrefixTree prefix = prefixes("[12]", "3x4x5", "67890", "987xx9"); | |||
| RangeTree ranges = ranges("x", "xx", "xxx", "1234xx", "234xxx", "3xx8xx", "67890"); | |||
| assertThat(prefix.retainFrom(ranges)) | |||
| .containsExactly("[12]", "[12]x", "[12]xx", "67890", "1234xx", "234xxx", "3x485x"); | |||
| } | |||
| @Test | |||
| public void testEmptyPrefixTree() { | |||
| // The empty filter filters everything out, since a filter operation is defined to return | |||
| // only ranges which are prefixed by an element in the filter (of which there are none). | |||
| assertThat(PrefixTree.from(empty()).retainFrom(ranges("12xxx"))).isEmpty(); | |||
| } | |||
| @Test | |||
| public void testZeroLengthPrefix() { | |||
| // The non-empty prefix tree which contains a single prefix of zero length. This has no effect | |||
| // as a filter, since all ranges "have a zero length prefix". | |||
| PrefixTree prefix = PrefixTree.from(RangeTree.from(RangeSpecification.empty())); | |||
| RangeTree input = ranges("12xxx"); | |||
| assertThat(prefix.retainFrom(input)).isEqualTo(input); | |||
| } | |||
| @Test | |||
| public void testUnion() { | |||
| // Overlapping prefixes retain the more general (shorter) one. | |||
| assertThat(prefixes("1234").union(prefixes("12"))).containsExactly("12"); | |||
| // Identical prefixes are treated like a normal union. | |||
| assertThat(prefixes("12").union(prefixes("12"))).containsExactly("12"); | |||
| // Non-overlapping prefixes are treated like a normal union. | |||
| assertThat(prefixes("123").union(prefixes("124"))).containsExactly("12[34]"); | |||
| // Complex case where prefixes are split into 2 lengths due to a partial overlap. | |||
| assertThat(prefixes("1234", "45", "800").union(prefixes("12", "4x67"))) | |||
| .containsExactly("12", "45", "4[0-46-9]67", "800"); | |||
| } | |||
| @Test | |||
| public void testIntersection() { | |||
| // Overlapping prefixes retain the more specific (longer) one. | |||
| assertThat(prefixes("1234").intersect(prefixes("12"))).containsExactly("1234"); | |||
| // Identical prefixes are treated like a normal intersection. | |||
| assertThat(prefixes("12").intersect(prefixes("12"))).containsExactly("12"); | |||
| // Non-overlapping prefixes are treated like a normal intersection. | |||
| assertThat(prefixes("123").intersect(prefixes("124"))).isEmpty(); | |||
| // Unlike the union case, with intersection, only the longest prefix remains. | |||
| assertThat(prefixes("1234", "45x", "800").intersect(prefixes("12x", "4x67"))) | |||
| .containsExactly("1234", "4567"); | |||
| } | |||
| @Test | |||
| public void testTrim() { | |||
| assertThat(prefixes("1234").trim(3)).containsExactly("123"); | |||
| assertThat(prefixes("12").trim(3)).containsExactly("12"); | |||
| assertThat(prefixes("1234").trim(0)).containsExactly(RangeSpecification.empty()); | |||
| // Trimming can result in prefixes shorter than the stated length if by collapsing the original | |||
| // prefix tree you end up with trailing any digit sequences. | |||
| assertThat(prefixes("12[0-4]5", "12[5-9]").trim(3)).containsExactly("12"); | |||
| assertThat(prefixes("7001", "70[1-9]", "7[1-9]").trim(3)).containsExactly("7"); | |||
| } | |||
| @Test | |||
| public void testMinimal() { | |||
| // If there are no ranges to include, the minimal prefix is empty (matching nothing). | |||
| assertThat(PrefixTree.minimal(RangeTree.empty(), ranges("123x"), 0)).isEmpty(); | |||
| // If the prefix for the included ranges is the identity, then the result is the identity | |||
| // (after converting to a prefix, ranges like "xxx.." become the identity prefix). | |||
| assertThat(PrefixTree.minimal(ranges("xxxx"), ranges("123"), 0).isIdentity()).isTrue(); | |||
| // Without an exclude set, the prefix returned (at zero length) can just accept everything. | |||
| assertThat(PrefixTree.minimal(ranges("123x"), RangeTree.empty(), 0).isIdentity()).isTrue(); | |||
| assertThat(PrefixTree.minimal(ranges("123x", "456x"), ranges("13xx", "459x"), 0)) | |||
| .containsExactly("12", "456"); | |||
| assertThat(PrefixTree.minimal(ranges("123x", "456x"), empty(), 1)).containsExactly("[14]"); | |||
| assertThat(PrefixTree.minimal(ranges("123x", "456x"), empty(), 2)).containsExactly("12", "45"); | |||
| // Pick the shortest prefix when several suffice. | |||
| assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("1xx", "5xxx"), 0)) | |||
| .containsExactly("12", "56"); | |||
| assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("1xx", "5xxx"), 3)) | |||
| .containsExactly("12", "56"); | |||
| // When ranges are contested, split the prefix (only "12" is contested out of "1[2-4]"). | |||
| assertThat(PrefixTree.minimal(ranges("1[2-4]5xx", "189xx"), ranges("128xx"), 0)) | |||
| .containsExactly("125", "1[348]"); | |||
| // If the include range already prefixes an entire path of the exclude set, ignore that path. | |||
| // Here '12' (the shorter path) already captures '123', so '123' is ignored. | |||
| assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("123", "5xxx"), 0)) | |||
| .containsExactly("1", "56"); | |||
| // Now all exclude paths are ignored, so you get the "identity" prefix that catches everything. | |||
| assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("123", "5678"), 0)) | |||
| .containsExactly(""); | |||
| } | |||
| @Test | |||
| public void testMinimal_regression() { | |||
| // This is extracted from a real case in which the old algorithm would fail. The | |||
| // "281xxxxxxx" path was necessary for failing since while visiting this, the old algorithm | |||
| // became "confused" and added an additional "250" path to the minimal prefix, meaning that | |||
| // the resulting range tree was "250", "250395". When this was turned into a prefix tree, the | |||
| // shorter, early terminating, path took precedence and the result was (incorrectly) "250". | |||
| assertThat( | |||
| PrefixTree.minimal( | |||
| ranges("250395xxxx"), | |||
| ranges("250[24-9]xxxxxx", "2503[0-8]xxxxx", "25039[0-46-9]xxxx", "281xxxxxxx"), | |||
| 3)) | |||
| .containsExactly("250395"); | |||
| } | |||
| private static DigitSequence seq(String s) { | |||
| return DigitSequence.of(s); | |||
| } | |||
| private static PrefixTree prefixes(String... specs) { | |||
| return PrefixTree.from(ranges(specs)); | |||
| } | |||
| private static RangeTree ranges(String... specs) { | |||
| return RangeTree.from(Arrays.stream(specs).map(RangeSpecification::parse)); | |||
| } | |||
| } | |||
| @ -0,0 +1,308 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeSpecification.parse; | |||
| import static java.util.Arrays.asList; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.common.collect.ImmutableRangeSet; | |||
| import com.google.common.collect.Range; | |||
| import com.google.common.collect.RangeSet; | |||
| import com.google.common.truth.Truth; | |||
| import java.util.Arrays; | |||
| import java.util.List; | |||
| import java.util.stream.Stream; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| /** Unit tests for {@link RangeSpecification}. */ | |||
| @RunWith(JUnit4.class) | |||
| public class RangeSpecificationTest { | |||
| /** Parsing normalizes digit sets and drops the optional '_' group separators. */ | |||
| @Test | |||
| public void testParse() { | |||
| assertThat(parse("")).isSameInstanceAs(RangeSpecification.empty()); | |||
| assertThat(parse("0").toString()).isEqualTo("0"); | |||
| assertThat(parse("0").length()).isEqualTo(1); | |||
| assertThat(parse("01234").toString()).isEqualTo("01234"); | |||
| assertThat(parse("01234").length()).isEqualTo(5); | |||
| assertThat(parse("012[0-9]").toString()).isEqualTo("012x"); | |||
| assertThat(parse("012[0234789]xxx").toString()).isEqualTo("012[02-47-9]xxx"); | |||
| assertThat(parse("0_1_2").toString()).isEqualTo("012"); | |||
| assertThat(parse("0_12[3-8]_xxx_xxx").toString()).isEqualTo("012[3-8]xxxxxx"); | |||
| } | |||
| /** Null and malformed specifications are rejected. */ | |||
| @Test | |||
| public void testParseBad() { | |||
| assertThrows(NullPointerException.class, () -> parse(null)); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("#")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[]")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[0-")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[0-]")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[0--9]")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[0..9]")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[33]")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[32]")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[3-3]")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("[3-2]")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("123[9-0]456")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("1234_")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("_1234")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("12__34")); | |||
| assertThrows(IllegalArgumentException.class, () -> parse("1[2_4]5")); | |||
| } | |||
| @Test | |||
| public void testSingleton() { | |||
| assertThat(RangeSpecification.singleton(asList(0, 1, 2, 4, 5, 7, 8, 9))) | |||
| .isEqualTo(parse("[0-2457-9]")); | |||
| } | |||
| /** A specification matches only sequences of the same length whose digits fall in each position's set. */ | |||
| @Test | |||
| public void testMatches() { | |||
| assertThat(RangeSpecification.empty().matches(DigitSequence.empty())).isTrue(); | |||
| assertAllMatch(parse("0"), "0"); | |||
| assertNoneMatch(parse("0"), "00", "1"); | |||
| assertAllMatch(parse("01234"), "01234"); | |||
| assertNoneMatch(parse("01234"), "01233", "01235"); | |||
| assertAllMatch(parse("012x"), "0120", "0125", "0129"); | |||
| assertNoneMatch(parse("012x"), "012", "0119", "0130", "01200"); | |||
| assertAllMatch(parse("012[3-689]xxx"), "0124000", "0128999"); | |||
| assertNoneMatch(parse("012[3-689]xxx"), "0122000", "0127999"); | |||
| } | |||
| @Test | |||
| public void testMinMax() { | |||
| assertThat(parse("123xxx").min()).isEqualTo(DigitSequence.of("123000")); | |||
| assertThat(parse("123xxx").max()).isEqualTo(DigitSequence.of("123999")); | |||
| assertThat(parse("1x[2-3]x4").min()).isEqualTo(DigitSequence.of("10204")); | |||
| assertThat(parse("1x[2-3]x4").max()).isEqualTo(DigitSequence.of("19394")); | |||
| } | |||
| @Test | |||
| public void testSequenceCount() { | |||
| assertThat(RangeSpecification.empty().getSequenceCount()).isEqualTo(1); | |||
| assertThat(parse("1xx").getSequenceCount()).isEqualTo(100); | |||
| assertThat(parse("1[2-46-8]x").getSequenceCount()).isEqualTo(60); | |||
| assertThat(parse("1xx[0-27-9]").getSequenceCount()).isEqualTo(600); | |||
| } | |||
| @Test | |||
| public void testFrom() { | |||
| assertThat(RangeSpecification.from(DigitSequence.empty())) | |||
| .isEqualTo(RangeSpecification.empty()); | |||
| assertThat(RangeSpecification.from(DigitSequence.of("1"))).isEqualTo(parse("1")); | |||
| assertThat(RangeSpecification.from(DigitSequence.of("1234"))).isEqualTo(parse("1234")); | |||
| } | |||
| @Test | |||
| public void testAny() { | |||
| assertThat(RangeSpecification.any(0)).isEqualTo(RangeSpecification.empty()); | |||
| assertThat(RangeSpecification.any(2)).isEqualTo(parse("xx")); | |||
| assertThat(RangeSpecification.any(10)).isEqualTo(parse("xxxxxxxxxx")); | |||
| assertThrows(IllegalArgumentException.class, () -> RangeSpecification.any(-1)); | |||
| assertThrows(IllegalArgumentException.class, () -> RangeSpecification.any(19)); | |||
| } | |||
| @Test | |||
| public void testFirst() { | |||
| RangeSpecification spec = parse("123[4-7]xxxx"); | |||
| assertThat(spec.first(3)).isEqualTo(parse("123")); | |||
| assertThat(spec.first(6)).isEqualTo(parse("123[4-7]xx")); | |||
| assertThat(spec.first(spec.length())).isSameInstanceAs(spec); | |||
| assertThat(spec.first(100)).isSameInstanceAs(spec); | |||
| assertThat(spec.first(0)).isEqualTo(RangeSpecification.empty()); | |||
| assertThrows(IllegalArgumentException.class, () -> spec.first(-1)); | |||
| } | |||
| @Test | |||
| public void testLast() { | |||
| RangeSpecification spec = parse("123[4-7]xxxx"); | |||
| assertThat(spec.last(3)).isEqualTo(parse("xxx")); | |||
| assertThat(spec.last(6)).isEqualTo(parse("3[4-7]xxxx")); | |||
| assertThat(spec.last(spec.length())).isSameInstanceAs(spec); | |||
| assertThat(spec.last(100)).isSameInstanceAs(spec); | |||
| assertThat(spec.last(0)).isEqualTo(RangeSpecification.empty()); | |||
| assertThrows(IllegalArgumentException.class, () -> spec.last(-1)); | |||
| } | |||
| /** The prefix is the specification with any trailing "any digit" positions removed. */ | |||
| @Test | |||
| public void testGetPrefix() { | |||
| assertThat(RangeSpecification.empty().getPrefix()).isEqualTo(RangeSpecification.empty()); | |||
| assertThat(parse("xxxx").getPrefix()).isEqualTo(RangeSpecification.empty()); | |||
| assertThat(parse("xx1x").getPrefix()).isEqualTo(parse("xx1")); | |||
| assertThat(parse("123[4-7]xxxx").getPrefix()).isEqualTo(parse("123[4-7]")); | |||
| } | |||
| @Test | |||
| public void testOrdering_simple() { | |||
| // For specifications representing a single DigitSequence, the ordering should be the same. | |||
| testComparator( | |||
| RangeSpecification.empty(), | |||
| parse("0"), | |||
| parse("00"), | |||
| parse("000"), | |||
| parse("01"), | |||
| parse("1"), | |||
| parse("10"), | |||
| parse("123"), | |||
| parse("124"), | |||
| parse("4111"), | |||
| parse("4200"), | |||
| parse("4555"), | |||
| parse("9"), | |||
| parse("99"), | |||
| parse("999")); | |||
| } | |||
| @Test | |||
| public void testOrdering_disjoint() { | |||
| // NOT the same as using the min() sequence for ordering (since "4555" > "4200" > "4111"). | |||
| testComparator( | |||
| parse("12xx"), | |||
| parse("13xx"), | |||
| parse("14xx"), | |||
| parse("1[5-8]00"), | |||
| parse("[2-3]xxx"), | |||
| parse("[4-6]555"), | |||
| parse("[45]111"), | |||
| parse("[45]2xx"), | |||
| parse("4999")); | |||
| } | |||
| @Test | |||
| public void testOrdering_overlapping() { | |||
| // Ordering for overlapping ranges is well defined but not particularly intuitive. | |||
| testComparator( | |||
| parse("01xxx"), | |||
| parse("01xx[0-5]"), | |||
| parse("01x0[0-5]"), | |||
| parse("01x00"), | |||
| parse("01[0-6]00"), | |||
| parse("01[2-7]xx"), | |||
| parse("01[2-7]00"), | |||
| parse("01[2-7]67"), | |||
| parse("01[4-9]00")); | |||
| } | |||
| @Test | |||
| public void testToString() { | |||
| assertThat(parse("0").toString()).isEqualTo("0"); | |||
| assertThat(parse("01234").toString()).isEqualTo("01234"); | |||
| assertThat(parse("012[3-4]").toString()).isEqualTo("012[34]"); | |||
| assertThat(parse("012[0-9]").toString()).isEqualTo("012x"); | |||
| assertThat(parse("012[3-689]xxx").toString()).isEqualTo("012[3-689]xxx"); | |||
| } | |||
| /** Bit N of the mask stands for digit N; an empty mask or bits above digit 9 are rejected. */ | |||
| @Test | |||
| public void testBitmaskToString() { | |||
| assertThat(RangeSpecification.toString(1 << 0)).isEqualTo("0"); | |||
| assertThat(RangeSpecification.toString(1 << 9)).isEqualTo("9"); | |||
| assertThat(RangeSpecification.toString(0xF)).isEqualTo("[0-3]"); | |||
| assertThat(RangeSpecification.toString(0xF1)).isEqualTo("[04-7]"); | |||
| assertThat(RangeSpecification.toString(ALL_DIGITS_MASK)).isEqualTo("x"); | |||
| assertThrows(IllegalArgumentException.class, () -> RangeSpecification.toString(0)); | |||
| assertThrows(IllegalArgumentException.class, () -> RangeSpecification.toString(0x400)); | |||
| } | |||
| @Test | |||
| public void testRangeProcessing_singleBlock() { | |||
| Truth.assertThat(RangeSpecification.from(setOf(range("1200", "1299")))) | |||
| .isEqualTo(specs("12xx")); | |||
| } | |||
| @Test | |||
| public void testRangeProcessing_fullRange() { | |||
| Truth.assertThat(RangeSpecification.from(setOf(range("0000", "9999")))) | |||
| .isEqualTo(specs("xxxx")); | |||
| } | |||
| @Test | |||
| public void testRangeProcessing_edgeCases() { | |||
| Truth.assertThat(RangeSpecification.from(setOf(range("1199", "1300")))).isEqualTo(specs( | |||
| "1199", | |||
| "12xx", | |||
| "1300")); | |||
| } | |||
| @Test | |||
| public void testRangeProcessing_complex() { | |||
| Truth.assertThat(RangeSpecification.from(setOf(range("123", "45678")))).isEqualTo(specs( | |||
| "12[3-9]", | |||
| "1[3-9]x", | |||
| "[2-9]xx", | |||
| "xxxx", | |||
| "[0-3]xxxx", | |||
| "4[0-4]xxx", | |||
| "45[0-5]xx", | |||
| "456[0-6]x", | |||
| "4567[0-8]")); | |||
| } | |||
| @Test | |||
| public void testAsRanges_edgeCase() { | |||
| // The middle 2 ranges abut. | |||
| assertThat(parse("12[34][0189]x").asRanges()) | |||
| .containsExactly(range("12300", "12319"), range("12380", "12419"), range("12480", "12499")) | |||
| .inOrder(); | |||
| } | |||
| /** Asserts that {@code r} matches every one of the given digit strings. */ | |||
| private static void assertAllMatch(RangeSpecification r, String... sequences) { | |||
| for (String digits : sequences) { | |||
| assertThat(r.matches(DigitSequence.of(digits))).isTrue(); | |||
| } | |||
| } | |||
| /** Asserts that {@code r} matches none of the given digit strings. */ | |||
| private static void assertNoneMatch(RangeSpecification r, String... sequences) { | |||
| for (String digits : sequences) { | |||
| assertThat(r.matches(DigitSequence.of(digits))).isFalse(); | |||
| } | |||
| } | |||
| /** Parses each string into a specification, preserving the given order. */ | |||
| private static List<RangeSpecification> specs(String... s) { | |||
| return Stream.of(s).map(RangeSpecification::parse).collect(toImmutableList()); | |||
| } | |||
| /** Returns the closed range {@code [lo, hi]} in canonical form for the digit sequence domain. */ | |||
| private static Range<DigitSequence> range(String lo, String hi) { | |||
| return Range.closed(DigitSequence.of(lo), DigitSequence.of(hi)).canonical(domain()); | |||
| } | |||
| /** Builds an immutable range set from the given ranges. */ | |||
| // The varargs array is only read (copied into the set), so generic array creation is safe. | |||
| @SafeVarargs | |||
| private static RangeSet<DigitSequence> setOf(Range<DigitSequence>... r) { | |||
| return ImmutableRangeSet.copyOf(Arrays.asList(r)); | |||
| } | |||
| /** | |||
| * Asserts that {@code items} are given in strictly ascending order: every item is equal and | |||
| * compare-equivalent to itself, and is unequal to and less than every item listed after it. | |||
| */ | |||
| // The varargs array is only read, never stored or exposed, so generic array creation is safe. | |||
| @SafeVarargs | |||
| private static <T extends Comparable<T>> void testComparator(T... items) { | |||
| for (int i = 0; i < items.length; i++) { | |||
| assertThat(items[i]).isEqualTo(items[i]); | |||
| assertThat(items[i]).isEquivalentAccordingToCompareTo(items[i]); | |||
| for (int j = i + 1; j < items.length; j++) { | |||
| assertThat(items[i]).isNotEqualTo(items[j]); | |||
| assertThat(items[i]).isLessThan(items[j]); | |||
| assertThat(items[j]).isGreaterThan(items[i]); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @ -0,0 +1,101 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeTree.empty; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.ALLOW_EDGE_SPLITTING; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES; | |||
| import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.factor; | |||
| import java.util.List; | |||
| import java.util.stream.Stream; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class RangeTreeFactorizerTest { | |||
| @Test | |||
| public void testEmpty() { | |||
| assertThat(factor(empty(), REQUIRE_EQUAL_EDGES)).isEmpty(); | |||
| assertThat(factor(empty(), ALLOW_EDGE_SPLITTING)).isEmpty(); | |||
| } | |||
| @Test | |||
| public void testSimplePrefix() { | |||
| RangeTree t = ranges("123x", "123xx", "123xxx"); | |||
| assertThat(factor(t, REQUIRE_EQUAL_EDGES)).containsExactly(t); | |||
| assertThat(factor(t, ALLOW_EDGE_SPLITTING)).containsExactly(t); | |||
| } | |||
| @Test | |||
| public void testDisjointBranchesNotFactored() { | |||
| RangeTree t = ranges("123xxx", "124xx", "125x"); | |||
| assertThat(factor(t, REQUIRE_EQUAL_EDGES)).containsExactly(t); | |||
| assertThat(factor(t, ALLOW_EDGE_SPLITTING)).containsExactly(t); | |||
| } | |||
| @Test | |||
| public void testOverlappingBranchesAreFactored() { | |||
| RangeTree t = ranges("123xxx", "1234x", "1234", "123"); | |||
| assertThat(factor(t, REQUIRE_EQUAL_EDGES)) | |||
| .containsExactly(ranges("123xxx", "123"), ranges("1234x", "1234")) | |||
| .inOrder(); | |||
| assertThat(factor(t, ALLOW_EDGE_SPLITTING)) | |||
| .containsExactly(ranges("123xxx", "123"), ranges("1234x", "1234")) | |||
| .inOrder(); | |||
| } | |||
| @Test | |||
| public void testStrategyDifference() { | |||
| // When factoring with REQUIRE_EQUAL_EDGES the [3-9] edge in the shorter path cannot be merged | |||
| // into the longer path of the first factor, since [3-5] already exists and is not equal to | |||
| // [3-9]. However since [3-5] is contained by [3-9], when we ALLOW_EDGE_SPLITTING, we can split | |||
| // the edge we are trying to merge to add paths for both [3-5] and [6-9]. This isn't always a | |||
| // win for regular expression length, and in fact for the most complex cases, | |||
| // REQUIRE_EQUAL_EDGES often ends up smaller. | |||
| RangeTree splittable = ranges("12[3-5]xx", "12[3-9]x"); | |||
| assertThat(factor(splittable, REQUIRE_EQUAL_EDGES)) | |||
| .containsExactly(ranges("12[3-5]xx"), ranges("12[3-9]x")) | |||
| .inOrder(); | |||
| assertThat(factor(splittable, ALLOW_EDGE_SPLITTING)) | |||
| .containsExactly(ranges("12[3-5]xx", "12[3-9]x")); | |||
| // In this case, the [3-5] edge in the first factor is only a partial overlap with the [4-9] | |||
| // edge we are trying to merge in. Now both strategies will prefer to treat the shorter path | |||
| // as a separate factor, since there's no clean way to merge into the existing edge. | |||
| RangeTree unsplittable = ranges("12[3-5]xx", "12[4-9]x"); | |||
| assertThat(factor(unsplittable, REQUIRE_EQUAL_EDGES)) | |||
| .containsExactly(ranges("12[3-5]xx"), ranges("12[4-9]x")) | |||
| .inOrder(); | |||
| assertThat(factor(unsplittable, ALLOW_EDGE_SPLITTING)) | |||
| .containsExactly(ranges("12[3-5]xx"), ranges("12[4-9]x")) | |||
| .inOrder(); | |||
| // TODO: Find a non-complex example where REQUIRE_EQUAL_EDGES yields a smaller regex. | |||
| // Approximately 50 out of the 1000+ regex's in the XML get smaller with REQUIRE_EQUAL_EDGES. | |||
| } | |||
| RangeTree ranges(String... s) { | |||
| return RangeTree.from(specs(s)); | |||
| } | |||
| List<RangeSpecification> specs(String... s) { | |||
| return Stream.of(s).map(RangeSpecification::parse).collect(toImmutableList()); | |||
| } | |||
| } | |||
| @ -0,0 +1,555 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain; | |||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat; | |||
| import static java.util.Arrays.asList; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.auto.value.AutoValue; | |||
| import com.google.common.base.Strings; | |||
| import com.google.common.collect.ImmutableRangeSet; | |||
| import com.google.common.collect.Range; | |||
| import com.google.common.collect.RangeSet; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; | |||
| import java.util.ArrayList; | |||
| import java.util.Collections; | |||
| import java.util.List; | |||
| import java.util.Random; | |||
| import java.util.concurrent.ExecutionException; | |||
| import java.util.concurrent.ForkJoinPool; | |||
| import java.util.stream.Collectors; | |||
| import java.util.stream.Stream; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class RangeTreeTest { | |||
| @Test | |||
| public void testEmptyTree() { | |||
| assertThat(RangeTree.empty()).containsExactly(); | |||
| assertThat(RangeTree.empty()).hasSize(0); | |||
| } | |||
| @Test | |||
| public void testEmptySequenceTree() { | |||
| // The tree that matches a zero length input is a perfectly valid range tree (zero length input | |||
| // is perfectly valid input). This is very distinct from the empty tree, which cannot match any | |||
| // input. It's not used very often, but it is well defined. | |||
| RangeTree r = RangeTree.from(RangeSpecification.empty()); | |||
| assertThat(r).containsExactly(RangeSpecification.empty()); | |||
| assertThat(r).hasSize(1); | |||
| } | |||
| @Test | |||
| public void testFromRangeSetSimple() { | |||
| // Single ranges produce minimal/canonical range specifications. | |||
| RangeTree r = RangeTree.from(rangeSetOf(range("1000", "4999"))); | |||
| assertThat(r).containsExactly("[1-4]xxx"); | |||
| assertThat(r).hasSize(4000); | |||
| } | |||
| @Test | |||
| public void testFromRangeSetMinMax() { | |||
| RangeTree r = RangeTree.from(rangeSetOf(range("0000", "9999"))); | |||
| assertThat(r).containsExactly("xxxx"); | |||
| assertThat(r).hasSize(10000); | |||
| } | |||
| @Test | |||
| public void testFromRangeSetAllValues() { | |||
| // Just checking for any out-of-bounds issues at the end of the domain. | |||
| RangeTree r = RangeTree.from(rangeSetOf(range("0", domain().maxValue().toString()))); | |||
| assertThat(r).containsExactly( | |||
| "x", | |||
| "xx", | |||
| "xxx", | |||
| "xxxx", | |||
| "xxxxx", | |||
| "xxxxxx", | |||
| "xxxxxxx", | |||
| "xxxxxxxx", | |||
| "xxxxxxxxx", | |||
| "xxxxxxxxxx", | |||
| "xxxxxxxxxxx", | |||
| "xxxxxxxxxxxx", | |||
| "xxxxxxxxxxxxx", | |||
| "xxxxxxxxxxxxxx", | |||
| "xxxxxxxxxxxxxxx", | |||
| "xxxxxxxxxxxxxxxx", | |||
| "xxxxxxxxxxxxxxxxx", | |||
| "xxxxxxxxxxxxxxxxxx"); | |||
| } | |||
| @Test | |||
| public void testContains() { | |||
| // The tree generated from the empty range specification actually contains one digit sequence | |||
| // (the empty one). This is not the same as RangeTree.empty() which really contains nothing. | |||
| assertThat(RangeTree.empty()).doesNotContain(""); | |||
| assertThat(RangeTree.from(RangeSpecification.empty())).contains(""); | |||
| assertThat(RangeTree.from(spec("x"))).contains("7"); | |||
| assertThat(RangeTree.from(spec("1"))).contains("1"); | |||
| assertThat(RangeTree.from(spec("1"))).doesNotContain("5"); | |||
| assertThat(RangeTree.from(spec("xx"))).contains("99"); | |||
| assertThat(RangeTree.from(spec("xx"))).doesNotContain("100"); | |||
| assertThat(RangeTree.from(spec("0[123]x[456]x[789]"))).contains("027617"); | |||
| } | |||
| @Test | |||
| public void testMatchCount() { | |||
| assertThat(RangeTree.empty()).hasSize(0); | |||
| assertThat(RangeTree.from(RangeSpecification.empty())).hasSize(1); | |||
| assertThat(RangeTree.from(spec("x"))).hasSize(10); | |||
| assertThat(RangeTree.from(spec("1"))).hasSize(1); | |||
| assertThat(RangeTree.from(spec("[123]"))).hasSize(3); | |||
| assertThat(RangeTree.from(spec("xx"))).hasSize(100); | |||
| assertThat(RangeTree.from(spec("[234]xx"))).hasSize(300); | |||
| assertThat(RangeTree.from(spec("1[234]xx"))).hasSize(300); | |||
| assertThat(RangeTree.from(spec("1[234][567]xx"))).hasSize(900); | |||
| assertThat(RangeTree.from(spec("0[123]x[456]x[789]"))).hasSize(2700); | |||
| } | |||
| @Test | |||
| public void testUnion() { | |||
| RangeTree a = ranges("12xx", "456xx"); | |||
| assertThat(a.union(a)).isEqualTo(a); | |||
| assertThat(a.union(RangeTree.empty())).isEqualTo(a); | |||
| assertThat(RangeTree.empty().union(a)).isEqualTo(a); | |||
| RangeTree b = ranges("1234", "4xxxx", "999"); | |||
| assertThat(a.union(b)).containsExactly("999", "12xx", "4xxxx"); | |||
| assertThat(b.union(a)).containsExactly("999", "12xx", "4xxxx"); | |||
| } | |||
| @Test | |||
| public void testIntersection() { | |||
| RangeTree a = ranges("12xx", "456xx"); | |||
| assertThat(a.intersect(a)).isEqualTo(a); | |||
| assertThat(a.intersect(RangeTree.empty())).isSameInstanceAs(RangeTree.empty()); | |||
| assertThat(RangeTree.empty().intersect(a)).isSameInstanceAs(RangeTree.empty()); | |||
| RangeTree b = ranges("1234", "4xxxx", "999"); | |||
| assertThat(a.intersect(b)).containsExactly("1234", "456xx"); | |||
| assertThat(b.intersect(a)).containsExactly("1234", "456xx"); | |||
| } | |||
| @Test | |||
| public void testSubtraction() { | |||
| RangeTree a = ranges("12xx", "456xx"); | |||
| assertThat(a.subtract(a)).isSameInstanceAs(RangeTree.empty()); | |||
| assertThat(a.subtract(RangeTree.empty())).isEqualTo(a); | |||
| assertThat(RangeTree.empty().subtract(a)).isSameInstanceAs(RangeTree.empty()); | |||
| RangeTree b = ranges("1234", "4xxxx", "999"); | |||
| assertThat(a.subtract(b)).containsExactly("12[0-24-9]x", "123[0-35-9]"); | |||
| assertThat(b.subtract(a)).containsExactly("999", "4[0-46-9]xxx", "45[0-57-9]xx"); | |||
| } | |||
| @Test | |||
| public void testContainsAll() { | |||
| RangeTree a = ranges("12[3-6]xx", "13[5-8]xx", "456xxxx"); | |||
| assertThat(a.containsAll(a)).isTrue(); | |||
| assertThat(a.containsAll(RangeTree.empty())).isTrue(); | |||
| assertThat(RangeTree.empty().containsAll(a)).isFalse(); | |||
| // Test branching, since 12.. and 13... are distinct branches but both contain ..[56][78]x | |||
| assertThat(a.containsAll(ranges("1[23][56][78]x", "4567890"))).isTrue(); | |||
| // Path 127.. is not contained. | |||
| assertThat(a.containsAll(ranges("12[357]xx"))).isFalse(); | |||
| // Hard to test for, but this should fail immediately (due to length mismatch). | |||
| assertThat(a.containsAll(ranges("123456"))).isFalse(); | |||
| // Check edge case for zero-length paths. | |||
| assertThat(ranges("", "1").containsAll(ranges(""))).isTrue(); | |||
| assertThat(RangeTree.empty().containsAll(ranges(""))).isFalse(); | |||
| } | |||
| @Test | |||
| public void testVennDiagram() { | |||
| // Test basic set-theoretic assumptions about the logical operations. | |||
| // In theory we could run this test with any non-disjoint pair of trees. | |||
| RangeTree a = ranges("12xx", "456xx"); | |||
| RangeTree b = ranges("1234", "4xxxx", "999"); | |||
| RangeTree intAB = a.intersect(b); | |||
| RangeTree subAB = a.subtract(b); | |||
| RangeTree subBA = b.subtract(a); | |||
| // (A\B) and (B\A) are disjoint with (A^B) and each other. | |||
| assertThat(subAB.intersect(intAB)).isSameInstanceAs(RangeTree.empty()); | |||
| assertThat(subBA.intersect(intAB)).isSameInstanceAs(RangeTree.empty()); | |||
| assertThat(subAB.intersect(subBA)).isSameInstanceAs(RangeTree.empty()); | |||
| // Even the union of (A\B) and (B\A) is disjoint to the intersection. | |||
| assertThat(subAB.union(subBA).intersect(intAB)).isSameInstanceAs(RangeTree.empty()); | |||
| // (A\B) + (A^B) = A, (B\A) + (A^B) = B, (A\B) + (B\A) + (A^B) == (A+B) | |||
| assertThat(subAB.union(intAB)).isEqualTo(a); | |||
| assertThat(subBA.union(intAB)).isEqualTo(b); | |||
| assertThat(subAB.union(subBA).union(intAB)).isEqualTo(a.union(b)); | |||
| } | |||
| @Test | |||
| public void testFromRaggedRange() { | |||
| RangeTree r = RangeTree.from(rangeSetOf(range("123980", "161097"))); | |||
| // Very 'ragged' ranges produce a lot of range specifications. | |||
| assertThat(r).containsExactly( | |||
| "1239[8-9]x", | |||
| "12[4-9]xxx", | |||
| "1[3-5]xxxx", | |||
| "160xxx", | |||
| "1610[0-8]x", | |||
| "16109[0-7]"); | |||
| } | |||
| @Test | |||
| public void testComplexSpecsToSimpleRange() { | |||
| List<RangeSpecification> specs = specs( | |||
| "12[3-9]", | |||
| "1[3-9]x", | |||
| "[2-9]xx", | |||
| "xxxx", | |||
| "[0-3]xxxx", | |||
| "4[0-4]xxx", | |||
| "45[0-5]xx", | |||
| "456[0-6]x", | |||
| "4567[0-8]"); | |||
| RangeTree r = RangeTree.from(specs); | |||
| assertThat(r).containsExactly(specs); | |||
| assertThat(r.asRangeSet()).isEqualTo(rangeSetOf(range("123", "45678"))); | |||
| } | |||
| @Test | |||
| public void testAsRangeSetMultipleGroups() { | |||
| // The range specification yields 4 ranges, one for each of the prefixes 0123, 0124, 0125, 0128. | |||
| RangeTree r = ranges("012[3-58][2-7]x"); | |||
| assertThat(r.asRangeSet()).isEqualTo(rangeSetOf( | |||
| range("012320", "012379"), | |||
| range("012420", "012479"), | |||
| range("012520", "012579"), | |||
| range("012820", "012879"))); | |||
| } | |||
| @Test | |||
| public void testAsRangeSetMerging() { | |||
| // In isolation, the first specification represents two ranges, and the second represents one. | |||
| RangeTree r = ranges("12[3-4][7-9]x", "125[0-5]x"); | |||
| // The range ending 12499 merges with the range starting 12500, giving 2 rather than 3 ranges. | |||
| assertThat(r.asRangeSet()).isEqualTo(rangeSetOf( | |||
| range("12370", "12399"), | |||
| range("12470", "12559"))); | |||
| } | |||
| @Test | |||
| public void testVisitor() { | |||
| // Carefully construct DFA so depth first visitation order is just incrementing from 0. | |||
| RangeTree r = ranges("012", "345", "367", "3689"); | |||
| TestVisitor v = new TestVisitor(); | |||
| r.accept(v); | |||
| DfaNode initial = r.getInitial(); | |||
| DfaNode terminal = RangeTree.getTerminal(); | |||
| assertThat(v.visited).hasSize(10); | |||
| // Edges 0 & 3 leave the initial state, edges 2,5,7,9 reach the terminal. | |||
| assertThat(v.visited.stream().map(Edge::source).filter(initial::equals).count()).isEqualTo(2); | |||
| assertThat(v.visited.stream().map(Edge::target).filter(terminal::equals).count()).isEqualTo(4); | |||
| // Check expected edge value masks. | |||
| for (int n = 0; n < 10; n++) { | |||
| assertThat(v.visited.get(n).digitMask()).isEqualTo(1 << n); | |||
| } | |||
| } | |||
| @Test | |||
| public void testMin() { | |||
| assertThrows(IllegalStateException.class, () -> RangeTree.empty().first()); | |||
| assertThat(RangeTree.from(RangeSpecification.empty()).first()).isEqualTo(DigitSequence.empty()); | |||
| RangeTree tree = ranges("[1-6]xxxx", "[6-9]xx", "[89]xxx"); | |||
| assertThat(tree.first()).isEqualTo(DigitSequence.of("600")); | |||
| assertThat(tree.subtract(ranges("[6-8]xx")).first()).isEqualTo(DigitSequence.of("900")); | |||
| assertThat(tree.subtract(ranges("xxx")).first()).isEqualTo(DigitSequence.of("8000")); | |||
| assertThat(tree.subtract(ranges("xxx", "8[0-6]xx")).first()) | |||
| .isEqualTo(DigitSequence.of("8700")); | |||
| assertThat(tree.subtract(ranges("xxx", "xxxx")).first()).isEqualTo(DigitSequence.of("10000")); | |||
| } | |||
| /** | |||
| * Checks {@code sample(index)} at both ends of the valid index range, and that an index outside | |||
| * that range is rejected with an {@link IndexOutOfBoundsException}. | |||
| */ | |||
| @Test | |||
| public void testSample() { | |||
| // The empty tree has no valid index at all, while the empty-sequence tree has exactly one. | |||
| assertThrows(IndexOutOfBoundsException.class, () -> RangeTree.empty().sample(0)); | |||
| assertThat(RangeTree.from(RangeSpecification.empty()).sample(0)) | |||
| .isEqualTo(DigitSequence.empty()); | |||
| RangeTree tree = ranges("[1-6]xxxx", "[6-9]xx", "[89]xxx"); | |||
| // sometimes iteration looks ordered ... | |||
| assertThat(tree.sample(0)).isEqualTo(DigitSequence.of("10000")); | |||
| assertThat(tree.sample(1)).isEqualTo(DigitSequence.of("10001")); | |||
| assertThat(tree.sample(10)).isEqualTo(DigitSequence.of("10010")); | |||
| // but in general sample(n).next() != sample(n+1) | |||
| assertThat(tree.sample(49999)).isEqualTo(DigitSequence.of("59999")); | |||
| assertThat(tree.sample(50000)).isEqualTo(DigitSequence.of("600")); | |||
| assertThat(tree.sample(50001)).isEqualTo(DigitSequence.of("60000")); | |||
| assertThat(tree.sample(tree.size() - 1)).isEqualTo(DigitSequence.of("9999")); | |||
| // The first index past the end of the populated tree itself must be rejected; sampling the | |||
| // empty tree here would only repeat the check made at the top of this test. | |||
| assertThrows(IndexOutOfBoundsException.class, () -> tree.sample(tree.size())); | |||
| } | |||
| @Test | |||
| public void testSignificantDigits() { | |||
| RangeTree ranges = ranges("123xx", "14567", "789"); | |||
| assertThat(ranges.significantDigits(3)).containsExactly("123xx", "145xx", "789"); | |||
| assertThat(ranges.significantDigits(2)).containsExactly("12xxx", "14xxx", "78x"); | |||
| assertThat(ranges.significantDigits(1)).containsExactly("1xxxx", "7xx"); | |||
| assertThat(ranges.significantDigits(0)).containsExactly("xxxxx", "xxx"); | |||
| } | |||
| @Test | |||
| public void testPrefixWith() { | |||
| RangeTree ranges = ranges("123xx", "456x"); | |||
| assertThat(ranges.prefixWith(spec("00"))).isEqualTo(ranges("00123xx", "00456x")); | |||
| assertThat(ranges.prefixWith(RangeSpecification.empty())).isSameInstanceAs(ranges); | |||
| // Prefixing an empty tree gives an empty tree (all paths that exist have been prefixed). | |||
| assertThat(RangeTree.empty().prefixWith(spec("00"))).isEqualTo(RangeTree.empty()); | |||
| } | |||
| @Test | |||
| public void testSlicing() { | |||
| RangeTree ranges = ranges("", "1", "123", "125xx", "456x"); | |||
| assertThat(ranges.slice(1)).isEqualTo(ranges("[14]")); | |||
| assertThat(ranges.slice(2)).isEqualTo(ranges("12", "45")); | |||
| assertThat(ranges.slice(3)).isEqualTo(ranges("12[35]", "456")); | |||
| assertThat(ranges.slice(4)).isEqualTo(ranges("125x", "456x")); | |||
| assertThat(ranges.slice(2, 4)).isEqualTo(ranges("123", "125x", "456x")); | |||
| assertThat(ranges.slice(0, 5)).isEqualTo(ranges); | |||
| } | |||
| @Test | |||
| public void testSerializingRealWorldExample() { | |||
| List<RangeSpecification> expected = specs( | |||
| "11[2-7]xxxxxxx", | |||
| "12[0-249][2-7]xxxxxx", | |||
| "12[35-8]x[2-7]xxxxx", | |||
| "13[0-25][2-7]xxxxxx", | |||
| "13[346-9]x[2-7]xxxxx", | |||
| "14[145][2-7]xxxxxx", | |||
| "14[236-9]x[2-7]xxxxx", | |||
| "1[59][0235-9]x[2-7]xxxxx", | |||
| "1[59][14][2-7]xxxxxx", | |||
| "16[014][2-7]xxxxxx", | |||
| "16[235-9]x[2-7]xxxxx", | |||
| "17[1257][2-7]xxxxxx", | |||
| "17[34689]x[2-7]xxxxx", | |||
| "18[01346][2-7]xxxxxx", | |||
| "18[257-9]x[2-7]xxxxx", | |||
| "2[02][2-7]xxxxxxx", | |||
| "21[134689]x[2-7]xxxxx", | |||
| "21[257][2-7]xxxxxx", | |||
| "23[013][2-7]xxxxxx", | |||
| "23[24-8]x[2-7]xxxxx", | |||
| "24[01][2-7]xxxxxx", | |||
| "24[2-8]x[2-7]xxxxx", | |||
| "25[0137][2-7]xxxxxx", | |||
| "25[25689]x[2-7]xxxxx", | |||
| "26[0158][2-7]xxxxxx", | |||
| "26[2-4679]x[2-7]xxxxx", | |||
| "27[13-79]x[2-7]xxxxx", | |||
| "278[2-7]xxxxxx", | |||
| "28[1568][2-7]xxxxxx", | |||
| "28[2-479]x[2-7]xxxxx", | |||
| "29[14][2-7]xxxxxx", | |||
| "29[235-9]x[2-7]xxxxx", | |||
| "301x[2-7]xxxxx", | |||
| "31[79]x[2-7]xxxxx", | |||
| "32[1-5]x[2-7]xxxxx", | |||
| "326[2-7]xxxxxx", | |||
| "33[2-7]xxxxxxx", | |||
| "34[13][2-7]xxxxxx", | |||
| "342[0189][2-7]xxxxx", | |||
| "342[2-7]xxxxxx", | |||
| "34[5-8]x[2-7]xxxxx", | |||
| "35[125689]x[2-7]xxxxx", | |||
| "35[34][2-7]xxxxxx", | |||
| "36[01489][2-7]xxxxxx", | |||
| "36[235-7]x[2-7]xxxxx", | |||
| "37[02-46][2-7]xxxxxx", | |||
| "37[157-9]x[2-7]xxxxx", | |||
| "38[159][2-7]xxxxxx", | |||
| "38[2-467]x[2-7]xxxxx", | |||
| "4[04][2-7]xxxxxxx", | |||
| "41[14578]x[2-7]xxxxx", | |||
| "41[36][2-7]xxxxxx", | |||
| "42[1-47][2-7]xxxxxx", | |||
| "42[5689]x[2-7]xxxxx", | |||
| "43[15][2-7]xxxxxx", | |||
| "43[2-467]x[2-7]xxxxx", | |||
| "45[12][2-7]xxxxxx", | |||
| "45[4-7]x[2-7]xxxxx", | |||
| "46[0-26-9][2-7]xxxxxx", | |||
| "46[35]x[2-7]xxxxx", | |||
| "47[0-24-9][2-7]xxxxxx", | |||
| "473x[2-7]xxxxx", | |||
| "48[013-57][2-7]xxxxxx", | |||
| "48[2689]x[2-7]xxxxx", | |||
| "49[014-7][2-7]xxxxxx", | |||
| "49[2389]x[2-7]xxxxx", | |||
| "51[025][2-7]xxxxxx", | |||
| "51[146-9]x[2-7]xxxxx", | |||
| "52[14-8]x[2-7]xxxxx", | |||
| "522[2-7]xxxxxx", | |||
| "53[1346]x[2-7]xxxxx", | |||
| "53[25][2-7]xxxxxx", | |||
| "54[14-69]x[2-7]xxxxx", | |||
| "54[28][2-7]xxxxxx", | |||
| "55[12][2-7]xxxxxx", | |||
| "55[46]x[2-7]xxxxx", | |||
| "56[146-9]x[2-7]xxxxx", | |||
| "56[25][2-7]xxxxxx", | |||
| "571[2-7]xxxxxx", | |||
| "57[2-4]x[2-7]xxxxx", | |||
| "581[2-7]xxxxxx", | |||
| "58[2-8]x[2-7]xxxxx", | |||
| "59[15][2-7]xxxxxx", | |||
| "59[246]x[2-7]xxxxx", | |||
| "61[1358]x[2-7]xxxxx", | |||
| "612[2-7]xxxxxx", | |||
| "621[2-7]xxxxxx", | |||
| "62[2457]x[2-7]xxxxx", | |||
| "631[2-7]xxxxxx", | |||
| "63[2-4]x[2-7]xxxxx", | |||
| "641[2-7]xxxxxx", | |||
| "64[235-7]x[2-7]xxxxx", | |||
| "65[17][2-7]xxxxxx", | |||
| "65[2-689]x[2-7]xxxxx", | |||
| "66[13][2-7]xxxxxx", | |||
| "66[24578]x[2-7]xxxxx", | |||
| "671[2-7]xxxxxx", | |||
| "67[235689]x[2-7]xxxxx", | |||
| "674[0189][2-7]xxxxx", | |||
| "674[2-7]xxxxxx", | |||
| "680[2-7]xxxxxx", | |||
| "68[1-6]x[2-7]xxxxx", | |||
| "71[013-9]x[2-7]xxxxx", | |||
| "712[2-7]xxxxxx", | |||
| "72[0235-9]x[2-7]xxxxx", | |||
| "72[14][2-7]xxxxxx", | |||
| "73[134][2-7]xxxxxx", | |||
| "73[2679]x[2-7]xxxxx", | |||
| "74[1-35689]x[2-7]xxxxx", | |||
| "74[47][2-7]xxxxxx", | |||
| "75[15][2-7]xxxxxx", | |||
| "75[2-46-9]x[2-7]xxxxx", | |||
| "7[67][02-9]x[2-7]xxxxx", | |||
| "7[67]1[2-7]xxxxxx", | |||
| "78[013-7]x[2-7]xxxxx", | |||
| "782[0-6][2-7]xxxxx", | |||
| "788[0189][2-7]xxxxx", | |||
| "788[2-7]xxxxxx", | |||
| "79[0189]x[2-7]xxxxx", | |||
| "79[2-7]xxxxxxx", | |||
| "80[2-467]xxxxxxx", | |||
| "81[1357-9]x[2-7]xxxxx", | |||
| "816[2-7]xxxxxx", | |||
| "82[014][2-7]xxxxxx", | |||
| "82[235-8]x[2-7]xxxxx", | |||
| "83[03-57-9]x[2-7]xxxxx", | |||
| "83[126][2-7]xxxxxx", | |||
| "84[0-24-9]x[2-7]xxxxx", | |||
| "85xx[2-7]xxxxx", | |||
| "86[136][2-7]xxxxxx", | |||
| "86[2457-9]x[2-7]xxxxx", | |||
| "87[078][2-7]xxxxxx", | |||
| "87[1-6]x[2-7]xxxxx", | |||
| "88[1256]x[2-7]xxxxx", | |||
| "88[34][2-7]xxxxxx", | |||
| "891[2-7]xxxxxx", | |||
| "89[2-4]x[2-7]xxxxx"); | |||
| RangeTree t1 = RangeTree.from(expected); | |||
| assertThat(t1).containsExactly(expected); | |||
| assertThat(RangeTree.from(t1.asRangeSet())).containsExactly(expected); | |||
| } | |||
| /** | |||
| * Unions a large number of distinct single-sequence trees in parallel and checks that the | |||
| * result is the single block covering every sequence of that length. | |||
| */ | |||
| @Test | |||
| public void testThreadSafety() throws ExecutionException, InterruptedException { | |||
| // For 10^5 this takes ~500ms. For 10^6 it starts to take non-trivial time (~10 seconds). | |||
| int numDigits = 5; | |||
| // At 1000 threads this starts to take non-trivial time. | |||
| int numThreads = 100; | |||
| // Collect 10^N ranges from "00..." to "99...", all distinct. | |||
| List<RangeTree> ranges = Stream | |||
| .iterate(DigitSequence.zeros(numDigits), DigitSequence::next) | |||
| .limit((int) Math.pow(10, numDigits)) | |||
| .map(RangeTreeTest::singletonRange) | |||
| .collect(Collectors.toCollection(ArrayList::new)); | |||
| // Fixed seed so the shuffled order is the same on every run. | |||
| Collections.shuffle(ranges, new Random(1234L)); | |||
| // Recombining all 10^N ranges should give a single combined block (i.e. "xx..."). Doing it | |||
| // with high parallelism should test the thread safety of the concurrent interning map. | |||
| ForkJoinPool pool = new ForkJoinPool(numThreads); | |||
| RangeTree combined; | |||
| try { | |||
| combined = pool | |||
| .submit(() -> ranges.parallelStream().reduce(RangeTree.empty(), RangeTree::union)) | |||
| .get(); | |||
| } finally { | |||
| // Always release the worker threads, even if the reduction fails. | |||
| pool.shutdown(); | |||
| } | |||
| assertThat(combined).isEqualTo(ranges(Strings.repeat("x", numDigits))); | |||
| } | |||
| @AutoValue | |||
| abstract static class Edge { | |||
| static Edge of(DfaNode source, DfaNode target, DfaEdge edge) { | |||
| return new AutoValue_RangeTreeTest_Edge(source, target, edge.getDigitMask()); | |||
| } | |||
| abstract DfaNode source(); | |||
| abstract DfaNode target(); | |||
| abstract int digitMask(); | |||
| } | |||
| /** | |||
| * Range tree visitor that captures edges visited (in depth first order). Each visited edge is | |||
| * recorded and the visitor then continues into the edge's target node. | |||
| */ | |||
| private static final class TestVisitor implements DfaVisitor { | |||
| // The list reference never changes; only its contents grow during visitation. | |||
| final List<Edge> visited = new ArrayList<>(); | |||
| @Override | |||
| public void visit(DfaNode source, DfaEdge edge, DfaNode target) { | |||
| visited.add(Edge.of(source, target, edge)); | |||
| // Recurse before returning so that edges are recorded in depth first order. | |||
| target.accept(this); | |||
| } | |||
| } | |||
| RangeTree ranges(String... s) { | |||
| return RangeTree.from(specs(s)); | |||
| } | |||
| private static RangeSpecification spec(String s) { | |||
| return RangeSpecification.parse(s); | |||
| } | |||
| private static List<RangeSpecification> specs(String... s) { | |||
| return Stream.of(s).map(RangeSpecification::parse).collect(toImmutableList()); | |||
| } | |||
| private static Range<DigitSequence> range(String lo, String hi) { | |||
| return Range.closed(DigitSequence.of(lo), DigitSequence.of(hi)).canonical(domain()); | |||
| } | |||
| /** Returns an immutable range set containing the given ranges. */ | |||
| @SafeVarargs // The varargs array is only read (via asList) and never stored or modified. | |||
| private static RangeSet<DigitSequence> rangeSetOf(Range<DigitSequence>... r) { | |||
| return ImmutableRangeSet.copyOf(asList(r)); | |||
| } | |||
| private static RangeTree singletonRange(DigitSequence s) { | |||
| return RangeTree.from(spec(s.toString())); | |||
| } | |||
| } | |||
| @ -0,0 +1,57 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.i18n; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.common.truth.Truth8.assertThat; | |||
| import static org.junit.Assert.assertThrows; | |||
| import java.util.stream.Stream; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class PhoneRegionTest { | |||
| @Test | |||
| public void testOrdering() { | |||
| assertThat(Stream.of(r("US"), r("GB"), r("AE"), r("001"), r("KR"), r("MN")).sorted()) | |||
| .containsAtLeast(r("AE"), r("GB"), r("KR"), r("MN"), r("US"), r("001")) | |||
| .inOrder(); | |||
| } | |||
| @Test | |||
| public void testWorld() { | |||
| assertThat(PhoneRegion.getWorld()).isEqualTo(r("001")); | |||
| } | |||
| @Test | |||
| public void testBadArgs() { | |||
| assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("ABC"))) | |||
| .hasMessageThat() | |||
| .contains("ABC"); | |||
| assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("us"))) | |||
| .hasMessageThat() | |||
| .contains("us"); | |||
| assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("000"))) | |||
| .hasMessageThat() | |||
| .contains("000"); | |||
| } | |||
| private static PhoneRegion r(String cldrCode) { | |||
| return PhoneRegion.of(cldrCode); | |||
| } | |||
| } | |||
| @ -0,0 +1,42 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.i18n; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static org.junit.Assert.assertThrows; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class SimpleLanguageTagTest { | |||
| @Test | |||
| public void testSimple() { | |||
| assertThat(SimpleLanguageTag.of("en").toString()).isEqualTo("en"); | |||
| assertThat(SimpleLanguageTag.of("zh_Hant").toString()).isEqualTo("zh-Hant"); | |||
| } | |||
| @Test | |||
| public void testBadArgs() { | |||
| assertThat(assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("x"))) | |||
| .hasMessageThat().contains("x"); | |||
| assertThat(assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("EN"))) | |||
| .hasMessageThat().contains("EN"); | |||
| assertThat(assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("003"))) | |||
| .hasMessageThat().contains("003"); | |||
| } | |||
| } | |||
| @ -0,0 +1,82 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.common.truth.Truth8.assertThat; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||
| import java.util.Optional; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| /** Tests creation of {@code AltFormatSpec} from a format template and a prefix. */ | |||
| public class AltFormatSpecTest { | |||
| @Test | |||
| public void testSimple() { | |||
| FormatTemplate template = FormatTemplate.parse("XXXX XXXX"); | |||
| RangeSpecification prefix = RangeSpecification.parse("123"); | |||
| AltFormatSpec spec = AltFormatSpec.create(template, prefix, "foo", Optional.of("Comment")); | |||
| assertThat(spec.template()).isEqualTo(template); | |||
| assertThat(spec.prefix()).isEqualTo(prefix); | |||
| assertThat(spec.parentFormatId()).isEqualTo("foo"); | |||
| assertThat(spec.comment()).hasValue("Comment"); | |||
| // The specifier is the template with its leading digits replaced by the prefix. | |||
| assertThat(spec.specifier()).isEqualTo("123X XXXX"); | |||
| } | |||
| @Test | |||
| public void testGoodTemplateAndPrefix() { | |||
| assertGoodTemplateAndPrefix("XXX XXX", "", "XXX XXX"); | |||
| assertGoodTemplateAndPrefix("XXX XXX", "123", "123 XXX"); | |||
| assertGoodTemplateAndPrefix("XXX XXX", "1234", "123 4XX"); | |||
| assertGoodTemplateAndPrefix("XXX XXX", "123456", "123 456"); | |||
| assertGoodTemplateAndPrefix("XXX XXX**", "123", "123 XXX**"); | |||
| assertGoodTemplateAndPrefix("XXX XXX", "12[3-6]", "12[3-6] XXX"); | |||
| assertGoodTemplateAndPrefix("XXX XXX", "1x3", "1X3 XXX"); | |||
| } | |||
| @Test | |||
| public void testBadTemplateOrPrefix() { | |||
| // Prefix too long. | |||
| assertBadTemplateAndPrefix("XXXX", "12345"); | |||
| // Prefix too long for min length. | |||
| assertBadTemplateAndPrefix("XXXX**", "12345"); | |||
| // Bad template chars. | |||
| assertBadTemplateAndPrefix("XXX-XXX", "123"); | |||
| // Extra whitespace. | |||
| assertBadTemplateAndPrefix(" XXXXXX", "123"); | |||
| // Prefix must not end with "any digit". The template used here has no leading whitespace, | |||
| // so the trailing "any digit" in the prefix is the only thing left to reject. | |||
| assertBadTemplateAndPrefix("XXXXXX", "123xx"); | |||
| } | |||
| // Asserts that the given template and prefix produce the expected specifier string. | |||
| private static void assertGoodTemplateAndPrefix(String template, String prefix, String spec) { | |||
| FormatTemplate t = FormatTemplate.parse(template); | |||
| RangeSpecification p = RangeSpecification.parse(prefix); | |||
| assertThat(AltFormatSpec.create(t, p, "foo", Optional.empty()).specifier()).isEqualTo(spec); | |||
| } | |||
| // Asserts that AltFormatSpec.create() rejects the given combination. Parsing is done outside | |||
| // assertThrows(), so both strings must be parseable on their own for this check to apply. | |||
| private static void assertBadTemplateAndPrefix(String template, String prefix) { | |||
| FormatTemplate t = FormatTemplate.parse(template); | |||
| RangeSpecification p = RangeSpecification.parse(prefix); | |||
| assertThrows(IllegalArgumentException.class, | |||
| () -> AltFormatSpec.create(t, p, "foo", Optional.empty())); | |||
| } | |||
| } | |||
| @ -0,0 +1,111 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import com.google.common.base.CharMatcher; | |||
| import com.google.common.base.Joiner; | |||
| import com.google.common.base.Splitter; | |||
| import com.google.common.collect.ImmutableList; | |||
| import java.io.IOException; | |||
| import java.io.StringReader; | |||
| import java.io.StringWriter; | |||
| import java.util.Arrays; | |||
| import java.util.List; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class AltFormatsSchemaTest { | |||
| @Test | |||
| public void testSimple_export() throws IOException { | |||
| assertThat( | |||
| exportCsv( | |||
| altFormat("123 XXX XXXX", "foo", "Hello World"))) | |||
| .containsExactly( | |||
| "Format ; Parent Format ; Comment", | |||
| "123 XXX XXXX ; foo ; \"Hello World\"") | |||
| .inOrder(); | |||
| } | |||
| @Test | |||
| public void testSimple_import() throws IOException { | |||
| assertThat( | |||
| importCsv( | |||
| "Format ; Parent Format ; Comment", | |||
| "123 XXX XXXX ; foo ; \"Hello World\"")) | |||
| .containsExactly( | |||
| altFormat("123 XXX XXXX", "foo", "Hello World")); | |||
| } | |||
| @Test | |||
| public void testEscapedText_export() throws IOException { | |||
| assertThat( | |||
| exportCsv( | |||
| altFormat("123 XXX XXXX", "foo", "\tHello\nWorld\\"))) | |||
| .containsExactly( | |||
| "Format ; Parent Format ; Comment", | |||
| "123 XXX XXXX ; foo ; \"\\tHello\\nWorld\\\\\"") | |||
| .inOrder(); | |||
| } | |||
| @Test | |||
| public void testEscapedText_import() throws IOException { | |||
| assertThat( | |||
| importCsv( | |||
| "Format ; Parent Format ; Comment", | |||
| "123 XXX XXXX ; foo ; \"\\tHello\\nWorld\\\\\"")) | |||
| .containsExactly( | |||
| altFormat("123 XXX XXXX", "foo", "\tHello\nWorld\\")); | |||
| } | |||
| @Test | |||
| public void testRetainsExplicitOrdering() throws IOException { | |||
| assertThat( | |||
| exportCsv( | |||
| altFormat("123 XXXXXX", "foo", "First"), | |||
| altFormat("XX XXXX", "bar", "Second"), | |||
| altFormat("9X XXX XXX", "baz", "Third"))) | |||
| .containsExactly( | |||
| "Format ; Parent Format ; Comment", | |||
| "123 XXXXXX ; foo ; \"First\"", | |||
| "XX XXXX ; bar ; \"Second\"", | |||
| "9X XXX XXX ; baz ; \"Third\"") | |||
| .inOrder(); | |||
| } | |||
| private AltFormatSpec altFormat(String spec, String parentId, String comment) { | |||
| return AltFormatsSchema.parseAltFormat(spec, parentId, comment); | |||
| } | |||
| private static List<String> exportCsv(AltFormatSpec... altFormats) throws IOException { | |||
| try (StringWriter out = new StringWriter()) { | |||
| AltFormatsSchema.exportCsv(out, Arrays.asList(altFormats)); | |||
| // Ignore trailing empty lines. | |||
| return Splitter.on('\n').splitToList(CharMatcher.is('\n').trimTrailingFrom(out.toString())); | |||
| } | |||
| } | |||
| private static ImmutableList<AltFormatSpec> importCsv(String... lines) | |||
| throws IOException { | |||
| // Add a trailing newline, since that's what we expect in the real CSV files. | |||
| StringReader file = new StringReader(Joiner.on('\n').join(lines) + "\n"); | |||
| return AltFormatsSchema.importAltFormats(file); | |||
| } | |||
| } | |||
| @ -0,0 +1,156 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.anchor; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_FIXED_LINE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_MOBILE; | |||
| import com.google.common.base.CharMatcher; | |||
| import com.google.common.base.Joiner; | |||
| import com.google.common.base.Splitter; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor; | |||
| import java.io.IOException; | |||
| import java.io.StringReader; | |||
| import java.io.StringWriter; | |||
| import java.util.Arrays; | |||
| import java.util.List; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| /** Tests CSV export and import of comments via {@link CommentsSchema}. */ | |||
| @RunWith(JUnit4.class) | |||
| public class CommentsSchemaTest { | |||
|   /** The title row expected at the top of the CSV in these tests. */ | |||
|   private static final String HEADER = "Region ; Label ; Comment"; | |||
|   private static final PhoneRegion REGION_US = PhoneRegion.of("US"); | |||
|   private static final PhoneRegion REGION_CA = PhoneRegion.of("CA"); | |||
|   // Anchors used by the tests below. | |||
|   private static final Anchor US_TOP = Comment.anchor(REGION_US); | |||
|   private static final Anchor US_FIXED_LINE = anchor(REGION_US, XML_FIXED_LINE); | |||
|   private static final Anchor US_MOBILE = anchor(REGION_US, XML_MOBILE); | |||
|   private static final Anchor US_SHORTCODE = Comment.shortcodeAnchor(REGION_US); | |||
|   private static final Anchor CA_FIXED_LINE = anchor(REGION_CA, XML_FIXED_LINE); | |||
|   @Test | |||
|   public void testSimple_export() throws IOException { | |||
|     List<String> csvLines = exportCsv(comment(US_FIXED_LINE, "Hello World")); | |||
|     assertThat(csvLines) | |||
|         .containsExactly(HEADER, "US ; XML_FIXED_LINE ; \"Hello World\"") | |||
|         .inOrder(); | |||
|   } | |||
|   @Test | |||
|   public void testSimple_import() throws IOException { | |||
|     ImmutableList<Comment> imported = importCsv(HEADER, "US ; XML_FIXED_LINE ; \"Hello World\""); | |||
|     assertThat(imported).containsExactly(comment(US_FIXED_LINE, "Hello World")); | |||
|   } | |||
|   /** A two-line comment with a tab and a backslash is exported as one escaped CSV value. */ | |||
|   @Test | |||
|   public void testEscapedText_export() throws IOException { | |||
|     List<String> csvLines = exportCsv(comment(US_FIXED_LINE, "\tHello", "World\\")); | |||
|     assertThat(csvLines) | |||
|         .containsExactly(HEADER, "US ; XML_FIXED_LINE ; \"\\tHello\\nWorld\\\\\"") | |||
|         .inOrder(); | |||
|   } | |||
|   /** An escaped CSV value is imported back as the original two comment lines. */ | |||
|   @Test | |||
|   public void testEscapedText_import() throws IOException { | |||
|     ImmutableList<Comment> imported = | |||
|         importCsv(HEADER, "US ; XML_FIXED_LINE ; \"\\tHello\\nWorld\\\\\""); | |||
|     assertThat(imported).containsExactly(comment(US_FIXED_LINE, "\tHello", "World\\")); | |||
|   } | |||
|   /** | |||
|    * Exported rows are expected ordered by region and then label, with comments sharing an | |||
|    * anchor keeping their given order. | |||
|    */ | |||
|   @Test | |||
|   public void testOrdering_export() throws IOException { | |||
|     List<String> csvLines = | |||
|         exportCsv( | |||
|             comment(US_FIXED_LINE, "First"), | |||
|             comment(US_FIXED_LINE, "Second"), | |||
|             comment(US_FIXED_LINE, "Third"), | |||
|             comment(US_TOP, "Top Level Comment"), | |||
|             comment(US_SHORTCODE, "Shortcode Comment"), | |||
|             comment(US_MOBILE, "Other Type"), | |||
|             comment(CA_FIXED_LINE, "Other Region")); | |||
|     assertThat(csvLines) | |||
|         .containsExactly( | |||
|             HEADER, | |||
|             "CA ; XML_FIXED_LINE ; \"Other Region\"", | |||
|             "US ; SC ; \"Shortcode Comment\"", | |||
|             "US ; XML ; \"Top Level Comment\"", | |||
|             "US ; XML_FIXED_LINE ; \"First\"", | |||
|             "US ; XML_FIXED_LINE ; \"Second\"", | |||
|             "US ; XML_FIXED_LINE ; \"Third\"", | |||
|             "US ; XML_MOBILE ; \"Other Type\"") | |||
|         .inOrder(); | |||
|   } | |||
|   /** Imported comments are expected in the same order as exported rows, whatever the row order. */ | |||
|   @Test | |||
|   public void testOrdering_import() throws IOException { | |||
|     ImmutableList<Comment> imported = | |||
|         importCsv( | |||
|             HEADER, | |||
|             "US ; XML_FIXED_LINE ; \"First\"", | |||
|             "US ; XML_FIXED_LINE ; \"Second\"", | |||
|             "US ; XML_FIXED_LINE ; \"Third\"", | |||
|             "US ; XML ; \"Top Level Comment\"", | |||
|             "US ; SC ; \"Shortcode Comment\"", | |||
|             "US ; XML_MOBILE ; \"Other Type\"", | |||
|             "CA ; XML_FIXED_LINE ; \"Other Region\""); | |||
|     assertThat(imported) | |||
|         .containsExactly( | |||
|             comment(CA_FIXED_LINE, "Other Region"), | |||
|             comment(US_SHORTCODE, "Shortcode Comment"), | |||
|             comment(US_TOP, "Top Level Comment"), | |||
|             comment(US_FIXED_LINE, "First"), | |||
|             comment(US_FIXED_LINE, "Second"), | |||
|             comment(US_FIXED_LINE, "Third"), | |||
|             comment(US_MOBILE, "Other Type")) | |||
|         .inOrder(); | |||
|   } | |||
|   /** Creates a comment with the given anchor and text lines. */ | |||
|   private static Comment comment(Anchor a, String... lines) { | |||
|     List<String> text = Arrays.asList(lines); | |||
|     return Comment.create(a, text); | |||
|   } | |||
|   /** Exports the given comments and returns the CSV split into lines. */ | |||
|   private static List<String> exportCsv(Comment... comments) throws IOException { | |||
|     try (StringWriter csv = new StringWriter()) { | |||
|       CommentsSchema.exportCsv(csv, Arrays.asList(comments)); | |||
|       // Ignore trailing empty lines. | |||
|       String trimmed = CharMatcher.is('\n').trimTrailingFrom(csv.toString()); | |||
|       return Splitter.on('\n').splitToList(trimmed); | |||
|     } | |||
|   } | |||
|   /** Joins the given lines into a CSV "file" and imports it. */ | |||
|   private static ImmutableList<Comment> importCsv(String... lines) | |||
|       throws IOException { | |||
|     // Add a trailing newline, since that's what we expect in the real CSV files. | |||
|     String csvText = Joiner.on('\n').join(lines) + "\n"; | |||
|     return CommentsSchema.importComments(new StringReader(csvText)); | |||
|   } | |||
| } | |||
| @ -0,0 +1,160 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.model; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.common.truth.Truth8.assertThat; | |||
| import static java.util.Optional.empty; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||
| import java.util.Optional; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class FormatSpecTest { | |||
| @Test | |||
| public void testCreate_national() { | |||
| national("XXXX"); | |||
| national("XXX***"); | |||
| national("#XXX XXX"); | |||
| national("(#XXX) XX**-XXX"); | |||
| assertThat(national("XX\\XXX").national().skeleton()).isEqualTo("$1X$2"); | |||
| } | |||
| @Test | |||
| public void testCreate_international() { | |||
| // The international spec can be a duplicate (signifies international formatting is permitted). | |||
| international("XXX XXXX", "XXX XXXX"); | |||
| // Or it can be different (including grouping and separators). | |||
| international("(#XXX) XXXX", "XXX-XXXX"); | |||
| } | |||
| @Test | |||
| public void testCreate_carrier() { | |||
| carrier("# XXX XXXX", "# @ XXX XXXX"); | |||
| carrier("XXX XXXX", "@ XXX XXXX"); | |||
| // Carrier and national prefix can differ on whether national prefix is needed. | |||
| carrier("XXX XXXX", "#@ XXX XXXX"); | |||
| } | |||
| @Test | |||
| public void testCreate_national_bad() { | |||
| assertThrows(IllegalArgumentException.class, () -> national("")); | |||
| assertThrows(IllegalArgumentException.class, () -> national("Hello")); | |||
| assertThrows(IllegalArgumentException.class, () -> national("$1")); | |||
| assertThrows(IllegalArgumentException.class, () -> national("XX**XX")); | |||
| assertThrows(IllegalArgumentException.class, () -> national("****")); | |||
| assertThrows(IllegalArgumentException.class, () -> national("@ XXX")); | |||
| } | |||
| @Test | |||
| public void testCreate_international_bad() { | |||
| // National prefix is not allowed. | |||
| assertThrows(IllegalArgumentException.class, () -> international("#XXXX", "#XXXX")); | |||
| // Groups must match. | |||
| assertThrows(IllegalArgumentException.class, () -> international("# XXXX", "XX XX")); | |||
| assertThrows(IllegalArgumentException.class, () -> international("# XXXX", "XXX")); | |||
| } | |||
| @Test | |||
| public void testCreate_carrier_bad() { | |||
| // Carrier specs must have '@' present. | |||
| assertThrows(IllegalArgumentException.class, () -> carrier("XXX XXXX", "XXX XXXX")); | |||
| // Carrier specs cannot differ after the first group (including separator). | |||
| assertThrows(IllegalArgumentException.class, () -> carrier("#XXX XXXX", "#@XXX-XXXX")); | |||
| // National prefix (if present) must come first (if this is ever relaxed, we would need to | |||
| // change how carrier prefixes are handled and how nationalPrefixForParsing is generated). | |||
| assertThrows(IllegalArgumentException.class, () -> carrier("# XXX XXXX", "@# XXX XXXX")); | |||
| } | |||
| @Test | |||
| public void testTemplate_splitPrefix() { | |||
| FormatTemplate t = FormatTemplate.parse("(#) XXX - XXX**"); | |||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{3,5})"); | |||
| assertThat(t.getXmlFormat()).isEqualTo("$1 - $2"); | |||
| assertThat(t.getXmlPrefix()).hasValue("($NP) $FG"); | |||
| assertThat(t.hasNationalPrefix()).isTrue(); | |||
| assertThat(t.hasCarrierCode()).isFalse(); | |||
| } | |||
| @Test | |||
| public void testTemplate_noPrefix() { | |||
| FormatTemplate t = FormatTemplate.parse("XXX XX-XX"); | |||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})"); | |||
| assertThat(t.getXmlFormat()).isEqualTo("$1 $2-$3"); | |||
| assertThat(t.getXmlPrefix()).isEmpty(); | |||
| assertThat(t.hasNationalPrefix()).isFalse(); | |||
| assertThat(t.hasCarrierCode()).isFalse(); | |||
| } | |||
| @Test | |||
| public void testTemplate_replacementNoNationalPrefix() { | |||
| FormatTemplate t = FormatTemplate.parse("{XXX>123} XX-XX"); | |||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})"); | |||
| assertThat(t.getXmlFormat()).isEqualTo("$2-$3"); | |||
| assertThat(t.getXmlPrefix()).hasValue("123 $FG"); | |||
| assertThat(t.hasNationalPrefix()).isFalse(); | |||
| assertThat(t.hasCarrierCode()).isFalse(); | |||
| } | |||
| @Test | |||
| public void testTemplate_replacementWithNationalPrefix() { | |||
| FormatTemplate t = FormatTemplate.parse("#{XXX>123} XX-XX"); | |||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})"); | |||
| assertThat(t.getXmlFormat()).isEqualTo("$2-$3"); | |||
| assertThat(t.getXmlPrefix()).hasValue("$NP123 $FG"); | |||
| assertThat(t.hasNationalPrefix()).isTrue(); | |||
| assertThat(t.hasCarrierCode()).isFalse(); | |||
| } | |||
| @Test | |||
| public void testTemplate_replacementNotFirstGroup() { | |||
| FormatTemplate t = FormatTemplate.parse("XXX {XX>ABC} XX"); | |||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})"); | |||
| assertThat(t.getXmlFormat()).isEqualTo("$1 ABC $3"); | |||
| assertThat(t.getXmlPrefix()).isEmpty(); | |||
| assertThat(t.hasNationalPrefix()).isFalse(); | |||
| assertThat(t.hasCarrierCode()).isFalse(); | |||
| } | |||
| @Test | |||
| public void testTemplate_removeFirstGroupViaReplacement() { | |||
| // This test is very important for Argentina, where the leading group must be removed (and a | |||
| // different mobile token is used after the area code). | |||
| FormatTemplate t = FormatTemplate.parse("{XX>}XXX XXXX"); | |||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{2})(\\d{3})(\\d{4})"); | |||
| assertThat(t.getXmlFormat()).isEqualTo("$2 $3"); | |||
| assertThat(t.getXmlPrefix()).isEmpty(); | |||
| assertThat(t.hasNationalPrefix()).isFalse(); | |||
| assertThat(t.hasCarrierCode()).isFalse(); | |||
| } | |||
| private static FormatSpec national(String national) { | |||
| return FormatSpec.of(national, empty(), empty(), empty(), false, empty()); | |||
| } | |||
| private static FormatSpec international(String national, String intl) { | |||
| return FormatSpec.of(national, empty(), Optional.of(intl), empty(), false, empty()); | |||
| } | |||
| private static FormatSpec carrier(String national, String carrier) { | |||
| return FormatSpec.of(national, Optional.of(carrier), empty(), empty(), false, empty()); | |||
| } | |||
| } | |||
| @ -0,0 +1,70 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.common.truth.Truth8.assertThat; | |||
| import static org.junit.Assert.assertThrows; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class AssignmentTest { | |||
| private static final Column<String> COL_A = Column.ofString("A"); | |||
| private static final Column<String> COL_B = Column.ofString("B"); | |||
| private static final Column<Integer> COL_X = Column.ofUnsignedInteger("X"); | |||
| private static final Schema SCHEMA = Schema.builder().add(COL_A).add(COL_B).add(COL_X).build(); | |||
| @Test | |||
| public void testParsing() { | |||
| assertAssignment(Assignment.parse("A=foo", SCHEMA), COL_A, "foo"); | |||
| assertAssignment(Assignment.parse(" B = bar ", SCHEMA), COL_B, "bar"); | |||
| assertUnassignment(Assignment.parse("A=", SCHEMA), COL_A); | |||
| assertAssignment(Assignment.parse("X=23", SCHEMA), COL_X, 23); | |||
| assertThrows(IllegalArgumentException.class, () -> Assignment.parse("C=Nope", SCHEMA)); | |||
| assertThrows(IllegalArgumentException.class, () -> Assignment.parse("X=NaN", SCHEMA)); | |||
| } | |||
| @Test | |||
| public void testOf() { | |||
| assertAssignment(Assignment.of(COL_A, "foo"), COL_A, "foo"); | |||
| assertThat(Assignment.of(COL_A, "foo")).isNotEqualTo(Assignment.of(COL_A, "bar")); | |||
| assertThat(Assignment.of(COL_A, "")).isNotEqualTo(Assignment.of(COL_B, "")); | |||
| assertThat(Assignment.of(COL_A, COL_A.defaultValue())).isNotEqualTo(Assignment.unassign(COL_A)); | |||
| assertThrows(NullPointerException.class, () -> Assignment.of(COL_A, null)); | |||
| } | |||
| @Test | |||
| public void testUnassign() { | |||
| // Not much else to do here... | |||
| assertThat(Assignment.unassign(COL_A)).isEqualTo(Assignment.unassign(COL_A)); | |||
| assertUnassignment(Assignment.unassign(COL_A), COL_A); | |||
| } | |||
| private static <T extends Comparable<T>> void assertAssignment( | |||
| Assignment<?> a, Column<T> c, T v) { | |||
| assertThat(a.column()).isSameInstanceAs(c); | |||
| assertThat(a.value()).hasValue(v); | |||
| } | |||
| private static void assertUnassignment(Assignment<?> a, Column<?> c) { | |||
| assertThat(a.column()).isSameInstanceAs(c); | |||
| assertThat(a.value()).isEmpty(); | |||
| } | |||
| } | |||
| @ -0,0 +1,71 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat; | |||
| import static java.util.Arrays.asList; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import java.util.Arrays; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class ChangeTest { | |||
| private static final Column<String> COL_A = Column.ofString("A"); | |||
| private static final Column<String> COL_B = Column.ofString("B"); | |||
| @Test | |||
| public void testEmpty() { | |||
| assertThat(Change.empty().getRanges()).isEmpty(); | |||
| assertThat(Change.empty().getAssignments()).isEmpty(); | |||
| // Not all "no-op" changes are equal to the "empty" change (unlike RangeTree). This should be | |||
| // fine however since Changes are expected to have a very short lifecycle in most code and not | |||
| // be used as keys in maps etc... | |||
| assertThat(Change.empty()) | |||
| .isNotEqualTo(Change.builder(RangeTree.empty()).assign(COL_A, "foo").build()); | |||
| assertThat(Change.empty()).isNotEqualTo(Change.builder(ranges("12xxxx")).build()); | |||
| } | |||
| @Test | |||
| public void testBuilder() { | |||
| Change c = Change.builder(ranges("12xxxx")).assign(COL_A, "foo").assign(COL_B, "bar").build(); | |||
| assertThat(c.getRanges()).containsExactly("12xxxx"); | |||
| Assignment<String> assignFoo = Assignment.of(COL_A, "foo"); | |||
| Assignment<String> assignBar = Assignment.of(COL_B, "bar"); | |||
| assertThat(c.getAssignments()).containsExactly(assignFoo, assignBar); | |||
| assertThat(c).isEqualTo(Change.of(ranges("12xxxx"), asList(assignFoo, assignBar))); | |||
| // Don't allow same column twice (this could be relaxed in future if necessary)! | |||
| assertThrows(IllegalArgumentException.class, | |||
| () -> Change.builder(ranges("12xxxx")).assign(COL_A, "foo").assign(COL_A, "bar").build()); | |||
| } | |||
| @Test | |||
| public void testBuilderUnassignment() { | |||
| Change c = Change.builder(ranges("12xxxx")).unassign(COL_A).build(); | |||
| Assignment<String> unassign = Assignment.unassign(COL_A); | |||
| assertThat(c.getAssignments()).containsExactly(unassign); | |||
| assertThat(c).isEqualTo(Change.of(ranges("12xxxx"), asList(unassign))); | |||
| } | |||
| private static RangeTree ranges(String... rangeSpecs) { | |||
| return RangeTree.from(Arrays.stream(rangeSpecs).map(RangeSpecification::parse)); | |||
| } | |||
| } | |||
| @ -0,0 +1,58 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class ColumnGroupTest { | |||
| @Test | |||
| public void testGroupColumns() { | |||
| Column<Boolean> prototype = Column.ofBoolean("Region"); | |||
| ColumnGroup<PhoneRegion, Boolean> group = ColumnGroup.byRegion(prototype); | |||
| Column<Boolean> us = group.getColumnFromId("US"); | |||
| assertThat(us.getName()).isEqualTo("Region:US"); | |||
| assertThat(us.type()).isEqualTo(Boolean.class); | |||
| Column<Boolean> ca = group.getColumn(PhoneRegion.of("CA")); | |||
| assertThat(ca.getName()).isEqualTo("Region:CA"); | |||
| // Only the suffix part should be given to get the column from the group. | |||
| assertThrows(IllegalArgumentException.class, () -> group.getColumnFromId("Region:US")); | |||
| } | |||
| @Test | |||
| public void testExtractGroupColumns() { | |||
| Column<String> first = Column.ofString("FirstColumn"); | |||
| Column<String> last = Column.ofString("LastColumn"); | |||
| Column<Boolean> prototype = Column.ofBoolean("Region"); | |||
| ColumnGroup<PhoneRegion, Boolean> group = ColumnGroup.byRegion(prototype); | |||
| Column<Boolean> us = group.getColumnFromId("US"); | |||
| Column<Boolean> ca = group.getColumn(PhoneRegion.of("CA")); | |||
| // The prototype is a valid column, but it's not part of its own group. | |||
| assertThat(group.extractGroupColumns(ImmutableSet.of(first, us, prototype, ca, last))) | |||
| .containsExactly(PhoneRegion.of("US"), us, PhoneRegion.of("CA"), ca).inOrder(); | |||
| } | |||
| } | |||
| @ -0,0 +1,93 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UNKNOWN; | |||
| import static java.lang.Boolean.FALSE; | |||
| import static java.lang.Boolean.TRUE; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| /** Tests for {@link Column}: naming, typing, casting, parsing and serialization. */ | |||
| @RunWith(JUnit4.class) | |||
| public class ColumnTest { | |||
|   @Test | |||
|   public void testBooleanColumn() { | |||
|     Column<Boolean> boolColumn = Column.ofBoolean("bool"); | |||
|     assertThat(boolColumn.getName()).isEqualTo("bool"); | |||
|     assertThat(boolColumn.type()).isEqualTo(Boolean.class); | |||
|     assertThat(boolColumn.cast(true)).isTrue(); | |||
|     assertThrows(ClassCastException.class, () -> boolColumn.cast("")); | |||
|     // All upper or all lower case are accepted. | |||
|     assertThat(boolColumn.parse("true")).isTrue(); | |||
|     assertThat(boolColumn.parse("false")).isFalse(); | |||
|     assertThat(boolColumn.parse("TRUE")).isTrue(); | |||
|     assertThat(boolColumn.parse("FALSE")).isFalse(); | |||
|     // Serialization uses the lower case form. | |||
|     assertThat(boolColumn.serialize(TRUE)).isEqualTo("true"); | |||
|     assertThat(boolColumn.serialize(FALSE)).isEqualTo("false"); | |||
|     // We're lenient, but not that lenient. | |||
|     assertThrows(IllegalArgumentException.class, () -> boolColumn.parse("TruE")); | |||
|     assertThrows(IllegalArgumentException.class, () -> boolColumn.parse("FaLse")); | |||
|     // A column name containing ':' is rejected. | |||
|     assertThrows(IllegalArgumentException.class, () -> Column.ofBoolean("Foo:Bar")); | |||
|   } | |||
|   @Test | |||
|   public void testStringColumn() { | |||
|     Column<String> stringColumn = Column.ofString("string"); | |||
|     assertThat(stringColumn.getName()).isEqualTo("string"); | |||
|     assertThat(stringColumn.type()).isEqualTo(String.class); | |||
|     assertThat(stringColumn.cast("hello")).isEqualTo("hello"); | |||
|     assertThat(stringColumn.parse("")).isNull(); | |||
|     assertThrows(ClassCastException.class, () -> stringColumn.cast(true)); | |||
|     // Anything other than the empty string is permitted. | |||
|     assertThat(stringColumn.parse("world")).isEqualTo("world"); | |||
|     assertThat(stringColumn.serialize("world")).isEqualTo("world"); | |||
|     // Unquoted whitespace is stripped. | |||
|     assertThat(stringColumn.parse(" world ")).isEqualTo("world"); | |||
|     // You can preserve whitespace by surrounding the string with double quotes. | |||
|     assertThat(stringColumn.parse("\" world \"")).isEqualTo(" world "); | |||
|     assertThat(stringColumn.serialize(" world ")).isEqualTo("\" world \""); | |||
|     // And null is always the empty string. | |||
|     assertThat(stringColumn.serialize(null)).isEqualTo(""); | |||
|     // A column name containing ':' is rejected. | |||
|     assertThrows(IllegalArgumentException.class, () -> Column.ofString("Foo:Bar")); | |||
|   } | |||
|   @Test | |||
|   public void testEnumColumn() { | |||
|     Column<ValidNumberType> typeColumn = Column.of(ValidNumberType.class, "type", UNKNOWN); | |||
|     assertThat(typeColumn.getName()).isEqualTo("type"); | |||
|     assertThat(typeColumn.type()).isEqualTo(ValidNumberType.class); | |||
|     assertThat(typeColumn.cast(FIXED_LINE)).isEqualTo(FIXED_LINE); | |||
|     assertThrows(ClassCastException.class, () -> typeColumn.cast("")); | |||
|     // Several case formats are supported. | |||
|     assertThat(typeColumn.parse("FIXED_LINE")).isEqualTo(FIXED_LINE); | |||
|     assertThat(typeColumn.parse("fixed_line")).isEqualTo(FIXED_LINE); | |||
|     assertThat(typeColumn.parse("fixedLine")).isEqualTo(FIXED_LINE); | |||
|     // We're lenient, but not that lenient. | |||
|     assertThrows(IllegalArgumentException.class, () -> typeColumn.parse("fIxEdLiNe")); | |||
|     // A column name containing ':' is rejected. | |||
|     assertThrows( | |||
|         IllegalArgumentException.class, | |||
|         () -> Column.of(XmlNumberType.class, "Foo:Bar", XML_UNKNOWN)); | |||
|   } | |||
| } | |||
| @ -0,0 +1,177 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper; | |||
| import java.util.ArrayList; | |||
| import java.util.List; | |||
| import java.util.stream.Stream; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class CsvParserTest { | |||
| @Test | |||
| public void testSimple() { | |||
| // Simplest case. | |||
| assertSingleRow(CsvParser.commaSeparated(), "Hello,World!", "Hello", "World!"); | |||
| // Empty row yields one empty value in the "first column" (matches behaviour with quoting). | |||
| assertSingleRow(CsvParser.commaSeparated(), "", ""); | |||
| assertSingleRow(CsvParser.commaSeparated(), "\"\"", ""); | |||
| // Trailing delimiter yields a trailing empty value (matches behaviour with quoting). | |||
| assertSingleRow(CsvParser.commaSeparated(), "foo,", "foo", ""); | |||
| assertSingleRow(CsvParser.commaSeparated(), "foo,\"\"", "foo", ""); | |||
| } | |||
| @Test | |||
| public void testOtherDelimiters() { | |||
| // Tabs sequences are not "folded" (maybe this could be an option?) | |||
| assertSingleRow(CsvParser.tabSeparated(), "Hello\t\tWorld!", "Hello", "", "World!"); | |||
| assertSingleRow(CsvParser.withSeparator(';'), "Hello;World!", "Hello", "World!"); | |||
| } | |||
| @Test | |||
| public void testWhitespaceTrimming() { | |||
| // Whitespace is preserved by default, but can be trimmed. | |||
| assertSingleRow(CsvParser.commaSeparated(), | |||
| " foo, bar, baz ", " foo", " bar", " baz "); | |||
| assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), | |||
| " foo, bar, baz ", "foo", "bar", "baz"); | |||
| assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), | |||
| " foo, , ", "foo", "", ""); | |||
| } | |||
| @Test | |||
| public void testQuoting() { | |||
| // Quoting works as expected (and combines with whitespace trimming). | |||
| assertSingleRow(CsvParser.commaSeparated(), | |||
| "\"foo\",\"\"\"bar, baz\"\"\"", "foo", "\"bar, baz\""); | |||
| assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), | |||
| " \"foo\" , \"\"\"bar, baz\"\"\" ", "foo", "\"bar, baz\""); | |||
| } | |||
| @Test | |||
| public void testQuoting_illegal() { | |||
| // Without whitespace trimming any quotes in "unquoted" values are not permitted. | |||
| assertThrows(IllegalArgumentException.class, () -> | |||
| parse(CsvParser.commaSeparated(), "foo, \"bar, baz\"")); | |||
| } | |||
| @Test | |||
| public void testDelimiter() { | |||
| assertSingleRow(CsvParser.tabSeparated(), "Hello\tWorld!", "Hello", "World!"); | |||
| assertSingleRow(CsvParser.withSeparator(';'), "Hello;World!", "Hello", "World!"); | |||
| } | |||
| @Test | |||
| public void testUnicode() { | |||
| assertSingleRow(CsvParser.withSeparator('-'), "😱-😂-💩", "😱", "😂", "💩"); | |||
| assertSingleRow(CsvParser.commaSeparated(), "\0,😱😂,\n", "\0", "😱😂", "\n"); | |||
| // Fun fact, not all ISO control codes count as "whitespace". | |||
| assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), "\0,😱😂,\n", "\0", "😱😂", ""); | |||
| } | |||
| @Test | |||
| public void testMultiline() { | |||
| // Newlines become literals in quoted values. | |||
| List<List<String>> rows = parse(CsvParser.commaSeparated().allowMultiline(), | |||
| "foo,\"Hello,", | |||
| "World!\""); | |||
| assertThat(rows).hasSize(1); | |||
| assertThat(rows.get(0)).containsExactly("foo", "Hello,\nWorld!").inOrder(); | |||
| } | |||
| @Test | |||
| public void testMultilineWithTrimming() { | |||
| List<List<String>> rows = parse( | |||
| CsvParser.commaSeparated().allowMultiline().trimWhitespace(), | |||
| " foo , \" Hello,", | |||
| "World! \" "); | |||
| assertThat(rows).hasSize(1); | |||
| assertThat(rows.get(0)).containsExactly("foo", " Hello,\nWorld! ").inOrder(); | |||
| } | |||
| @Test | |||
| public void testMultiline_illegal() { | |||
| // If not configured for multiline values, this is an unterminated quoted value. | |||
| assertThrows(IllegalArgumentException.class, () -> | |||
| parse(CsvParser.commaSeparated(), "foo,\"Hello,", "World!\"")); | |||
| // This fails because no more lines exist (even if multiline is allowed) | |||
| assertThrows(IllegalArgumentException.class, () -> | |||
| parse(CsvParser.commaSeparated().allowMultiline(), "foo,\"Hello,")); | |||
| } | |||
| @Test | |||
| public void testRowMapping() { | |||
| List<ImmutableMap<String, String>> rows = parseMap( | |||
| CsvParser.commaSeparated(), | |||
| rowMapper(), | |||
| "FOO,BAR", | |||
| "foo,bar", | |||
| "Hello,World!", | |||
| "No Trailing,", | |||
| ",", | |||
| ""); | |||
| assertThat(rows).hasSize(5); | |||
| assertThat(rows.get(0)).containsExactly("FOO", "foo", "BAR", "bar").inOrder(); | |||
| assertThat(rows.get(1)).containsExactly("FOO", "Hello", "BAR", "World!").inOrder(); | |||
| assertThat(rows.get(2)).containsExactly("FOO", "No Trailing").inOrder(); | |||
| assertThat(rows.get(3)).isEmpty(); | |||
| assertThat(rows.get(4)).isEmpty(); | |||
| } | |||
| @Test | |||
| public void testRowMapping_withHeader() { | |||
| List<String> header = new ArrayList<>(); | |||
| List<ImmutableMap<String, String>> rows = parseMap( | |||
| CsvParser.commaSeparated(), | |||
| rowMapper(header::addAll), | |||
| "FOO,BAR", | |||
| "foo,bar"); | |||
| assertThat(rows).hasSize(1); | |||
| assertThat(header).containsExactly("FOO", "BAR").inOrder(); | |||
| assertThat(rows.get(0)).containsExactly("FOO", "foo", "BAR", "bar").inOrder(); | |||
| } | |||
| private void assertSingleRow(CsvParser parser, String line, String... values) { | |||
| List<List<String>> rows = parse(parser, line); | |||
| assertThat(rows).hasSize(1); | |||
| assertThat(rows.get(0)).containsExactlyElementsIn(values).inOrder(); | |||
| } | |||
| private static List<List<String>> parse(CsvParser parser, String... lines) { | |||
| List<List<String>> rows = new ArrayList<>(); | |||
| parser.parse(Stream.of(lines), r -> rows.add(r.collect(toImmutableList()))); | |||
| return rows; | |||
| } | |||
| private static List<ImmutableMap<String, String>> parseMap( | |||
| CsvParser p, RowMapper mapper, String... lines) { | |||
| List<ImmutableMap<String, String>> rows = new ArrayList<>(); | |||
| p.parse(Stream.of(lines), mapper.mapTo(rows::add)); | |||
| return rows; | |||
| } | |||
| } | |||
| @ -0,0 +1,275 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.AREA_CODE_LENGTH; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.COMMENT; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.FIXED_LINE; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.FIXED_LINE_OR_MOBILE; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.MOBILE; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.FORMAT; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.REGIONS; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.TABLE_COLUMNS; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.TYPE; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.toCsv; | |||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.toRangeTable; | |||
| import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.ALL; | |||
| import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.CHANGES; | |||
| import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.LHS; | |||
| import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.RHS; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.common.collect.HashBasedTable; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.Table; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema; | |||
| import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey; | |||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import java.io.IOException; | |||
| import java.io.PrintWriter; | |||
| import java.io.StringReader; | |||
| import java.io.StringWriter; | |||
| import java.util.Optional; | |||
| import java.util.stream.IntStream; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class CsvTableTest { | |||
| private static final CsvKeyMarshaller<String> TEST_MARSHALLER = | |||
| CsvKeyMarshaller.ofSortedString("Id"); | |||
| private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA")); | |||
| private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US")); | |||
| @Test | |||
| public void testRangeTableExport() throws IOException { | |||
| ImmutableList<Column<?>> columns = | |||
| ImmutableList.of(TYPE, AREA_CODE_LENGTH, REGION_CA, REGION_US, COMMENT); | |||
| RangeTable table = RangeTable.builder(TABLE_COLUMNS) | |||
| .apply(row(columns, key("1", 7), MOBILE, 0, true, true)) | |||
| .apply(row(columns, key("2x[34]", 7, 8), FIXED_LINE_OR_MOBILE, 0, true, null, "Foo Bar")) | |||
| .apply(row(columns, key("345", 8), FIXED_LINE, 3, true, null)) | |||
| .apply(row(columns, key("456x8", 8), FIXED_LINE, 3, null, true)) | |||
| .build(); | |||
| CsvTable<RangeKey> csv = toCsv(table); | |||
| assertCsv(csv, | |||
| "Prefix ; Length ; Type ; Area Code Length ; Regions ; Comment", | |||
| "1 ; 7 ; MOBILE ; 0 ; \"CA,US\"", | |||
| "2x[34] ; 7,8 ; FIXED_LINE_OR_MOBILE ; 0 ; \"CA\" ; \"Foo Bar\"", | |||
| "345 ; 8 ; FIXED_LINE ; 3 ; \"CA\"", | |||
| "456x8 ; 8 ; FIXED_LINE ; 3 ; \"US\""); | |||
| assertThat(toRangeTable(csv)).isEqualTo(table); | |||
| } | |||
| @Test | |||
| public void testExampleNumberExport() throws IOException { | |||
| Table<PhoneRegion, ValidNumberType, DigitSequence> table = HashBasedTable.create(); | |||
| table.put(PhoneRegion.of("US"), ValidNumberType.TOLL_FREE, DigitSequence.of("800123456")); | |||
| table.put(PhoneRegion.of("US"), ValidNumberType.PREMIUM_RATE, DigitSequence.of("945123456")); | |||
| table.put(PhoneRegion.of("CA"), ValidNumberType.MOBILE, DigitSequence.of("555123456")); | |||
| // Ordering is well defined in the CSV output. | |||
| // TODO: Consider making columns able to identify if their values need CSV escaping. | |||
| CsvTable<ExampleNumberKey> csv = ExamplesTableSchema.toCsv(table); | |||
| assertCsv(csv, | |||
| "Region ; Type ; Number", | |||
| "CA ; MOBILE ; \"555123456\"", | |||
| "US ; TOLL_FREE ; \"800123456\"", | |||
| "US ; PREMIUM_RATE ; \"945123456\""); | |||
| assertThat(ExamplesTableSchema.toExampleTable(csv)).isEqualTo(table); | |||
| } | |||
| @Test | |||
| public void testDiff() throws IOException { | |||
| ImmutableList<Column<?>> columns = ImmutableList.of(COMMENT); | |||
| RangeTable lhs = RangeTable.builder(TABLE_COLUMNS) | |||
| .apply(row(columns, key("1", 6), "Left Side Only")) | |||
| .apply(row(columns, key("3", 6), "Left Value")) | |||
| .apply(row(columns, key("4", 6), "Same Value")) | |||
| .build(); | |||
| RangeTable rhs = RangeTable.builder(TABLE_COLUMNS) | |||
| .apply(row(columns, key("2", 6), "Right Side Only")) | |||
| .apply(row(columns, key("3", 6), "Right Value")) | |||
| .apply(row(columns, key("4", 6), "Same Value")) | |||
| .build(); | |||
| assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), ALL), | |||
| "Diff ; Prefix ; Length ; Comment", | |||
| "---- ; 1 ; 6 ; \"Left Side Only\"", | |||
| "++++ ; 2 ; 6 ; \"Right Side Only\"", | |||
| "<<<< ; 3 ; 6 ; \"Left Value\"", | |||
| ">>>> ; 3 ; 6 ; \"Right Value\"", | |||
| "==== ; 4 ; 6 ; \"Same Value\""); | |||
| assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), CHANGES), | |||
| "Diff ; Prefix ; Length ; Comment", | |||
| "---- ; 1 ; 6 ; \"Left Side Only\"", | |||
| "++++ ; 2 ; 6 ; \"Right Side Only\"", | |||
| "<<<< ; 3 ; 6 ; \"Left Value\"", | |||
| ">>>> ; 3 ; 6 ; \"Right Value\""); | |||
| assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), LHS), | |||
| "Diff ; Prefix ; Length ; Comment", | |||
| "---- ; 1 ; 6 ; \"Left Side Only\"", | |||
| "<<<< ; 3 ; 6 ; \"Left Value\"", | |||
| "==== ; 4 ; 6 ; \"Same Value\""); | |||
| assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), RHS), | |||
| "Diff ; Prefix ; Length ; Comment", | |||
| "++++ ; 2 ; 6 ; \"Right Side Only\"", | |||
| ">>>> ; 3 ; 6 ; \"Right Value\"", | |||
| "==== ; 4 ; 6 ; \"Same Value\""); | |||
| } | |||
| @Test | |||
| public void testEscaping() throws IOException { | |||
| ImmutableList<Column<?>> columns = ImmutableList.of(COMMENT); | |||
| RangeTable table = RangeTable.builder(TABLE_COLUMNS) | |||
| .apply(row(columns, key("1", 6), "Doubling \" Double Quotes")) | |||
| .apply(row(columns, key("2", 6), "Escaping \n Newlines")) | |||
| .apply(row(columns, key("3", 6), "Other \t \\ \r Escaping")) | |||
| .build(); | |||
| assertCsv(toCsv(table), | |||
| "Prefix ; Length ; Comment", | |||
| "1 ; 6 ; \"Doubling \"\" Double Quotes\"", | |||
| "2 ; 6 ; \"Escaping \\n Newlines\"", | |||
| "3 ; 6 ; \"Other \\t \\\\ \\r Escaping\""); | |||
| } | |||
| @Test | |||
| public void testOrdering() throws IOException { | |||
| // This came up in relation to discovering that ImmutableSet.copyOf(TreeBasedTable) does not | |||
| // result in rows/columns in the order of the TreeBasedTable's column comparator. Hence the | |||
| // code does a copy via a temporary ImmutableTable.Builder. | |||
| ImmutableList<Column<?>> columns = | |||
| ImmutableList.of(TYPE, AREA_CODE_LENGTH, REGION_US, COMMENT); | |||
| RangeTable table = RangeTable.builder(TABLE_COLUMNS) | |||
| .apply(row(columns, key("1", 4), null, null, null, "Foo Bar")) | |||
| .apply(row(columns, key("2", 4), null, null, true)) | |||
| .apply(row(columns, key("3", 4), null, 2)) | |||
| .apply(row(columns, key("4", 4), MOBILE)) | |||
| .build(); | |||
| CsvTable<RangeKey> csv = toCsv(table); | |||
| assertCsv( | |||
| csv, | |||
| "Prefix ; Length ; Type ; Area Code Length ; Regions ; Comment", | |||
| "1 ; 4 ; ; ; ; \"Foo Bar\"", | |||
| "2 ; 4 ; ; ; \"US\"", | |||
| "3 ; 4 ; ; 2", | |||
| "4 ; 4 ; MOBILE"); | |||
| assertThat(toRangeTable(csv)).isEqualTo(table); | |||
| } | |||
| // This is (Jan 2019) currently impossible using ImmutableTable. | |||
| @Test | |||
| public void testOptionalRowOrdering() throws IOException { | |||
| CsvKeyMarshaller<Integer> unorderedIntegerMarshaller = | |||
| new CsvKeyMarshaller<>( | |||
| n -> IntStream.of(n).boxed().map(Object::toString), | |||
| p -> Integer.parseInt(p.get(0)), | |||
| Optional.empty(), | |||
| "Unordered"); | |||
| CsvSchema<Integer> schema = | |||
| CsvSchema.of(unorderedIntegerMarshaller, RangesTableSchema.SCHEMA.columns()); | |||
| CsvTable.Builder<Integer> csv = CsvTable.builder(schema); | |||
| csv.putRow(4, ImmutableMap.of(COMMENT, "Foo Bar")); | |||
| csv.putRow(1, ImmutableMap.of(FORMAT, "Quux")); | |||
| csv.putRow(3, ImmutableMap.of(AREA_CODE_LENGTH, 2)); | |||
| csv.putRow(2, ImmutableMap.of(TYPE, MOBILE)); | |||
| assertCsv( | |||
| csv.build(), | |||
| "Unordered ; Type ; Area Code Length ; Format ; Comment", | |||
| "4 ; ; ; ; \"Foo Bar\"", | |||
| "1 ; ; ; \"Quux\"", | |||
| "3 ; ; 2", | |||
| "2 ; MOBILE"); | |||
| } | |||
| @Test | |||
| public void testUnsafeString() { | |||
| Column<String> unsafe = Column.ofString("unsafe"); | |||
| CsvSchema<String> schema = CsvSchema.of(TEST_MARSHALLER, Schema.builder().add(unsafe).build()); | |||
| CsvTable<String> csv = | |||
| CsvTable.builder(schema).put("key", unsafe, "Control chars Not \0 Allowed").build(); | |||
| assertThrows(IllegalArgumentException.class, () -> export(csv, false)); | |||
| } | |||
| private enum Perverse { | |||
| UNSAFE_VALUE() { | |||
| @Override | |||
| public String toString() { | |||
| return "Unsafe ; for \n \"CSV\""; | |||
| } | |||
| }; | |||
| } | |||
| @Test | |||
| public void testPerverseEdgeCase() { | |||
| Column<Perverse> unsafe = Column.of(Perverse.class, "Unsafe", Perverse.UNSAFE_VALUE); | |||
| CsvSchema<String> schema = CsvSchema.of(TEST_MARSHALLER, Schema.builder().add(unsafe).build()); | |||
| CsvTable<String> csv = | |||
| CsvTable.builder(schema).put("key", unsafe, Perverse.UNSAFE_VALUE).build(); | |||
| assertThrows(IllegalArgumentException.class, () -> export(csv, false)); | |||
| } | |||
| private static <K> void assertCsv(CsvTable<K> csv, String... lines) throws IOException { | |||
| String aligned = join(lines); | |||
| // Assumes test values don't contain semi-colons where space matters. | |||
| String unaligned = aligned.replaceAll(" *; *", ";"); | |||
| String exported = export(csv, true); | |||
| assertThat(exported).isEqualTo(aligned); | |||
| assertThat(export(csv, false)).isEqualTo(unaligned); | |||
| CsvTable<K> imported = CsvTable.importCsv(csv.getSchema(), new StringReader(exported)); | |||
| assertThat(csv).isEqualTo(imported); | |||
| } | |||
| private static String export(CsvTable<?> csv, boolean align) { | |||
| StringWriter out = new StringWriter(); | |||
| csv.exportCsv(new PrintWriter(out), align); | |||
| return out.toString(); | |||
| } | |||
| private static Change row(ImmutableList<Column<?>> columns, RangeKey key, Object... values) { | |||
| Change.Builder row = Change.builder(key.asRangeTree()); | |||
| checkArgument(values.length <= columns.size()); | |||
| int n = 0; | |||
| for (Object v : values) { | |||
| if (v != null) { | |||
| Column<?> c = columns.get(n); | |||
| row.assign(c, c.cast(v)); | |||
| } | |||
| n++; | |||
| } | |||
| return row.build(); | |||
| } | |||
| private static String join(String... lines) { | |||
| return String.join("\n", lines) + "\n"; | |||
| } | |||
| private static RangeKey key(String spec, Integer... lengths) { | |||
| RangeSpecification prefix = | |||
| spec.isEmpty() ? RangeSpecification.empty() : RangeSpecification.parse(spec); | |||
| return RangeKey.create(prefix, ImmutableSet.copyOf(lengths)); | |||
| } | |||
| } | |||
| @ -0,0 +1,132 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import java.util.stream.Stream; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class RangeKeyTest { | |||
| @Test | |||
| public void testEmpty() { | |||
| ImmutableList<RangeKey> keys = RangeKey.decompose(RangeTree.empty()); | |||
| assertThat(keys).isEmpty(); | |||
| } | |||
| @Test | |||
| public void testZeroLengthMatch() { | |||
| ImmutableList<RangeKey> keys = RangeKey.decompose(RangeTree.from(RangeSpecification.empty())); | |||
| assertThat(keys).containsExactly(key("", 0)); | |||
| } | |||
| @Test | |||
| public void testOnlyAnyPath() { | |||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges("xxx", "xxxx", "xxxxx")); | |||
| assertThat(keys).containsExactly(key("", 3, 4, 5)); | |||
| } | |||
| @Test | |||
| public void testSimple() { | |||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges("123xxx", "123xxxx", "123xxxxx")); | |||
| assertThat(keys).containsExactly(key("123", 6, 7, 8)); | |||
| } | |||
| @Test | |||
| public void testEmbeddedRanges() { | |||
| ImmutableList<RangeKey> keys = | |||
| RangeKey.decompose(ranges("1x", "1xx", "1xx23", "1xx23x", "1xx23xx")); | |||
| assertThat(keys).containsExactly(key("1", 2, 3), key("1xx23", 5, 6, 7)).inOrder(); | |||
| } | |||
| @Test | |||
| public void testSplitFactors() { | |||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges("123xxxx", "1234x", "1234xx")); | |||
| // If the input wasn't "factored" first, this would result in: | |||
| // key("123[0-35-9]", 7), key("1234", 5, 6, 7) | |||
| assertThat(keys).containsExactly(key("123", 7), key("1234", 5, 6)).inOrder(); | |||
| } | |||
| @Test | |||
| public void testMergeStrategy() { | |||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges("12[0-4]xxx", "12xxx", "12xx")); | |||
| // The merge strategy for factorizing the tree will prefer to keep the longer paths intact | |||
| // and split shorter paths around it. Using the other strategy we would get: | |||
| // key("12", 4, 5), key("12[0-4]", 6) | |||
| assertThat(keys).containsExactly(key("12[0-4]", 4, 5, 6), key("12[5-9]", 4, 5)).inOrder(); | |||
| } | |||
| @Test | |||
| public void testAsRangeSpecifications() { | |||
| assertThat(key("", 3, 4, 5).asRangeSpecifications()) | |||
| .containsExactly(spec("xxx"), spec("xxxx"), spec("xxxxx")).inOrder(); | |||
| assertThat(key("1[2-4]", 3, 4, 5).asRangeSpecifications()) | |||
| .containsExactly(spec("1[2-4]x"), spec("1[2-4]xx"), spec("1[2-4]xxx")).inOrder(); | |||
| assertThat(key("1x[468]", 3, 5, 7).asRangeSpecifications()) | |||
| .containsExactly(spec("1x[468]"), spec("1x[468]xx"), spec("1x[468]xxxx")).inOrder(); | |||
| } | |||
| @Test | |||
| public void testSimpleRealWorldData() { | |||
| // From ITU German numbering plan, first few fixed line ranges. | |||
| PrefixTree prefixes = | |||
| PrefixTree.from(ranges("20[1-389]", "204[135]", "205[1-468]", "206[4-6]", "20[89]")); | |||
| RangeTree ranges = prefixes.retainFrom( | |||
| ranges("xxxxxx", "xxxxxxx", "xxxxxxxx", "xxxxxxxxx", "xxxxxxxxxx", "xxxxxxxxxxx")); | |||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges); | |||
| assertThat(keys).containsExactly( | |||
| key("20[1-389]", 6, 7, 8, 9, 10, 11), | |||
| key("204[135]", 6, 7, 8, 9, 10, 11), | |||
| key("205[1-468]", 6, 7, 8, 9, 10, 11), | |||
| key("206[4-6]", 6, 7, 8, 9, 10, 11)) | |||
| .inOrder(); | |||
| } | |||
| @Test | |||
| public void testContains() { | |||
| RangeKey key = key("1[23]", 7, 8, 9); | |||
| assertThat(key.contains(digitSequence("12"), 8)).isTrue(); | |||
| assertThat(key.contains(digitSequence("12"), 10)).isFalse(); | |||
| assertThat(key.contains(digitSequence("7"), 8)).isFalse(); | |||
| } | |||
| private static RangeKey key(String spec, Integer... lengths) { | |||
| RangeSpecification prefix = | |||
| spec.isEmpty() ? RangeSpecification.empty() : RangeSpecification.parse(spec); | |||
| return RangeKey.create(prefix, ImmutableSet.copyOf(lengths)); | |||
| } | |||
| private static RangeTree ranges(String... spec) { | |||
| return RangeTree.from(Stream.of(spec).map(RangeSpecification::parse)); | |||
| } | |||
| private static RangeSpecification spec(String spec) { | |||
| return RangeSpecification.parse(spec); | |||
| } | |||
| private static DigitSequence digitSequence(String spec) { | |||
| return DigitSequence.of(spec); | |||
| } | |||
| } | |||
| @ -0,0 +1,412 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.MOBILE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PREMIUM_RATE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.SHARED_COST; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.TOLL_FREE; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN; | |||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTableSubject.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat; | |||
| import static java.util.stream.IntStream.rangeClosed; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.Table; | |||
| import com.google.common.collect.Table.Cell; | |||
| import com.google.common.collect.Tables; | |||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||
| import java.util.Arrays; | |||
| import java.util.Map; | |||
| import java.util.Optional; | |||
| import java.util.function.Function; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class RangeTableTest { | |||
| private static final Column<ValidNumberType> TYPE = | |||
| Column.of(ValidNumberType.class, "Type", UNKNOWN); | |||
| public static final Column<Integer> AREA_CODE_LENGTH = Column.ofUnsignedInteger("AreaCodeLength"); | |||
| private static final ColumnGroup<PhoneRegion, Boolean> REGIONS = | |||
| ColumnGroup.byRegion(Column.ofBoolean("Region")); | |||
| private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US")); | |||
| private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA")); | |||
| private static final Schema SCHEMA = | |||
| Schema.builder().add(TYPE).add(AREA_CODE_LENGTH).add(REGIONS).build(); | |||
| // This is essentially the most "extreme" simplification you can have. All detail is removed and | |||
| // lengths are merged into a contiguous range. It's basically like turning a range into "\d{n,m}" | |||
| // For example, { "123", "12345" } becomes { "xxx", "xxxx", "xxxxx" }. | |||
| private static final Function<Change, RangeTree> EXTREME_SIMPLIFICATION = | |||
| c -> RangeTree.from( | |||
| rangeClosed(c.getRanges().getLengths().first(), c.getRanges().getLengths().last()) | |||
| .mapToObj(RangeSpecification::any)); | |||
| @Test | |||
| public void testEmptyMap() { | |||
| RangeTable table = RangeTable.builder(SCHEMA).build(); | |||
| assertThat(table).isEmpty(); | |||
| } | |||
| @Test | |||
| public void testBasicAssign() { | |||
| RangeTable.Builder table = RangeTable.builder(SCHEMA); | |||
| table.assign(TYPE, MOBILE, ranges("1[234]xxxx"), OverwriteMode.ALWAYS); | |||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("1[234]xxxx")); | |||
| table.assign(TYPE, null, ranges("13xxxx"), OverwriteMode.ALWAYS); | |||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("1[24]xxxx")); | |||
| Assignment<ValidNumberType> fixedLine = Assignment.of(TYPE, FIXED_LINE); | |||
| // Overwrite an existing range. | |||
| table.assign(fixedLine, ranges("14xxxx"), OverwriteMode.ALWAYS); | |||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx")); | |||
| assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("14xxxx")); | |||
| // Partially overwrite an existing range (same value). | |||
| table.assign(fixedLine, ranges("1[34]xxxx"), OverwriteMode.SAME); | |||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx")); | |||
| assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("1[34]xxxx")); | |||
| // Fail to overwrite range with a different value in "SAME" mode. | |||
| assertThrows(IllegalArgumentException.class, | |||
| () -> table.assign(fixedLine, ranges("1[23]xxxx"), OverwriteMode.SAME)); | |||
| // Add new ranges (but never overwriting). | |||
| table.assign(fixedLine, ranges("15xxxx"), OverwriteMode.NEVER); | |||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx")); | |||
| assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("1[3-5]xxxx")); | |||
| // Fail to write ranges with the same value in "NEVER" mode. | |||
| assertThrows(IllegalArgumentException.class, | |||
| () -> table.assign(fixedLine, ranges("15xxxx"), OverwriteMode.NEVER)); | |||
| // Unassignment (null value) makes no sense for modes other than "ALWAYS". | |||
| // TODO: This highlights the way this API is bad, make a separate "unassign" method. | |||
| assertThrows(IllegalArgumentException.class, | |||
| () -> table.assign(TYPE, null, ranges("123"), OverwriteMode.SAME)); | |||
| assertThrows(IllegalArgumentException.class, | |||
| () -> table.assign(TYPE, null, ranges("123"), OverwriteMode.NEVER)); | |||
| } | |||
| @Test | |||
| public void testApplyChanges() { | |||
| // Changes ordered top-to-bottom. | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| .apply(assign( | |||
| ranges("[18]2xxxxx"), ImmutableMap.of(TYPE, MOBILE, AREA_CODE_LENGTH, 3))) | |||
| .apply(assign(ranges("7xxxxxx"), TYPE, MOBILE)) | |||
| .apply(assign(ranges("[1-3]xxxxxx"), TYPE, FIXED_LINE)) | |||
| .build(); | |||
| // The union of all the ranges. | |||
| assertThat(table).allRanges().containsExactly("[1-37]xxxxxx", "82xxxxx"); | |||
| // The ranges assigned for various columns. | |||
| assertThat(table).assigned(TYPE).containsExactly("[1-37]xxxxxx", "82xxxxx"); | |||
| assertThat(table).assigned(AREA_CODE_LENGTH).containsExactly("[18]2xxxxx"); | |||
| // Note that the 12xxxxx range is replaced by the fixed line in the type map. | |||
| assertThat(table).assigned(TYPE, FIXED_LINE).containsExactly("[1-3]xxxxxx"); | |||
| assertThat(table).assigned(TYPE, MOBILE).containsExactly("7xxxxxx", "82xxxxx"); | |||
| // Area code length unaffected by update of the 12xxxxx range (only type was affected). | |||
| assertThat(table).assigned(AREA_CODE_LENGTH, 3).containsExactly("[18]2xxxxx"); | |||
| } | |||
| @Test | |||
| public void testBareRangeAddition() { | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| .add(ranges("1xxxxx")) | |||
| .apply(assign(ranges("12xxxx"), TYPE, MOBILE)) | |||
| .build(); | |||
| assertThat(table).allRanges().containsExactly("1xxxxx"); | |||
| // Note that there is not "getUnassignedRanges()" on RangeTable (yet), so we fudge it by | |||
| // checking that there's only one column and looking at all the assigned ranges in it. | |||
| assertThat(table).hasColumns(TYPE); | |||
| assertThat(table).assigned(TYPE).containsExactly("12xxxx"); | |||
| // Also check that the re-built builder remembers the unassigned ranges. | |||
| RangeTable.Builder builder = table.toBuilder(); | |||
| assertThat(builder.getAllRanges()).containsExactly("1xxxxx"); | |||
| assertThat(builder.getAssignedRanges(TYPE)).containsExactly("12xxxx"); | |||
| } | |||
| @Test | |||
| public void testAssignAndUnassign() { | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| .apply(assign(ranges("1xxxxx"), TYPE, MOBILE)) | |||
| .apply(unassign(ranges("1[0-4]xxxx"), TYPE)) | |||
| .build(); | |||
| assertThat(table).allRanges().containsExactly("1xxxxx"); | |||
| assertThat(table).hasColumns(TYPE); | |||
| assertThat(table).assigned(TYPE).containsExactly("1[5-9]xxxx"); | |||
| // Also check that the re-built builder remembers the unassigned ranges. | |||
| RangeTable.Builder builder = table.toBuilder(); | |||
| assertThat(builder.getAllRanges()).containsExactly("1xxxxx"); | |||
| assertThat(builder.getAssignedRanges(TYPE)).containsExactly("1[5-9]xxxx"); | |||
| } | |||
| @Test | |||
| public void testAssignAndRemove() { | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| .apply(assign(ranges("1xxxxx"), TYPE, MOBILE)) | |||
| .remove(ranges("1[5-9]xxxx")) | |||
| .build(); | |||
| assertThat(table).allRanges().containsExactly("1[0-4]xxxx"); | |||
| assertThat(table).hasColumns(TYPE); | |||
| assertThat(table).assigned(TYPE).containsExactly("1[0-4]xxxx"); | |||
| RangeTable.Builder builder = table.toBuilder(); | |||
| assertThat(builder.getAllRanges()).containsExactly("1[0-4]xxxx"); | |||
| assertThat(builder.getAssignedRanges(TYPE)).containsExactly("1[0-4]xxxx"); | |||
| } | |||
| @Test | |||
| public void testTableImportExport() { | |||
| RangeTable original = RangeTable.builder(SCHEMA) | |||
| .apply(assign(ranges("[13]xxxxxx"), TYPE, MOBILE)) | |||
| .apply(assign(ranges("[24]xxxxxx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("[14]xxxxxx"), AREA_CODE_LENGTH, 3)) | |||
| .apply(assign(ranges("[23]xxxxxx"), AREA_CODE_LENGTH, 2)) | |||
| .build(); | |||
| Table<RangeSpecification, Column<?>, Optional<?>> exported = original.toImmutableTable(); | |||
| assertThat(exported).hasSize(8); | |||
| assertThat(exported).containsCell(assigned("1xxxxxx", TYPE, MOBILE)); | |||
| assertThat(exported).containsCell(assigned("1xxxxxx", AREA_CODE_LENGTH, 3)); | |||
| assertThat(exported).containsCell(assigned("2xxxxxx", TYPE, FIXED_LINE)); | |||
| assertThat(exported).containsCell(assigned("2xxxxxx", AREA_CODE_LENGTH, 2)); | |||
| assertThat(exported).containsCell(assigned("3xxxxxx", TYPE, MOBILE)); | |||
| assertThat(exported).containsCell(assigned("3xxxxxx", AREA_CODE_LENGTH, 2)); | |||
| assertThat(exported).containsCell(assigned("4xxxxxx", TYPE, FIXED_LINE)); | |||
| assertThat(exported).containsCell(assigned("4xxxxxx", AREA_CODE_LENGTH, 3)); | |||
| RangeTable imported = RangeTable.from(SCHEMA, exported); | |||
| assertThat(imported).isEqualTo(original); | |||
| assertThat(imported.toImmutableTable()).isEqualTo(exported); | |||
| } | |||
| @Test | |||
| public void testColumnGroupMapping() { | |||
| // Changes ordered top-to-bottom. | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| .apply(assign(ranges("1xxxxx"), ImmutableMap.of(REGION_US, true))) | |||
| .apply(assign(ranges("2xxxxx"), ImmutableMap.of(REGION_CA, true))) | |||
| .apply(assign(ranges("3xxxxx"), ImmutableMap.of(REGION_US, true, REGION_CA, true))) | |||
| .build(); | |||
| // The union of all the ranges. | |||
| assertThat(table).allRanges().containsExactly("[1-3]xxxxx"); | |||
| Map<PhoneRegion, Column<Boolean>> regionMap = REGIONS.extractGroupColumns(table.getColumns()); | |||
| assertThat(regionMap.keySet()).containsExactly(PhoneRegion.of("US"), PhoneRegion.of("CA")); | |||
| assertThat(table.getAssignedRanges(regionMap.get(PhoneRegion.of("US")))).containsExactly("[13]xxxxx"); | |||
| assertThat(table.getAssignedRanges(regionMap.get(PhoneRegion.of("CA")))).containsExactly("[23]xxxxx"); | |||
| // If a column in a group is not present, it counts as having no ranges, but if a plain column | |||
| // is not in the schema at all, it's an error. | |||
| assertThat(table.getAssignedRanges(REGIONS.getColumn(PhoneRegion.of("CH")))).isEmpty(); | |||
| Column<String> bogus = Column.ofString("Bogus"); | |||
| assertThrows(IllegalArgumentException.class, () -> table.getAssignedRanges(bogus)); | |||
| Column<String> nope = ColumnGroup.byRegion(bogus).getColumn(PhoneRegion.of("US")); | |||
| assertThrows(IllegalArgumentException.class, () -> table.getAssignedRanges(nope)); | |||
| } | |||
| @Test | |||
| public void testSubTable() { | |||
| RangeTable original = RangeTable.builder(SCHEMA) | |||
| .apply(assign(ranges("[13]xxxxxx"), TYPE, MOBILE)) | |||
| .apply(assign(ranges("[24]xxxxxx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("[14]xxxxxx"), AREA_CODE_LENGTH, 3)) | |||
| .apply(assign(ranges("[23]xxxxxx"), AREA_CODE_LENGTH, 2)) | |||
| .build(); | |||
| // Restrict to the ranges in which area code length is 2, but keep only the type column. | |||
| RangeTable subTable = original.subTable(original.getRanges(AREA_CODE_LENGTH, 2), TYPE); | |||
| assertThat(subTable).hasColumns(TYPE); | |||
| assertThat(subTable).hasRowCount(2); | |||
| assertThat(subTable).hasRanges("2xxxxxx", FIXED_LINE); | |||
| assertThat(subTable).hasRanges("3xxxxxx", MOBILE); | |||
| } | |||
| @Test | |||
| public void testGetPrefixMap() { | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| .apply(assign(ranges("1234xxxx", "1256xxxx"), TYPE, MOBILE)) | |||
| .apply(assign(ranges("1236xxx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("4xxxx"), TYPE, TOLL_FREE)) | |||
| .apply(assign(ranges("49xxxx"), TYPE, PREMIUM_RATE)) | |||
| .build(); | |||
| ImmutableMap<ValidNumberType, PrefixTree> map = table.getPrefixMap(TYPE, 0); | |||
| assertThat(map).containsEntry(MOBILE, PrefixTree.from(ranges("1234", "125"))); | |||
| assertThat(map).containsEntry(FIXED_LINE, PrefixTree.from(ranges("1236"))); | |||
| // The ranges 4xxxx and 49xxxx overlap (since 49 is a prefix for both) and the prefix map | |||
| // contains the shortest unique prefix for each range. The mapping from TOLL_FREE could not | |||
| // contain only "4[0-8]" since that would not match "49123". Overlapping range lengths with | |||
| // different types is thus highly problematic, but the prefix map will contain mappings for | |||
| // both, and it's up to the caller to handle this, possibly by ordering any checks made. | |||
| assertThat(map).containsEntry(TOLL_FREE, PrefixTree.from(ranges("4"))); | |||
| assertThat(map).containsEntry(PREMIUM_RATE, PrefixTree.from(ranges("49"))); | |||
| } | |||
| @Test | |||
| public void testGetPrefixMap_minLength() { | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| .apply(assign(ranges("123xxxxx", "1256xxxx"), TYPE, MOBILE)) | |||
| .apply(assign(ranges("124xxx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("4xxxx"), TYPE, TOLL_FREE)) | |||
| .apply(assign(ranges("49xxxx"), TYPE, PREMIUM_RATE)) | |||
| .build(); | |||
| ImmutableMap<ValidNumberType, PrefixTree> map = table.getPrefixMap(TYPE, 3); | |||
| assertThat(map).containsEntry(MOBILE, PrefixTree.from(ranges("12[35]"))); | |||
| assertThat(map).containsEntry(FIXED_LINE, PrefixTree.from(ranges("124"))); | |||
| assertThat(map).containsEntry(TOLL_FREE, PrefixTree.from(ranges("4"))); | |||
| assertThat(map).containsEntry(PREMIUM_RATE, PrefixTree.from(ranges("49"))); | |||
| } | |||
| @Test | |||
| public void testSimplify_multipleColumns() { | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| // This can't be simplified since expanding any of the area code length ranges will overlap | |||
| // (possibly with the unassigned area code length ranges). | |||
| .apply(assign(ranges("1[0-4]x_xxxx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("12x_xxxx"), AREA_CODE_LENGTH, 2)) | |||
| .apply(assign(ranges("123_xxxx"), AREA_CODE_LENGTH, 3)) | |||
| .apply(assign(ranges("123_4xxx"), AREA_CODE_LENGTH, 4)) | |||
| // This can be simplified since it expands into "empty" ranges. | |||
| .apply(assign(ranges("156_xxxx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("156_xxxx"), AREA_CODE_LENGTH, 3)) | |||
| .apply(assign(ranges("234_xxxx"), TYPE, MOBILE)) | |||
| // This should be ignored since simplification happens only on the other columns. | |||
| .apply(assign(ranges("[12]23_xxxx"), REGION_CA, true)) | |||
| .build(); | |||
| RangeTable simplified = | |||
| table.simplify(c -> c.getRanges().significantDigits(2), 0, TYPE, AREA_CODE_LENGTH); | |||
| assertThat(simplified).hasColumns(TYPE, AREA_CODE_LENGTH); | |||
| // The 156 range got pulled back to 2 digits (the other was already 2 digits). | |||
| assertThat(simplified).assigned(TYPE, FIXED_LINE).containsExactly("1[0-4]x_xxxx", "15x_xxxx"); | |||
| // The 234 range got pulled back to 2 digits. | |||
| assertThat(simplified).assigned(TYPE, MOBILE).containsExactly("23x_xxxx"); | |||
| assertThat(simplified).assigned(AREA_CODE_LENGTH, 2).containsExactly("12[0-24-9]_xxxx"); | |||
| // The 123 ranges were preserved, but the 156 range was pulled back to 2 digits. | |||
| assertThat(simplified).assigned(AREA_CODE_LENGTH, 3) | |||
| .containsExactly("123_[0-35-9]xxx", "15x_xxxx"); | |||
| assertThat(simplified).assigned(AREA_CODE_LENGTH, 4).containsExactly("123_4xxx"); | |||
| } | |||
| @Test | |||
| public void testSimplify_chineseRanges() { | |||
| // This mimics real data found in the CN regular expression whereby a SHARED_COST range | |||
| // partially overlaps with the fixed line prefixes. | |||
| RangeTable table = RangeTable.builder(SCHEMA) | |||
| // The pattern is: | |||
| // abc | length=10 | FIXED_LINE | |||
| // abc100 | length=8 | FIXED_LINE | |||
| // abc95 | length=8,9 | FIXED_LINE | |||
| // abc96 | length=8,9 | SHARED_COST | |||
| .apply(assign(ranges("123_xxx_xxxx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("123_100xx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("123_95xxx", "123_95xxxx"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("123_96xxx", "123_96xxxx"), TYPE, SHARED_COST)) | |||
| // Just add a range that sits "either side" of what's being simplified to ensure it | |||
| // doesn't "leak". | |||
| .apply(assign(ranges("1[13]4_56xx_xxxx"), TYPE, MOBILE)) | |||
| .build(); | |||
| RangeTable simplified = table.simplify(c -> c.getRanges().significantDigits(3), 0, TYPE); | |||
| // The simplification function just takes the first 3 significant digits. If the "shared cost" | |||
| // ranges were not overlapping, this would result in a "fixed line" range of "123xxx..." with | |||
| // lengths 8,9,10. However to avoid corrupting the shared cost range, we end up with: | |||
| // abc | length=10 | FIXED_LINE | |||
| // abc[0-8] | length=8,9 | FIXED_LINE | |||
| // abc9[0-57-9] | length=8,9 | FIXED_LINE | |||
| // abc96 | length=8,9 | SHARED_COST | |||
| assertThat(simplified).hasColumns(TYPE); | |||
| assertThat(simplified).assigned(TYPE, FIXED_LINE).containsExactly( | |||
| "123_xxx_xxxx", | |||
| "123_[0-8]xx_xx", | |||
| "123_[0-8]xx_xxx", | |||
| "123_9[0-57-9]x_xx", | |||
| "123_9[0-57-9]x_xxx"); | |||
| assertThat(simplified).assigned(TYPE, SHARED_COST).containsExactly( | |||
| "123_96x_xx", | |||
| "123_96x_xxx"); | |||
| assertThat(simplified).assigned(TYPE, MOBILE).containsExactly( | |||
| "1[13]4_xxxx_xxxx"); | |||
| } | |||
| @Test | |||
| public void testSimplify_overlappingCheck() { | |||
| Schema shortcodeSchema = Schema.builder().add(TYPE).build(); | |||
| RangeTable table = RangeTable.builder(shortcodeSchema) | |||
| .apply(assign(ranges("123x"), TYPE, FIXED_LINE)) | |||
| .apply(assign(ranges("12x", "12xxx"), TYPE, MOBILE)) | |||
| .build(); | |||
| // The simplification function here is good for testing edge case behaviour since it's | |||
| // essentially the most "extreme" simplification you can have. | |||
| RangeTable simplified = table.simplify(EXTREME_SIMPLIFICATION, 0, TYPE); | |||
| assertThat(simplified).hasColumns(TYPE); | |||
| assertThat(simplified).assigned(TYPE, FIXED_LINE).containsExactly("123x"); | |||
| assertThat(simplified).assigned(TYPE, MOBILE).containsExactly("12x", "12[0-24-9]x", "12xxx"); | |||
| } | |||
| private static RangeTree ranges(String... rangeSpecs) { | |||
| return RangeTree.from(Arrays.stream(rangeSpecs).map(RangeSpecification::parse)); | |||
| } | |||
| private static <T extends Comparable<T>> Change assign( | |||
| RangeTree ranges, Column<T> column, T value) { | |||
| return Change.builder(ranges).assign(column, value).build(); | |||
| } | |||
| private static <T extends Comparable<T>> Change unassign(RangeTree ranges, Column<T> column) { | |||
| return Change.builder(ranges).unassign(column).build(); | |||
| } | |||
| private Change assign(RangeTree ranges, Map<Column<?>, ?> map) { | |||
| return Change.of(ranges, | |||
| map.entrySet().stream() | |||
| .map(e -> Assignment.of(e.getKey(), e.getValue())) | |||
| .collect(toImmutableList())); | |||
| } | |||
| private static Cell<RangeSpecification, Column<?>, Optional<?>> assigned( | |||
| String range, Column<?> column, Object value) { | |||
| return Tables.immutableCell(RangeSpecification.parse(range), column, Optional.of(value)); | |||
| } | |||
| } | |||
| @ -0,0 +1,71 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.table; | |||
| import static com.google.common.truth.Truth.assertThat; | |||
| import static com.google.common.truth.Truth8.assertThat; | |||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN; | |||
| import static org.junit.Assert.assertThrows; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import java.util.stream.Stream; | |||
| import org.junit.Test; | |||
| import org.junit.runner.RunWith; | |||
| import org.junit.runners.JUnit4; | |||
| @RunWith(JUnit4.class) | |||
| public class SchemaTest { | |||
| private static final Column<ValidNumberType> TYPE = | |||
| Column.of(ValidNumberType.class, "Type", UNKNOWN); | |||
| private static final Column<String> OPERATORS = Column.ofString("Operators"); | |||
| private static final ColumnGroup<PhoneRegion, Boolean> REGIONS = | |||
| ColumnGroup.byRegion(Column.ofBoolean("Region")); | |||
| private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US")); | |||
| private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA")); | |||
| private static final Column<Boolean> BOGUS = Column.ofBoolean("Bogus"); | |||
| private static final Schema SCHEMA = | |||
| Schema.builder().add(TYPE).add(OPERATORS).add(REGIONS).build(); | |||
| @Test | |||
| public void testColumnOrdering() { | |||
| assertThat(Stream.of(OPERATORS, REGION_US, TYPE, REGION_CA).sorted(SCHEMA.ordering())) | |||
| .containsExactly(TYPE, OPERATORS, REGION_CA, REGION_US) | |||
| .inOrder(); | |||
| // The names are the columns/groups (but not the names of columns in groups, such as | |||
| // "Region:US", since those are functionally generated and aren't known by the schema. | |||
| assertThat(SCHEMA.names()).containsExactly("Type", "Operators", "Region").inOrder(); | |||
| } | |||
| @Test | |||
| public void test() { | |||
| assertThat(SCHEMA.getColumn("Type")).isEqualTo(TYPE); | |||
| assertThat(SCHEMA.getColumn("Region:US")).isEqualTo(REGION_US); | |||
| assertThrows(IllegalArgumentException.class, () -> SCHEMA.getColumn("Region")); | |||
| assertThrows(IllegalArgumentException.class, () -> SCHEMA.getColumn("Bogus")); | |||
| } | |||
| @Test | |||
| public void testCheckColumn() { | |||
| assertThat(SCHEMA.checkColumn(TYPE)).isEqualTo(TYPE); | |||
| assertThat(SCHEMA.checkColumn(REGION_US)).isEqualTo(REGION_US); | |||
| assertThrows(IllegalArgumentException.class, () -> SCHEMA.checkColumn(BOGUS)); | |||
| } | |||
| } | |||
| @ -0,0 +1,132 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.testing; | |||
| import static com.google.common.base.Strings.lenientFormat; | |||
| import static com.google.common.truth.Fact.simpleFact; | |||
| import static com.google.common.truth.Truth.assertAbout; | |||
| import static java.util.Arrays.asList; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableTable; | |||
| import com.google.common.truth.FailureMetadata; | |||
| import com.google.common.truth.Subject; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||
| import java.util.Optional; | |||
| import javax.annotation.Nullable; | |||
| /** A Truth subject for asserting on {@link RangeTable} instances. */ | |||
| public class RangeTableSubject extends Subject { | |||
| /** Returns Truth subject for asserting on a {@link RangeTable}. */ | |||
| public static RangeTableSubject assertThat(@Nullable RangeTable table) { | |||
| return assertAbout(RangeTableSubject.SUBJECT_FACTORY).that(table); | |||
| } | |||
| private static final Factory<RangeTableSubject, RangeTable> SUBJECT_FACTORY = | |||
| RangeTableSubject::new; | |||
| private final RangeTable actual; | |||
| private RangeTableSubject(FailureMetadata failureMetadata, @Nullable RangeTable subject) { | |||
| super(failureMetadata, subject); | |||
| this.actual = subject; | |||
| } | |||
| // Add more methods below as needed. | |||
| /** Asserts that the table is empty. */ | |||
| public void isEmpty() { | |||
| if (!actual.isEmpty()) { | |||
| failWithActual(simpleFact("expected to be empty")); | |||
| } | |||
| } | |||
| /** Asserts that the table has exactly the given columns in the given order (and no others). */ | |||
| public void hasColumns(Column<?>... columns) { | |||
| check("getColumns()").that(actual.getColumns()).containsExactlyElementsIn(asList(columns)); | |||
| } | |||
| /** Asserts that the table has the specified number of rows. */ | |||
| public void hasRowCount(int count) { | |||
| check("toImmutableTable().rowKeySet().size()") | |||
| .that(actual.toImmutableTable().rowKeySet().size()) | |||
| .isEqualTo(count); | |||
| } | |||
| /** | |||
| * Asserts the specified range has the given values for each column. All columns need to be | |||
| * specified, with {@code null} meanings "no value present". This method does not ensure that no | |||
| * other ranges were also assigned the same values, so for complete coverage in a test it's best | |||
| * to use this in conjunction with something like {@link #allRanges()}. | |||
| */ | |||
| public void hasRanges(String spec, Object... values) { | |||
| ImmutableTable<RangeSpecification, Column<?>, Optional<?>> table = | |||
| this.actual.toImmutableTable(); | |||
| RangeSpecification rowKey = RangeSpecification.parse(spec); | |||
| if (!table.rowKeySet().contains(rowKey)) { | |||
| failWithoutActual( | |||
| simpleFact( | |||
| lenientFormat( | |||
| "specified row %s does not exist in the table: rows=%s", | |||
| rowKey, table.rowKeySet()))); | |||
| } | |||
| ImmutableMap<Column<?>, Optional<?>> row = table.row(rowKey); | |||
| if (row.size() != values.length) { | |||
| failWithoutActual( | |||
| simpleFact( | |||
| lenientFormat( | |||
| "incorrect number of columns: expected %s, got %s", row.size(), values.length))); | |||
| } | |||
| int n = 0; | |||
| for (Optional<?> actual : row.values()) { | |||
| Object expected = values[n++]; | |||
| if (actual.isPresent()) { | |||
| if (!actual.get().equals(expected)) { | |||
| failWithoutActual( | |||
| simpleFact( | |||
| lenientFormat("unexpected value in row: expected %s, got %s", expected, actual))); | |||
| } | |||
| } else if (expected != null) { | |||
| failWithoutActual(simpleFact(lenientFormat("missing value in row: expected %s", expected))); | |||
| } | |||
| } | |||
| } | |||
| /** | |||
| * Returns a {@link RangeTreeSubject} for asserting about the ranges assigned to the given value | |||
| * in the specified column. | |||
| */ | |||
| public RangeTreeSubject assigned(Column<?> column, Object value) { | |||
| return RangeTreeSubject.assertWithMessageThat( | |||
| actual.getRanges(column, value), "%s in column %s", value, column); | |||
| } | |||
| /** | |||
| * Returns a {@link RangeTreeSubject} for asserting about all ranges assigned in the specified | |||
| * column. | |||
| */ | |||
| public RangeTreeSubject assigned(Column<?> column) { | |||
| return RangeTreeSubject.assertWithMessageThat( | |||
| actual.getAssignedRanges(column), "column %s", column); | |||
| } | |||
| /** Returns a {@link RangeTreeSubject} for asserting about all ranges in the table. */ | |||
| public RangeTreeSubject allRanges() { | |||
| return RangeTreeSubject.assertWithMessageThat(actual.getAllRanges(), "all ranges"); | |||
| } | |||
| } | |||
| @ -0,0 +1,118 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.testing; | |||
| import static com.google.common.truth.Fact.simpleFact; | |||
| import static com.google.common.truth.Truth.assertAbout; | |||
| import static com.google.common.truth.Truth.assertWithMessage; | |||
| import com.google.common.collect.FluentIterable; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.truth.FailureMetadata; | |||
| import com.google.common.truth.Subject; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import javax.annotation.Nullable; | |||
| /** A Truth subject for asserting on {@link RangeTree} instances. */ | |||
| public class RangeTreeSubject extends Subject { | |||
| public static RangeTreeSubject assertThat(@Nullable RangeTree tree) { | |||
| return assertAbout(RangeTreeSubject.SUBJECT_FACTORY).that(tree); | |||
| } | |||
| public static RangeTreeSubject assertThat(@Nullable PrefixTree tree) { | |||
| return assertAbout(RangeTreeSubject.SUBJECT_FACTORY).that(tree.asRangeTree()); | |||
| } | |||
| public static RangeTreeSubject assertWithMessageThat( | |||
| @Nullable RangeTree tree, String message, Object... args) { | |||
| return assertWithMessage(message, args).about( | |||
| RangeTreeSubject.SUBJECT_FACTORY).that(tree); | |||
| } | |||
| private static final Factory<RangeTreeSubject, RangeTree> SUBJECT_FACTORY = | |||
| RangeTreeSubject::new; | |||
| private final RangeTree actual; | |||
| private RangeTreeSubject(FailureMetadata failureMetadata, @Nullable RangeTree subject) { | |||
| super(failureMetadata, subject); | |||
| this.actual = subject; | |||
| } | |||
| // Add more methods below as needed. | |||
| public void isEmpty() { | |||
| if (!actual.isEmpty()) { | |||
| failWithActual(simpleFact("expected to be empty")); | |||
| } | |||
| } | |||
| public void isNotEmpty() { | |||
| if (actual.isEmpty()) { | |||
| failWithActual(simpleFact("expected not to be empty")); | |||
| } | |||
| } | |||
| public void hasSize(long size) { | |||
| check("size()").withMessage("size").that(actual.size()).isEqualTo(size); | |||
| } | |||
| public void contains(String digits) { | |||
| DigitSequence seq = digits.isEmpty() ? DigitSequence.empty() : DigitSequence.of(digits); | |||
| if (!actual.contains(seq)) { | |||
| failWithActual("expected to contain ", digits); | |||
| } | |||
| } | |||
| public void doesNotContain(String digits) { | |||
| DigitSequence seq = digits.isEmpty() ? DigitSequence.empty() : DigitSequence.of(digits); | |||
| if (actual.contains(seq)) { | |||
| failWithActual("expected not to contain", digits); | |||
| } | |||
| } | |||
| public void containsExactly(RangeSpecification spec) { | |||
| RangeTree tree = RangeTree.from(spec); | |||
| if (!actual.equals(tree)) { | |||
| failWithActual("expected to be equal to", spec); | |||
| } | |||
| } | |||
| public void containsExactly(Iterable<RangeSpecification> specs) { | |||
| RangeTree tree = RangeTree.from(specs); | |||
| if (!actual.equals(tree)) { | |||
| failWithActual("expected to be equal to", specs); | |||
| } | |||
| } | |||
| public void containsExactly(String spec) { | |||
| containsExactly(RangeSpecification.parse(spec)); | |||
| } | |||
| public void containsExactly(String... specs) { | |||
| containsExactly(FluentIterable.from(specs).transform(RangeSpecification::parse)); | |||
| } | |||
| public void hasLengths(Integer... lengths) { | |||
| check("getLengths()") | |||
| .that(actual.getLengths()) | |||
| .containsExactlyElementsIn(ImmutableSet.copyOf(lengths)); | |||
| } | |||
| } | |||
| @ -0,0 +1,477 @@ | |||
| /* | |||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| package com.google.i18n.phonenumbers.metadata.testing; | |||
| import static com.google.common.base.Preconditions.checkArgument; | |||
| import static com.google.common.base.Preconditions.checkNotNull; | |||
| import static com.google.common.base.Preconditions.checkState; | |||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | |||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||
| import static java.lang.Boolean.TRUE; | |||
| import static java.util.function.Function.identity; | |||
| import com.google.common.collect.HashBasedTable; | |||
| import com.google.common.collect.ImmutableList; | |||
| import com.google.common.collect.ImmutableMap; | |||
| import com.google.common.collect.ImmutableSet; | |||
| import com.google.common.collect.Maps; | |||
| import com.google.common.collect.Table; | |||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||
| import com.google.i18n.phonenumbers.metadata.Types; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||
| import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag; | |||
| import com.google.i18n.phonenumbers.metadata.model.AltFormatSpec; | |||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Attributes; | |||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema; | |||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff; | |||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType; | |||
| import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema; | |||
| import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType; | |||
| import com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema; | |||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||
| import java.util.ArrayList; | |||
| import java.util.Arrays; | |||
| import java.util.HashMap; | |||
| import java.util.LinkedHashMap; | |||
| import java.util.List; | |||
| import java.util.Map; | |||
| import java.util.Map.Entry; | |||
| import java.util.Optional; | |||
| import java.util.stream.Stream; | |||
| /** | |||
| * Reusable test-only builder for numbering schemes. More methods can be added as necessary to | |||
| * support whatever is needed for testing. | |||
| * | |||
| * <p>Note that the various "modifier" classes returned by methods such as "addRanges()" are | |||
| * designed only as fluent APIs and instances of modifiers should never be assigned to variables | |||
| * and especially not interleaved with other mutations of the range tables. | |||
| */ | |||
| public final class TestNumberingScheme { | |||
| /** | |||
| * Returns a mutable numbering scheme builder for testing. Since an IDD is always required by | |||
| * NumberingScheme for geographic regions, a default value of "00" is set by default. This can be | |||
| * overridden or reset by {@code setInternationalPrefix{}} and {@code clearInternationalPrefix()}. | |||
| */ | |||
| public static TestNumberingScheme forCallingCode( | |||
| String cc, PhoneRegion main, PhoneRegion... others) { | |||
| return new TestNumberingScheme(DigitSequence.of(cc), main, ImmutableSet.copyOf(others)); | |||
| } | |||
| // The calling code and regions this builder was created for (fixed at construction time). | |||
| private final DigitSequence callingCode; | |||
| private final PhoneRegion mainRegion; | |||
| private final ImmutableSet<PhoneRegion> otherRegions; | |||
| // Maps every region of the scheme (main region first, then the others) to its region column. | |||
| private final ImmutableMap<PhoneRegion, Column<Boolean>> regionMap; | |||
| // See setNationalPrefix() / setNationalPrefixes() / clearNationalPrefix() | |||
| private final List<DigitSequence> nationalPrefix = new ArrayList<>(); | |||
| // See setInternationalPrefix() / clearInternationalPrefix() | |||
| private Optional<DigitSequence> internationalPrefix = Optional.empty(); | |||
| // See setCarrierPrefixes() | |||
| private RangeTree carrierPrefixes = RangeTree.empty(); | |||
| // Uses the CSV schema (rather than XML) since that handles type/tariff better. | |||
| private final RangeTable.Builder csvRanges = RangeTable.builder(RangesTableSchema.TABLE_COLUMNS); | |||
| // Per-region shortcode tables, created on demand by addShortcodes(). | |||
| private final Map<PhoneRegion, RangeTable.Builder> shortcodes = new HashMap<>(); | |||
| // Maps each format to the ID under which it is assigned in the range/shortcode tables. | |||
| private final Map<FormatSpec, String> formats = new LinkedHashMap<>(); | |||
| // Alternate formats are largely separate from everything else. | |||
| private ImmutableList<AltFormatSpec> altFormats = ImmutableList.of(); | |||
| // Explicit example numbers. | |||
| private final Table<PhoneRegion, ValidNumberType, DigitSequence> examples = | |||
| HashBasedTable.create(); | |||
| // Comments added via the type and territory modifiers. | |||
| private final List<Comment> comments = new ArrayList<>(); | |||
| /** | |||
| * Creates a builder for the given calling code and regions. The main region must not also appear | |||
| * in the other regions. Unless the main region is the "world" region, the international prefix | |||
| * is initialised to "00". | |||
| */ | |||
| private TestNumberingScheme( | |||
| DigitSequence cc, PhoneRegion main, ImmutableSet<PhoneRegion> others) { | |||
| checkArgument(!others.contains(main), "duplicate regions"); | |||
| this.callingCode = checkNotNull(cc); | |||
| this.mainRegion = checkNotNull(main); | |||
| this.otherRegions = others; | |||
| // One region column per region, with the main region first. | |||
| Stream<PhoneRegion> allRegions = Stream.concat(Stream.of(main), others.stream()); | |||
| this.regionMap = | |||
| allRegions.collect(toImmutableMap(identity(), RangesTableSchema.REGIONS::getColumn)); | |||
| // Set a reasonable IDD default for geographic regions. | |||
| boolean isWorld = main.equals(PhoneRegion.getWorld()); | |||
| if (!isWorld) { | |||
| setInternationalPrefix("00"); | |||
| } | |||
| } | |||
| /** Sets the national prefix of this scheme, replacing any previous value. */ | |||
| public TestNumberingScheme setNationalPrefix(String prefix) { | |||
| checkArgument(!prefix.isEmpty(), "national prefix must not be empty"); | |||
| this.nationalPrefix.clear(); | |||
| this.nationalPrefix.add(DigitSequence.of(prefix)); | |||
| return this; | |||
| } | |||
| /** | |||
| * Sets the national prefixes of this scheme, replacing any previous values. Calling this with | |||
| * no arguments leaves the scheme without a national prefix. | |||
| * | |||
| * <p>Every prefix is checked and parsed before the existing values are discarded, so a rejected | |||
| * argument leaves the builder unchanged rather than partially updated. | |||
| * | |||
| * @param prefix the non-empty national prefix digit strings | |||
| * @return this builder | |||
| * @throws IllegalArgumentException if any prefix is empty | |||
| */ | |||
| public TestNumberingScheme setNationalPrefixes(String... prefix) { | |||
| List<DigitSequence> parsed = new ArrayList<>(prefix.length); | |||
| for (String p : prefix) { | |||
| checkArgument(!p.isEmpty(), "national prefix must not be empty"); | |||
| parsed.add(DigitSequence.of(p)); | |||
| } | |||
| // Only replace the current state once all arguments are known to be acceptable. | |||
| this.nationalPrefix.clear(); | |||
| this.nationalPrefix.addAll(parsed); | |||
| return this; | |||
| } | |||
| /** | |||
| * Removes every national prefix from this scheme. | |||
| * | |||
| * @return this builder | |||
| */ | |||
| public TestNumberingScheme clearNationalPrefix() { | |||
| nationalPrefix.clear(); | |||
| return this; | |||
| } | |||
| /** Sets the international prefix of this scheme, replacing any previous value. */ | |||
| public TestNumberingScheme setInternationalPrefix(String prefix) { | |||
| checkState(!mainRegion.equals(PhoneRegion.getWorld()), | |||
| "[%s] cannot set IDD for non-geographic calling code", callingCode); | |||
| this.internationalPrefix = Optional.of(DigitSequence.of(prefix)); | |||
| return this; | |||
| } | |||
| /** | |||
| * Removes the international prefix (IDD) from this scheme. | |||
| * | |||
| * @return this builder | |||
| */ | |||
| public TestNumberingScheme clearInternationalPrefix() { | |||
| internationalPrefix = Optional.empty(); | |||
| return this; | |||
| } | |||
| /** | |||
| * Sets the carrier prefixes of this scheme, replacing any previous value. Each argument is | |||
| * parsed as a range specification. | |||
| */ | |||
| public TestNumberingScheme setCarrierPrefixes(String... prefix) { | |||
| Stream<RangeSpecification> specs = Arrays.stream(prefix).map(RangeSpecification::parse); | |||
| this.carrierPrefixes = RangeTree.from(specs); | |||
| return this; | |||
| } | |||
| /** | |||
| * Adds ranges (which must not already exist) to the scheme. This method returns a fluent API | |||
| * for modifying the newly added ranges. | |||
| */ | |||
| public RangeModifier addRanges(ExtType type, ExtTariff tariff, String... specs) { | |||
| return addRanges(type, tariff, rangesOf(specs)); | |||
| } | |||
| /** | |||
| * Adds ranges (which must not already exist) to the scheme with the given type and tariff, and | |||
| * marks them as valid in every region of the scheme. This method returns a fluent API for | |||
| * modifying the newly added ranges. | |||
| * | |||
| * <p>The type and tariff are null-checked before anything is assigned, so a null argument | |||
| * cannot leave the ranges half-assigned. | |||
| * | |||
| * @throws IllegalArgumentException if any of the ranges were already added | |||
| * @throws NullPointerException if {@code type} or {@code tariff} is null | |||
| */ | |||
| public RangeModifier addRanges(ExtType type, ExtTariff tariff, RangeTree ranges) { | |||
| RangeTree overlap = csvRanges.getAllRanges().intersect(ranges); | |||
| checkArgument(overlap.isEmpty(), "ranges already added: %s", overlap); | |||
| // Validate both values up front, before the table is touched. | |||
| checkNotNull(type); | |||
| checkNotNull(tariff); | |||
| csvRanges.assign(RangesTableSchema.TYPE, type, ranges, OverwriteMode.NEVER); | |||
| csvRanges.assign(RangesTableSchema.TARIFF, tariff, ranges, OverwriteMode.NEVER); | |||
| // Setting all regions here generates "legal" numbering schemes by default. | |||
| regionMap.values().forEach(c -> csvRanges.assign(c, true, ranges, OverwriteMode.NEVER)); | |||
| return new RangeModifier(ranges); | |||
| } | |||
| /** Removes ranges (which need not already exist) from the scheme. */ | |||
| public void removeRanges(String... specs) { | |||
| removeRanges(rangesOf(specs)); | |||
| } | |||
| /** | |||
| * Removes the given ranges from the scheme's range table. The ranges need not already exist. | |||
| */ | |||
| public void removeRanges(RangeTree ranges) { | |||
| this.csvRanges.remove(ranges); | |||
| } | |||
| /** Returns a fluent API for modifying existing ranges (constrained by the given bounds). */ | |||
| public RangeModifier forRangesIn(String... specs) { | |||
| return forRangesIn(rangesOf(specs)); | |||
| } | |||
| /** Returns a fluent API for modifying existing ranges (constrained by the given bounds). */ | |||
| public RangeModifier forRangesIn(RangeTree ranges) { | |||
| return new RangeModifier(ranges.intersect(csvRanges.getAllRanges())); | |||
| } | |||
| /** | |||
| * Adds shortcodes (which must not already exist) to a given region in the scheme. This method | |||
| * returns a fluent API for modifying the newly added shortcodes. | |||
| */ | |||
| public ShortcodeModifier addShortcodes( | |||
| PhoneRegion region, ShortcodeType type, ExtTariff tariff, String... specs) { | |||
| return addShortcodes(region, type, tariff, rangesOf(specs)); | |||
| } | |||
| /** | |||
| * Adds shortcodes (which must not already exist) to a given region in the scheme, creating the | |||
| * shortcode table for that region if it has none yet. This method returns a fluent API for | |||
| * modifying the newly added shortcodes. | |||
| * | |||
| * <p>The region is null-checked before a table is looked up or created, and the type and tariff | |||
| * are null-checked before anything is assigned, so a null argument cannot leave a null-keyed | |||
| * table or half-assigned shortcodes behind. | |||
| * | |||
| * @throws IllegalArgumentException if any of the ranges were already added for the region | |||
| * @throws NullPointerException if {@code region}, {@code type} or {@code tariff} is null | |||
| */ | |||
| public ShortcodeModifier addShortcodes( | |||
| PhoneRegion region, ShortcodeType type, ExtTariff tariff, RangeTree ranges) { | |||
| checkNotNull(region); | |||
| RangeTable.Builder table = shortcodes | |||
| .computeIfAbsent(region, r -> RangeTable.builder(ShortcodesTableSchema.SCHEMA.columns())); | |||
| RangeTree overlap = table.getAllRanges().intersect(ranges); | |||
| checkArgument(overlap.isEmpty(), "ranges already added: %s", overlap); | |||
| // Validate both values up front, before the table is touched. | |||
| checkNotNull(type); | |||
| checkNotNull(tariff); | |||
| table.assign(ShortcodesTableSchema.TYPE, type, ranges, OverwriteMode.NEVER); | |||
| table.assign(ShortcodesTableSchema.TARIFF, tariff, ranges, OverwriteMode.NEVER); | |||
| return new ShortcodeModifier(region, ranges); | |||
| } | |||
| /** Returns a fluent API for modifying existing shortcodes (constrained by the given bounds). */ | |||
| public ShortcodeModifier forShortcodesIn(PhoneRegion region, String... specs) { | |||
| return forShortcodesIn(region, rangesOf(specs)); | |||
| } | |||
| /** | |||
| * Returns a fluent API for modifying the existing shortcodes of the given region that fall | |||
| * within the given bounds. | |||
| * | |||
| * @throws NullPointerException if no shortcodes were ever added for the region | |||
| */ | |||
| public ShortcodeModifier forShortcodesIn(PhoneRegion region, RangeTree ranges) { | |||
| RangeTable.Builder table = shortcodes.get(region); | |||
| checkNotNull(table, "no shortcodes in region %s", region); | |||
| RangeTree existing = table.getAllRanges(); | |||
| return new ShortcodeModifier(region, ranges.intersect(existing)); | |||
| } | |||
| /** | |||
| * Returns a fluent API for modifying attributes (example number, comments) associated with the | |||
| * given type and tariff in the given region. | |||
| */ | |||
| public TypeModifier forRangeTypes(PhoneRegion region, ExtType type, ExtTariff tariff) { | |||
| return new TypeModifier(region, type, tariff); | |||
| } | |||
| /** | |||
| * Sets the alternate formats of this scheme, replacing any previous value. An immutable copy of | |||
| * the given list is stored. | |||
| */ | |||
| public TestNumberingScheme setAlternateFormats(List<AltFormatSpec> altFormats) { | |||
| ImmutableList<AltFormatSpec> copy = ImmutableList.copyOf(altFormats); | |||
| this.altFormats = copy; | |||
| return this; | |||
| } | |||
| /** | |||
| * Builds a valid numbering scheme from the current state of this builder. | |||
| * | |||
| * <p>Each shortcode table is built exactly once and the resulting immutable snapshot is used | |||
| * both for deciding which formats are still assigned and as the shortcode map passed to the | |||
| * numbering scheme. Example numbers not set explicitly are filled in from the range table. | |||
| */ | |||
| public NumberingScheme build() { | |||
| Attributes attributes = Attributes.create( | |||
| callingCode, | |||
| mainRegion, | |||
| otherRegions, | |||
| ImmutableSet.copyOf(nationalPrefix), | |||
| carrierPrefixes, | |||
| // This is currently simplistic (only 1 value) and could be extended for tests if needed. | |||
| internationalPrefix.map(Object::toString).orElse(""), | |||
| internationalPrefix.map(p -> RangeTree.from(RangeSpecification.from(p))) | |||
| .orElse(RangeTree.empty()), | |||
| "", | |||
| ImmutableSet.of()); | |||
| RangeTable xmlTable = XmlRangesSchema.fromExternalTable(csvRanges.build()); | |||
| // Immutable snapshot of the shortcode tables as they are right now. | |||
| ImmutableMap<PhoneRegion, RangeTable> shortcodeMap = | |||
| shortcodes.entrySet().stream() | |||
| .collect(toImmutableMap(Entry::getKey, e -> e.getValue().build())); | |||
| // Some formats may have been unassigned by modifications to the test scheme. Only copy the | |||
| // formats with keys that exist in the range tables at the time the scheme is built. | |||
| ImmutableSet<String> assignedFormats = Stream.concat( | |||
| xmlTable.getAssignedValues(XmlRangesSchema.FORMAT).stream(), | |||
| shortcodeMap.values().stream() | |||
| .flatMap(t -> t.getAssignedValues(ShortcodesTableSchema.FORMAT).stream())) | |||
| .collect(toImmutableSet()); | |||
| ImmutableMap<String, FormatSpec> formatMap = formats.entrySet().stream() | |||
| .filter(e -> assignedFormats.contains(e.getValue())) | |||
| .collect(toImmutableMap(Entry::getValue, Entry::getKey)); | |||
| return NumberingScheme.from( | |||
| attributes, | |||
| xmlTable, | |||
| // Pass the snapshot rather than a lazy view over the mutable builder map, which would | |||
| // rebuild every table on access. | |||
| shortcodeMap, | |||
| formatMap, | |||
| altFormats, | |||
| fillInMissingExampleNumbersFrom(xmlTable, examples), | |||
| comments); | |||
| } | |||
| /** Returns a fluent API for modifying territory-level attributes of the given region. */ | |||
| public TerritoryModifier forTerritory(PhoneRegion region) { | |||
| return new TerritoryModifier(region); | |||
| } | |||
| /** Fluent API for modifying a set of ranges. */ | |||
| public final class RangeModifier { | |||
| private final RangeTree ranges; | |||
| private RangeModifier(RangeTree ranges) { | |||
| checkArgument(!ranges.isEmpty(), "cannot modify empty ranges"); | |||
| this.ranges = ranges; | |||
| } | |||
| /** Sets the regions in which the ranges are valid. */ | |||
| public RangeModifier setRegions(PhoneRegion... regions) { | |||
| ImmutableSet<PhoneRegion> regionsToSet = ImmutableSet.copyOf(regions); | |||
| checkArgument(regionMap.keySet().containsAll(regionsToSet)); | |||
| regionMap.forEach((r, c) -> | |||
| csvRanges.assign(c, regionsToSet.contains(r), ranges, OverwriteMode.ALWAYS)); | |||
| return this; | |||
| } | |||
| /** Sets ranges to be "national only" dialing. */ | |||
| public RangeModifier setNationalOnly(boolean nationalOnly) { | |||
| csvRanges.assign(RangesTableSchema.NATIONAL_ONLY, nationalOnly, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| /** Sets the area code length of the ranges. */ | |||
| public RangeModifier setAreaCodeLength(int n) { | |||
| csvRanges.assign(RangesTableSchema.AREA_CODE_LENGTH, n, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| /** Sets the format assigned to the ranges. */ | |||
| public RangeModifier setFormat(FormatSpec format) { | |||
| String id = | |||
| formats.computeIfAbsent(format, f -> String.format("__fmt_%02d", formats.size() + 1)); | |||
| csvRanges.assign(RangesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| public RangeModifier setFormat(String id, FormatSpec format) { | |||
| formats.put(format, id); | |||
| csvRanges.assign(RangesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| /** Clears the format assigned to the ranges. */ | |||
| public RangeModifier clearFormat() { | |||
| csvRanges.assign(RangesTableSchema.FORMAT, null, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| public RangeModifier setGeocode(SimpleLanguageTag lang, String name) { | |||
| csvRanges.assign( | |||
| RangesTableSchema.GEOCODES.getColumn(lang), name, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| } | |||
| /** Fluent API for modifying a fixed, non-empty set of shortcodes in a single region. */ | |||
| public final class ShortcodeModifier { | |||
| private final PhoneRegion region; | |||
| private final RangeTree ranges; | |||
| private ShortcodeModifier(PhoneRegion region, RangeTree ranges) { | |||
| checkArgument(!ranges.isEmpty(), "cannot modify empty ranges"); | |||
| this.region = checkNotNull(region); | |||
| this.ranges = ranges; | |||
| } | |||
| /** Returns the shortcode table builder registered for this modifier's region. */ | |||
| private RangeTable.Builder shortcodeTable() { | |||
| return shortcodes.get(region); | |||
| } | |||
| /** | |||
| * Sets the format assigned to the shortcodes. A format not seen before is registered under a | |||
| * generated ID; a known format reuses its existing ID. | |||
| */ | |||
| public ShortcodeModifier setFormat(FormatSpec format) { | |||
| String id = formats.get(format); | |||
| if (id == null) { | |||
| id = String.format("__fmt_%02d", formats.size() + 1); | |||
| formats.put(format, id); | |||
| } | |||
| shortcodeTable().assign(ShortcodesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| /** Registers the format under the given explicit ID and assigns that ID to the shortcodes. */ | |||
| public ShortcodeModifier setFormat(String id, FormatSpec format) { | |||
| formats.put(format, id); | |||
| shortcodeTable().assign(ShortcodesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| /** Clears the format assigned to the shortcodes. */ | |||
| public ShortcodeModifier clearFormat() { | |||
| shortcodeTable().assign(ShortcodesTableSchema.FORMAT, null, ranges, OverwriteMode.ALWAYS); | |||
| return this; | |||
| } | |||
| } | |||
| /** Fluent API for modifying attributes of range types. */ | |||
| public final class TypeModifier { | |||
| private final PhoneRegion region; | |||
| private final ExtType type; | |||
| private final ExtTariff tariff; | |||
| public TypeModifier(PhoneRegion region, ExtType type, ExtTariff tariff) { | |||
| this.region = checkNotNull(region); | |||
| this.type = checkNotNull(type); | |||
| this.tariff = checkNotNull(tariff); | |||
| checkArgument(regionMap.containsKey(region), | |||
| "invalid test region '%s' not in: %s", region, regionMap.keySet()); | |||
| } | |||
| public TypeModifier setExampleNumber(String ex) { | |||
| inferValidNumberType(type, tariff) | |||
| .ifPresent(t -> examples.put(region, t, DigitSequence.of(ex))); | |||
| return this; | |||
| } | |||
| public TypeModifier addComment(String... lines) { | |||
| inferValidNumberType(type, tariff) | |||
| .flatMap(Types::toXmlType) | |||
| .ifPresent(t -> comments.add( | |||
| Comment.create(Comment.anchor(region, t), Arrays.asList(lines)))); | |||
| return this; | |||
| } | |||
| } | |||
| /** Fluent API for modifying territory-level attributes. */ | |||
| public final class TerritoryModifier { | |||
| private final PhoneRegion region; | |||
| public TerritoryModifier(PhoneRegion region) { | |||
| this.region = checkNotNull(region); | |||
| } | |||
| public TerritoryModifier addComment(String... lines) { | |||
| comments.add(Comment.create(Comment.anchor(region), Arrays.asList(lines))); | |||
| return this; | |||
| } | |||
| } | |||
| /** | |||
| * Returns a copy of the given example numbers to which, for the main region and each other | |||
| * region, an example has been added for every assigned number type that lacks one. The given | |||
| * table itself is left untouched. | |||
| */ | |||
| private Table<PhoneRegion, ValidNumberType, DigitSequence> fillInMissingExampleNumbersFrom( | |||
| RangeTable xmlTable, Table<PhoneRegion, ValidNumberType, DigitSequence> examples) { | |||
| // Work on a copy since the build() method is not meant to be modifying the builder itself. | |||
| HashBasedTable<PhoneRegion, ValidNumberType, DigitSequence> filled = | |||
| HashBasedTable.create(examples); | |||
| addMissingExampleNumbersFor(mainRegion, xmlTable, filled); | |||
| for (PhoneRegion other : otherRegions) { | |||
| addMissingExampleNumbersFor(other, xmlTable, filled); | |||
| } | |||
| return filled; | |||
| } | |||
| /** | |||
| * Adds to {@code examples} an example number for every number type that is assigned to ranges | |||
| * of the given region in the table but has no example for that region yet. The first number of | |||
| * the type's ranges is used as the example. | |||
| */ | |||
| private static void addMissingExampleNumbersFor( | |||
| PhoneRegion region, | |||
| RangeTable xmlTable, | |||
| Table<PhoneRegion, ValidNumberType, DigitSequence> examples) { | |||
| Column<Boolean> regionColumn = XmlRangesSchema.REGIONS.getColumn(region); | |||
| // Restrict the table to the ranges that are valid in this region. | |||
| RangeTree regionRanges = xmlTable.getRanges(regionColumn, TRUE); | |||
| RangeTable regionTable = xmlTable.subTable(regionRanges, XmlRangesSchema.TYPE); | |||
| for (ValidNumberType type : regionTable.getAssignedValues(XmlRangesSchema.TYPE)) { | |||
| if (!examples.contains(region, type)) { | |||
| RangeTree typeRanges = regionTable.getRanges(XmlRangesSchema.TYPE, type); | |||
| // Assigned types must be assigned via non empty ranges (so first() cannot fail). | |||
| examples.put(region, type, typeRanges.first()); | |||
| } | |||
| } | |||
| } | |||
| /** | |||
| * Parses the given range specifications into a single range tree. At least one specification | |||
| * must be given, and the result must not contain the empty digit sequence. | |||
| */ | |||
| private static RangeTree rangesOf(String... specs) { | |||
| checkArgument(specs.length > 0, "must provide at least one range specifier"); | |||
| Stream<RangeSpecification> parsedSpecs = Arrays.stream(specs).map(RangeSpecification::parse); | |||
| RangeTree ranges = RangeTree.from(parsedSpecs); | |||
| boolean matchesEmptySequence = ranges.getInitial().canTerminate(); | |||
| checkArgument(!matchesEmptySequence, "cannot add the empty digit sequence"); | |||
| return ranges; | |||
| } | |||
| /** | |||
| * Returns the valid number type implied by the given tariff or, if the tariff implies none, | |||
| * the one implied by the given type (which may also be absent). | |||
| */ | |||
| private static Optional<ValidNumberType> inferValidNumberType(ExtType type, ExtTariff tariff) { | |||
| // Tariff takes precedence over type. | |||
| Optional<ValidNumberType> fromTariff = tariff.toValidNumberType(); | |||
| return fromTariff.isPresent() ? fromTariff : type.toValidNumberType(); | |||
| } | |||
| } | |||