PiperOrigin-RevId: 319856719 Co-authored-by: The libphonenumber Team <noreply@google.com>pull/3882/head
| @ -0,0 +1,311 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import com.google.common.base.CharMatcher; | |||||
| import com.google.common.base.Preconditions; | |||||
| import com.google.common.collect.DiscreteDomain; | |||||
| import com.google.errorprone.annotations.Immutable; | |||||
| import com.google.errorprone.annotations.concurrent.LazyInit; | |||||
| /** | |||||
| * A small, fast, immutable representation of a phone number digit sequence. This class represents | |||||
| * contiguous sequences of digits in phone numbers, such as "123" or "000". It does not encode | |||||
| * semantic information such as the region code to which a number belongs or perform any semantic | |||||
| * validation. It can be thought of as equivalent to a String containing only the ASCII digits | |||||
| * {@code '0'} to {@code '9'}. | |||||
| */ | |||||
| @Immutable | |||||
| public final class DigitSequence implements Comparable<DigitSequence> { | |||||
| private static final CharMatcher ASCII_DIGITS = CharMatcher.inRange('0', '9'); | |||||
| // IMPORTANT | |||||
| // This cannot be more than 18 to avoid overflowing a signed long (it must be signed due to the | |||||
| // calculation of the "distance" metric which can be +ve or -ve). | |||||
| // | |||||
| // If it does need to be raised, this whole class probably needs to be rethought. ITU recommends | |||||
| // a limit of 15 digits (not including country calling code) but there are currently 2 examples | |||||
| // in the metadata XML file which exceed this (Japan) where some non-international toll free | |||||
| // numbers (those starting with 0037 and 0036) can be up to 17 digits (still okay) in the current | |||||
| // metadata but there's a note saying that they may even extend to 21 digits!! | |||||
| // | |||||
| // An appropriate way to split this class would be to make a closed type hierarchy with 2 | |||||
| // separate implementations, one using a long to encode the numbers and one using BigInteger (or | |||||
| // maybe just encoding digits in a string directly). | |||||
| // The good thing about this approach is that instances of the different implementations could | |||||
| // never be equal to each other. This is likely not a difficult refactoring, although the Domain | |||||
| // class will also need to be considered carefully and details like the "index()" value will have | |||||
| // to change completely between the classes. | |||||
| // | |||||
| /** The maximum number of digits which can be held in a digit sequence. */ | |||||
| public static final int MAX_DIGITS = 18; | |||||
| // Simple lookup of powers-of-10 for all valid sequence lengths (0 - MAX_DIGITS). | |||||
| private static final long[] POWERS_OF_TEN = new long[MAX_DIGITS + 1]; | |||||
| static { | |||||
| // 1, 10, 100, 1000, 10000 ... | |||||
| POWERS_OF_TEN[0] = 1; | |||||
| for (int n = 1; n < POWERS_OF_TEN.length; n++) { | |||||
| POWERS_OF_TEN[n] = 10 * POWERS_OF_TEN[n - 1]; | |||||
| } | |||||
| } | |||||
| // A table of adjustment values to convert a digit sequence into an absolute index in the | |||||
| // integer domain, to impose a true lexicographical ordering. The value of a digit sequence is | |||||
| // adjusted by the number of additional elements in the phone number domain which cannot be | |||||
| // represented as integers (the empty sequence or anything with leading zeros). This results in | |||||
| // an absolute ordering of all digit sequences. For example the digit sequence "0123" is length | |||||
| // 4, and there are 111 additional additional elements that come before 4-length sequences | |||||
| // ("", "00"-"09", "000"-"099"), so its index is {@code 123 + 111 = 234}. | |||||
| // To calculate this value dynamically for any length N, offset=floor(10^N / 9). | |||||
| private static final long[] DOMAIN_OFFSET = new long[MAX_DIGITS + 1]; | |||||
| static { | |||||
| // 0, 1, 11, 111, 1111 ... | |||||
| for (int n = 1; n < DOMAIN_OFFSET.length; n++) { | |||||
| DOMAIN_OFFSET[n] = 10 * DOMAIN_OFFSET[n - 1] + 1; | |||||
| } | |||||
| } | |||||
| private static final DigitSequence EMPTY = new DigitSequence(0, 0L); | |||||
| private static final DigitSequence[] SINGLETON_DIGITS = new DigitSequence[] { | |||||
| new DigitSequence(1, 0L), | |||||
| new DigitSequence(1, 1L), | |||||
| new DigitSequence(1, 2L), | |||||
| new DigitSequence(1, 3L), | |||||
| new DigitSequence(1, 4L), | |||||
| new DigitSequence(1, 5L), | |||||
| new DigitSequence(1, 6L), | |||||
| new DigitSequence(1, 7L), | |||||
| new DigitSequence(1, 8L), | |||||
| new DigitSequence(1, 9L), | |||||
| }; | |||||
| // Simple helper to return {@code 10^n} for all valid sequence lengths. | |||||
| private static long pow10(int n) { | |||||
| return POWERS_OF_TEN[n]; | |||||
| } | |||||
| /** | |||||
| * Returns the domain in which phone number digit sequences exist. This is needed when creating | |||||
| * canonical {@link com.google.common.collect.Range Ranges} of digit-sequences. | |||||
| */ | |||||
| public static DiscreteDomain<DigitSequence> domain() { | |||||
| return Domain.INSTANCE; | |||||
| } | |||||
| private static final class Domain extends DiscreteDomain<DigitSequence> { | |||||
| private static final Domain INSTANCE = new Domain(); | |||||
| private static final DigitSequence MIN = EMPTY; | |||||
| private static final DigitSequence MAX = DigitSequence.of("999999999999999999"); | |||||
| @Override | |||||
| public DigitSequence next(DigitSequence num) { | |||||
| long next = num.value + 1; | |||||
| if (next < pow10(num.length)) { | |||||
| return new DigitSequence(num.length, next); | |||||
| } else { | |||||
| int len = num.length + 1; | |||||
| return (len <= MAX_DIGITS) ? new DigitSequence(len, 0) : null; | |||||
| } | |||||
| } | |||||
| @Override | |||||
| public DigitSequence previous(DigitSequence num) { | |||||
| long prev = num.value - 1; | |||||
| if (prev >= 0) { | |||||
| return new DigitSequence(num.length, prev); | |||||
| } else { | |||||
| int len = num.length - 1; | |||||
| return (len >= 0) ? new DigitSequence(len, pow10(len) - 1) : null; | |||||
| } | |||||
| } | |||||
| @Override | |||||
| public long distance(DigitSequence start, DigitSequence end) { | |||||
| // The indices get up to 19 digits but can't overflow Long.MAX_VALUE, so they can be safely | |||||
| // subtracted to get a signed long "distance" without risk of over-/under- flow. | |||||
| return end.index() - start.index(); | |||||
| } | |||||
| @Override | |||||
| public DigitSequence minValue() { | |||||
| return MIN; | |||||
| } | |||||
| @Override | |||||
| public DigitSequence maxValue() { | |||||
| return MAX; | |||||
| } | |||||
| } | |||||
| /** Returns the digit sequence of length one representing the given digit value. */ | |||||
| public static DigitSequence singleton(int digit) { | |||||
| Preconditions.checkArgument(0 <= digit && digit <= 9, "invalid digit value: %s", digit); | |||||
| return SINGLETON_DIGITS[digit]; | |||||
| } | |||||
| /** | |||||
| * Returns the empty digit sequence. This is useful in special cases where you need to build up | |||||
| * a digit sequence starting from nothing). | |||||
| */ | |||||
| public static DigitSequence empty() { | |||||
| return EMPTY; | |||||
| } | |||||
| /** Returns a digit sequence for the given string (e.g. "012345"). */ | |||||
| public static DigitSequence of(String digits) { | |||||
| Preconditions.checkArgument(digits.length() <= MAX_DIGITS, | |||||
| "Digit string too long: '%s'", digits); | |||||
| Preconditions.checkArgument(ASCII_DIGITS.matchesAllOf(digits), | |||||
| "Digit string contains non-digit characters: '%s'", digits); | |||||
| return digits.isEmpty() ? empty() : new DigitSequence(digits.length(), Long.parseLong(digits)); | |||||
| } | |||||
| /** | |||||
| * Returns a digit sequence of {@code length} containing only the digit '0'. This is useful when | |||||
| * performing range calculations to determine the smallest digit sequence in a block. | |||||
| */ | |||||
| public static DigitSequence zeros(int length) { | |||||
| return new DigitSequence(length, 0L); | |||||
| } | |||||
| /** | |||||
| * Returns a digit sequence of {@code length} containing only the digit '9'. This is useful when | |||||
| * performing range calculations to determine the largest digit sequence in a block. | |||||
| */ | |||||
| public static DigitSequence nines(int length) { | |||||
| return new DigitSequence(length, pow10(length) - 1); | |||||
| } | |||||
| // The overall length of the digit sequence, including any leading zeros. | |||||
| private final int length; | |||||
| // The decimal value of the digit sequence (excluding leading zeros, obviously). | |||||
| private final long value; | |||||
| // Cached toString() representation (toString() of DigitSequence is used in comparisons for | |||||
| // sorting to achieve lexicographical ordering, which means it gets churned a lot). | |||||
| @LazyInit | |||||
| private String toString; | |||||
| // Called directly from RangeSpecification. | |||||
| DigitSequence(int length, long value) { | |||||
| // Don't check for -ve length as this should never happen and will blow up in pow10() anyway. | |||||
| Preconditions.checkArgument(length <= MAX_DIGITS, | |||||
| "Digit sequence too long [%s digits]", length); | |||||
| // This should not happen unless there's a code error, so nice user messages aren't needed. | |||||
| Preconditions.checkArgument(value >= 0 && value < pow10(length)); | |||||
| this.length = length; | |||||
| this.value = value; | |||||
| } | |||||
| /** Returns if this sequence is empty (i.e. length == 0). */ | |||||
| public boolean isEmpty() { | |||||
| return length == 0; | |||||
| } | |||||
| /** Returns the length of this digit sequence. */ | |||||
| public int length() { | |||||
| return length; | |||||
| } | |||||
| /** | |||||
| * Returns the digit at index {@code n} in this digit sequence, starting from the most | |||||
| * significant digit. | |||||
| */ | |||||
| public int getDigit(int n) { | |||||
| Preconditions.checkElementIndex(n, length); | |||||
| return (int) (value / pow10(((length - 1) - n)) % 10); | |||||
| } | |||||
| /** | |||||
| * Returns the sub-sequence representing only the first {@code n} digits in this sequence. For | |||||
| * example, {@code "01234".first(3) == "012"}. | |||||
| */ | |||||
| public DigitSequence first(int n) { | |||||
| Preconditions.checkElementIndex(n, length); | |||||
| return new DigitSequence(n, value / pow10(length - n)); | |||||
| } | |||||
| /** | |||||
| * Returns the sub-sequence representing only the last {@code n} digits in this sequence. For | |||||
| * example, {@code "01234".last(3) == "234"}. | |||||
| */ | |||||
| public DigitSequence last(int n) { | |||||
| Preconditions.checkElementIndex(n, length); | |||||
| return new DigitSequence(n, value % pow10(n)); | |||||
| } | |||||
| /** | |||||
| * Returns a new sequence which extends this sequence by a single digit ({@code 0 <= digit <= 9}). | |||||
| */ | |||||
| public DigitSequence extendBy(int digit) { | |||||
| Preconditions.checkArgument(0 <= digit && digit <= 9); | |||||
| return new DigitSequence(length + 1, (10 * value) + digit); | |||||
| } | |||||
| /** Returns a new sequence which extends this sequence by the given value. */ | |||||
| public DigitSequence extendBy(DigitSequence n) { | |||||
| Preconditions.checkNotNull(n); | |||||
| return new DigitSequence(length + n.length, (pow10(n.length) * value) + n.value); | |||||
| } | |||||
| /** | |||||
| * Returns the digit sequence immediately after this one, or {@code null} if this is the | |||||
| * maximum value. | |||||
| */ | |||||
| public DigitSequence next() { | |||||
| return domain().next(this); | |||||
| } | |||||
| /** | |||||
| * Returns the digit sequence immediately before this one, or {@code null} if this is the | |||||
| * minimum value. | |||||
| */ | |||||
| public DigitSequence previous() { | |||||
| return domain().previous(this); | |||||
| } | |||||
| /** Returns the absolute index of this digit sequence within the integer domain. */ | |||||
| private long index() { | |||||
| return value + DOMAIN_OFFSET[length]; | |||||
| } | |||||
| @Override | |||||
| public int compareTo(DigitSequence other) { | |||||
| return Long.signum(index() - other.index()); | |||||
| } | |||||
| @Override | |||||
| public boolean equals(Object o) { | |||||
| return (o instanceof DigitSequence) && index() == ((DigitSequence) o).index(); | |||||
| } | |||||
| @Override | |||||
| public int hashCode() { | |||||
| return Long.hashCode(index()); | |||||
| } | |||||
| @Override | |||||
| public String toString() { | |||||
| // This little dance is required (according to the docs for the LazyInit annotation) for lazy | |||||
| // initialization of non-volatile fields (yes, that's a double init in a single statement). | |||||
| String localVar = toString; | |||||
| if (localVar == null) { | |||||
| toString = localVar = (length > 0 ? String.format("%0" + length + "d", value) : ""); | |||||
| } | |||||
| return localVar; | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,65 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.base.Preconditions; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import java.util.Comparator; | |||||
| /** | |||||
| * A key for uniquely identifying number metadata for a region. For "geographical" regions, the | |||||
| * region code suffices to identify the range information, but for "non geographical" regions, the | |||||
| * calling code is required and the region is set to "UN001" (world). | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class MetadataKey implements Comparable<MetadataKey> { | |||||
| private static final Comparator<MetadataKey> ORDERING = | |||||
| Comparator.comparing(MetadataKey::region).thenComparing(MetadataKey::callingCode); | |||||
| /** | |||||
| * Returns a key to identify phone number data in the given region with the specified calling | |||||
| * code. Care must be taken when creating keys because it is possible to create invalid keys that | |||||
| * would not match any data (e.g. region="US", calling code="44"). | |||||
| */ | |||||
| public static MetadataKey create(PhoneRegion region, DigitSequence callingCode) { | |||||
| // Null checks and semantic checks. | |||||
| Preconditions.checkArgument(region.equals(PhoneRegion.getWorld()) | |||||
| || (region.toString().length() == 2 && !region.equals(PhoneRegion.getUnknown()))); | |||||
| Preconditions.checkArgument(!callingCode.isEmpty()); | |||||
| return new AutoValue_MetadataKey(region, callingCode); | |||||
| } | |||||
| /** | |||||
| * Returns the region for this key (this is {@link PhoneRegion#getWorld()} for non-geographical | |||||
| * regions). | |||||
| */ | |||||
| public abstract PhoneRegion region(); | |||||
| /** Returns the calling code for this key. */ | |||||
| public abstract DigitSequence callingCode(); | |||||
| @Override | |||||
| public int compareTo(MetadataKey other) { | |||||
| return ORDERING.compare(this, other); | |||||
| } | |||||
| // Used in human readable formatting during presubmit checks; be careful if you change it. | |||||
| @Override | |||||
| public final String toString() { | |||||
| return String.format("region=%s, calling code=+%s", region(), callingCode()); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,351 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkState; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.SetOperations; | |||||
| import java.util.ArrayList; | |||||
| import java.util.List; | |||||
| /** | |||||
| * A variation of a {@link RangeTree} which represents a set of prefixes (as opposed to a set of | |||||
| * ranges). While this implementation is backed by a {@code RangeTree} and has a similar serialized | |||||
| * representation, it is a deliberately distinct type and should not be thought of as a subset of | |||||
| * {@code RangeTree}. In particular, set operations are defined to work differently for | |||||
| * {@code PrefixTree} due to its differing semantics and some set operations (e.g. subtraction) are | |||||
| * not even well defined. | |||||
| */ | |||||
| public final class PrefixTree { | |||||
| private static final PrefixTree EMPTY = new PrefixTree(RangeTree.empty()); | |||||
| /** Returns the "empty" prefix tree, which matches no ranges. */ | |||||
| public static PrefixTree empty() { | |||||
| return EMPTY; | |||||
| } | |||||
| /** | |||||
| * Returns a prefix tree with the paths of the given ranges, trimmed to the earliest point of | |||||
| * termination. For example, the ranges {@code {"1[0-3]", "1234", "56x"}} will result in the | |||||
| * prefixes {@code {"1[0-3]", "56x"}}, since {@code "1[0-3]"} contains {@code "12"}, which is a | |||||
| * prefix of {@code "1234"}. | |||||
| */ | |||||
| public static PrefixTree from(RangeTree ranges) { | |||||
| return !ranges.isEmpty() | |||||
| ? new PrefixTree(removeTrailingAnyDigitPaths(TrimmingVisitor.trim(ranges))) | |||||
| : empty(); | |||||
| } | |||||
| /** | |||||
| * Returns a prefix tree containing all digit sequences in the given range specification. A | |||||
| * single range specification cannot overlap in the way that general range trees can, so unlike | |||||
| * {@link #from(RangeTree)}, this method will never throw {@code IllegalArgumentException}. | |||||
| */ | |||||
| public static PrefixTree from(RangeSpecification spec) { | |||||
| // Range specifications define ranges of a single length, so must always be a valid prefix. | |||||
| return from(RangeTree.from(spec)); | |||||
| } | |||||
| /** | |||||
| * Returns the minimal prefix tree which includes all the paths in "include", and none of the | |||||
| * paths in "exclude". For example: | |||||
| * <pre> {@code | |||||
| * minimal({ "123x", "456x" }, { "13xx", "459x" }, 0) == { "12", "456" } | |||||
| * minimal({ "123x", "456x" }, {}, 0) == { "" } | |||||
| * minimal({ "123x", "456x" }, {}, 1) == { "[14]" } | |||||
| * }</pre> | |||||
| * | |||||
| * <p>A minimal length can be specified to avoid creating prefixes that are "too short" for some | |||||
| * circumstances. | |||||
| * | |||||
| * <p>Caveat: In cases where the {@code include} and {@code exclude} ranges overlap, the shortest | |||||
| * possible prefix is chosen. For example: | |||||
| * <pre> {@code | |||||
| * minimal({ "12", "1234", "56" }, { "123", "5678" }) == { "12", "56" } | |||||
| * }</pre> | |||||
| * This means that it may not always be true that {@code minimal(A, B).intersect(minimal(B, A))} | |||||
| * is empty. | |||||
| */ | |||||
| public static PrefixTree minimal(RangeTree include, RangeTree exclude, int minLength) { | |||||
| checkArgument(include.intersect(exclude).isEmpty(), "ranges must be disjoint"); | |||||
| checkArgument(minLength >= 0, "invalid minimum prefix length: %s", minLength); | |||||
| PrefixTree prefix = PrefixTree.from(include); | |||||
| if (prefix.isEmpty()) { | |||||
| // This matches no input, not all input. | |||||
| return prefix; | |||||
| } | |||||
| // Ignore anything that the prefix already captures, since there's no point avoiding shortening | |||||
| // the prefix to avoid what's already overlapping. | |||||
| exclude = exclude.subtract(prefix.retainFrom(exclude)); | |||||
| // This can contain only the empty sequence (i.e. match all input) if the original include set | |||||
| // was something like "xxxxx". In that case the initial node is just the terminal. | |||||
| RangeTree minimal; | |||||
| DfaNode root = prefix.asRangeTree().getInitial(); | |||||
| if (prefix.isIdentity() || exclude.isEmpty()) { | |||||
| // Either we already accept anything, or there is nothing to exclude. | |||||
| minimal = emit(root, RangeSpecification.empty(), RangeTree.empty(), minLength); | |||||
| } else { | |||||
| minimal = recursivelyMinimize( | |||||
| root, RangeSpecification.empty(), exclude.getInitial(), RangeTree.empty(), minLength); | |||||
| } | |||||
| // No need to go via the static factory here, since that does a bunch of work we know cannot | |||||
| // be necessary. The range tree here is a subset of an already valid prefix tree, so cannot | |||||
| // contain "early terminating nodes" or "trailing any digit sequences". | |||||
| return new PrefixTree(minimal); | |||||
| } | |||||
// The range tree holding the prefixes of this prefix tree.
private final RangeTree ranges;

// Performs no validation: the caller is responsible for ensuring that the ranges conform to the
// expectations of a prefix tree.
private PrefixTree(RangeTree ranges) {
  this.ranges = ranges;
}
| /** | |||||
| * Returns a {@link RangeTree} containing the same digit sequences as this prefix tree. Prefix | |||||
| * trees and range trees do not have the same semantics, but they do have the same serialized | |||||
| * form (i.e. to serialize a prefix tree, you can just serialize the corresponding range tree). | |||||
| */ | |||||
| public RangeTree asRangeTree() { | |||||
| return ranges; | |||||
| } | |||||
| /** | |||||
| * Returns whether this prefix tree is empty. Filtering a {@link RangeTree} by the empty prefix | |||||
| * tree always returns the empty range tree. The result of filtering a range tree is defined as | |||||
| * containing only digit sequences which are prefixed by some digit sequence in the prefix tree. | |||||
| * If the prefix tree is empty, no digit sequence can ever satisfy that requirement. | |||||
| */ | |||||
| public boolean isEmpty() { | |||||
| return ranges.isEmpty(); | |||||
| } | |||||
| /** | |||||
| * Returns whether this prefix tree matches any digit sequence. Filtering a {@link RangeTree} by | |||||
| * the identity prefix returns the original range tree. The result of filtering a range tree is | |||||
| * defined as containing only digit sequences which are prefixed by some digit sequence in the | |||||
| * prefix tree. The identity prefix tree contains the empty digit sequence, which is a prefix of | |||||
| * every digit sequence. | |||||
| */ | |||||
| public boolean isIdentity() { | |||||
| return !ranges.isEmpty() && ranges.getInitial().equals(RangeTree.getTerminal()); | |||||
| } | |||||
| /** Returns whether the given sequence would be retained by this prefix tree. */ | |||||
| public boolean prefixes(DigitSequence digits) { | |||||
| DfaNode node = ranges.getInitial(); | |||||
| for (int n = 0; n < digits.length(); n++) { | |||||
| DfaEdge e = node.find(digits.getDigit(n)); | |||||
| if (e == null) { | |||||
| break; | |||||
| } | |||||
| node = e.getTarget(); | |||||
| } | |||||
| return node.equals(RangeTree.getTerminal()); | |||||
| } | |||||
| /** | |||||
| * Returns a subset of the given ranges, containing only ranges which are prefixed by an | |||||
| * element in this prefix tree. For example: | |||||
| * <pre> {@code | |||||
| * RangeTree r = { "12xx", "1234x" } | |||||
| * PrefixTree p = { "12[0-5]" } | |||||
| * p.retainFrom(r) = { "12[0-5]x", "1234x"} | |||||
| * }</pre> | |||||
| * Note that if the prefix tree is empty, this method returns the empty range tree. | |||||
| */ | |||||
| public RangeTree retainFrom(RangeTree ranges) { | |||||
| return SetOperations.INSTANCE.retainFrom(this, ranges); | |||||
| } | |||||
| /** | |||||
| * Returns the union of two prefix trees. For prefix trees {@code p1}, {@code p2} and any range | |||||
| * tree {@code R}, the union {@code P = p1.union(p2)} is defined such that: | |||||
| * <pre> {@code | |||||
| * P.retainFrom(R) = p1.retainFrom(R).union(p2.retainFrom(R)) | |||||
| * }</pre> | |||||
| * If prefixes are the same length this is equivalent to {@link RangeTree#union(RangeTree)}, | |||||
| * but when prefixes overlap, only the more general (shorter) prefix is retained. | |||||
| */ | |||||
| public PrefixTree union(PrefixTree other) { | |||||
| return SetOperations.INSTANCE.union(this, other); | |||||
| } | |||||
| /** | |||||
| * Returns the intersection of two prefix trees. For prefix trees {@code p1}, {@code p2} and any | |||||
| * range tree {@code R}, the intersection {@code P = p1.intersect(p2)} is defined such that: | |||||
| * <pre> {@code | |||||
| * P.retainFrom(R) = p1.retainFrom(R).intersect(p2.retainFrom(R)) | |||||
| * }</pre> | |||||
| * If prefixes are the same length this is equivalent to {@link RangeTree#intersect(RangeTree)}, | |||||
| * but when prefixes overlap, only the more specific (longer) prefix is retained. | |||||
| */ | |||||
| public PrefixTree intersect(PrefixTree other) { | |||||
| return SetOperations.INSTANCE.intersect(this, other); | |||||
| } | |||||
| /** | |||||
| * Returns a prefix tree trimmed to at most {@code maxLength} digits. The returned value may be | |||||
| * shorter if, in the process of trimming, trailing edges are collapsed to "any digit" sequences. | |||||
| * For example: | |||||
| * <pre> {@code | |||||
| * { "12[0-4]5", "12[5-9]" }.trim(3) == "12" | |||||
| * { "7001", "70[1-9]", "7[1-9]" }.trim(3) == "7" | |||||
| * }</pre> | |||||
| */ | |||||
| public PrefixTree trim(int maxLength) { | |||||
| return PrefixTree.from( | |||||
| RangeTree.from( | |||||
| ranges.asRangeSpecifications().stream() | |||||
| .map(s -> s.first(maxLength)) | |||||
| .collect(toImmutableList()))); | |||||
| } | |||||
| @Override | |||||
| public int hashCode() { | |||||
| return ranges.hashCode(); | |||||
| } | |||||
| @Override | |||||
| public boolean equals(Object o) { | |||||
| return (o instanceof PrefixTree) && ranges.equals(((PrefixTree) o).ranges); | |||||
| } | |||||
| @Override | |||||
| public String toString() { | |||||
| return ranges.toString(); | |||||
| } | |||||
| private static final class TrimmingVisitor implements DfaVisitor { | |||||
| static RangeTree trim(RangeTree ranges) { | |||||
| if (ranges.isEmpty()) { | |||||
| return ranges; | |||||
| } | |||||
| if (ranges.getInitial().canTerminate()) { | |||||
| // Not the "empty range tree" (which matches no input), but the range tree containing the | |||||
| // empty range specification (which matches only the empty digit sequence). | |||||
| return RangeTree.from(RangeSpecification.empty()); | |||||
| } | |||||
| TrimmingVisitor v = new TrimmingVisitor(); | |||||
| ranges.accept(v); | |||||
| return RangeTree.from(v.paths); | |||||
| } | |||||
| private final List<RangeSpecification> paths = new ArrayList<>(); | |||||
| private RangeSpecification path = RangeSpecification.empty(); | |||||
| @Override | |||||
| public void visit(DfaNode source, DfaEdge edge, DfaNode target) { | |||||
| RangeSpecification oldPath = path; | |||||
| path = path.extendByMask(edge.getDigitMask()); | |||||
| if (target.canTerminate()) { | |||||
| paths.add(path); | |||||
| } else { | |||||
| target.accept(this); | |||||
| } | |||||
| path = oldPath; | |||||
| } | |||||
| } | |||||
| // Note: This is NOT as simple as just calling "getPrefix()" on each range specification because | |||||
| // ranges that are too short become problematic. Consider { "7[1-9]", "70x" } which should result | |||||
| // in "7". If we just call "getPrefix()" and merge, we end up with "7x". | |||||
| // | |||||
| // One way to fix this is by repeatedly creating prefix trees (removing trailing "any digit" | |||||
| // sequences) until it becomes stable. | |||||
| // | |||||
| // The other way (simpler) is to extend the length of any shorter range specifications to bring | |||||
| // them up to the max length before merging them. In the above example, we extend the length of | |||||
| // "7[1-9]" to "7[1-9]x" and merge it with "70x" to get "7xx", which can then have its prefix | |||||
| // extracted. | |||||
| private static RangeTree removeTrailingAnyDigitPaths(RangeTree ranges) { | |||||
| if (ranges.isEmpty()) { | |||||
| return ranges; | |||||
| } | |||||
| // Skip this if "ranges" matches only one length (since it would be a no-op). | |||||
| if (ranges.getLengths().size() > 1) { | |||||
| int length = ranges.getLengths().last(); | |||||
| ranges = ranges.map(s -> s.length() < length ? s.extendByLength(length - s.length()) : s); | |||||
| } | |||||
| // Having merged everything, we can now extract the correct prefixes as the final step. | |||||
| return ranges.map(RangeSpecification::getPrefix); | |||||
| } | |||||
| /** | |||||
| * Recursively determines the next level of prefix minimization. The algorithm follows as much | |||||
| * of the "included" path as possible (node), potentially splitting into several sub-recursive | |||||
| * steps if the current included edge overlaps with multiple "excluded" paths. Once a path no | |||||
| * longer overlaps with the exclude paths, it is added to the result. Paths are also added to | |||||
| * the result if they terminate while still overlapping the excluded paths. | |||||
| */ | |||||
| private static RangeTree recursivelyMinimize( | |||||
| DfaNode node, RangeSpecification path, DfaNode exclude, RangeTree minimal, int minLength) { | |||||
| for (DfaEdge edge : node.getEdges()) { | |||||
| int mask = edge.getDigitMask(); | |||||
| DfaNode target = edge.getTarget(); | |||||
| // This algorithm only operates on the DFA of a prefix tree (not a general range tree). As | |||||
| // such the only terminating node we can reach is the terminal node itself. If we hit that | |||||
| // from the current edge, just emit it and continue on to the next edge. | |||||
| if (target.equals(RangeTree.getTerminal())) { | |||||
| minimal = minimal.union(RangeTree.from(path.extendByMask(mask))); | |||||
| continue; | |||||
| } | |||||
| checkState(!target.canTerminate(), "invalid DFA state for prefix tree at: %s", path); | |||||
| // Otherwise recurse on every "exclude" path, using the intersection of the "include" and | |||||
| // "exclude" masks. Anything left on the include mask which didn't overlap any of excluded | |||||
| // edges can emitted. This also works at the end of the exclude paths (exclude == TERMINAL) | |||||
| // since that has no outgoing edges (so the entire include path is emitted). | |||||
| for (DfaEdge ex : exclude.getEdges()) { | |||||
| int m = ex.getDigitMask() & mask; | |||||
| if (m != 0) { | |||||
| mask &= ~m; | |||||
| minimal = | |||||
| recursivelyMinimize(target, path.extendByMask(m), ex.getTarget(), minimal, minLength); | |||||
| } | |||||
| } | |||||
| // The mask identifies edges which are now outside the exclude tree, and thus safe to emit. | |||||
| if (mask != 0) { | |||||
| // Emitting an included path may involve emitting some of the sub-tree below it in order | |||||
| // to make up the minimal length (we can't do this for the terminating case above). | |||||
| minimal = emit(target, path.extendByMask(mask), minimal, minLength); | |||||
| } | |||||
| } | |||||
| return minimal; | |||||
| } | |||||
| /** | |||||
| * Recursively visits the sub-tree under the given node, extending the path until it reaches the | |||||
| * minimum length before emitting it. | |||||
| */ | |||||
| private static RangeTree emit( | |||||
| DfaNode node, RangeSpecification path, RangeTree minimal, int minLength) { | |||||
| if (path.length() >= minLength || node.equals(RangeTree.getTerminal())) { | |||||
| minimal = minimal.union(RangeTree.from(path)); | |||||
| } else { | |||||
| for (DfaEdge e : node.getEdges()) { | |||||
| minimal = minimal.union( | |||||
| emit(e.getTarget(), path.extendByMask(e.getDigitMask()), minimal, minLength)); | |||||
| } | |||||
| } | |||||
| return minimal; | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,752 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain; | |||||
| import static java.lang.Integer.numberOfLeadingZeros; | |||||
| import static java.lang.Integer.numberOfTrailingZeros; | |||||
| import com.google.common.collect.ContiguousSet; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.Iterables; | |||||
| import com.google.common.collect.Range; | |||||
| import com.google.common.collect.RangeSet; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Arrays; | |||||
| import java.util.Comparator; | |||||
| import java.util.Iterator; | |||||
| import java.util.List; | |||||
| import java.util.Set; | |||||
| /** | |||||
| * A compact representation of a disjoint set of ranges of digit sequences. This is a compact way | |||||
| * to represent one or many ranges of digit sequences which share the same length. Examples include: | |||||
| * <pre>{@code | |||||
| * "01234" --> the singleton range containing only the digit sequence "01234" | |||||
| * "012xx" --> the contiguous digit sequence range ["01200".."01299"] | |||||
| * "012[3-5]6xx" --> the disjoint set of contiguous digit sequence ranges | |||||
| * ["0123600".."0123699"], ["0124600".."0124699"], ["0125600".."0125699"] | |||||
| * }</pre> | |||||
| * Note that the sets of contiguous ranges defined by a {@code RangeSpecification} are always | |||||
| * mutually disjoint. | |||||
| * | |||||
| * <p>Range specifications have a natural prefix-based lexicographical ordering (based on the | |||||
| * most-significant point at which a difference appears). However, when comparing a disjoint set | |||||
| * of range specifications (e.g. from a {@link RangeTree}) it can be more intuitive to order them | |||||
| * by their minimum digit sequence. Note that this second approach fails if the range | |||||
| * specifications can overlap (e.g. comparing "1xx" and "100"). | |||||
| */ | |||||
| public final class RangeSpecification implements Comparable<RangeSpecification> { | |||||
| /** The mask of all possible digits. */ | |||||
| public static final char ALL_DIGITS_MASK = (1 << 10) - 1; | |||||
| private static final RangeSpecification EMPTY = new RangeSpecification(""); | |||||
| /** Returns the empty range specification, which matches only the empty digit sequence. */ | |||||
| public static RangeSpecification empty() { | |||||
| return EMPTY; | |||||
| } | |||||
| /** Returns the range specification of length one which matches any of the given digits. */ | |||||
| public static RangeSpecification singleton(Iterable<Integer> digits) { | |||||
| int mask = 0; | |||||
| for (int digit : digits) { | |||||
| checkArgument(0 <= digit && digit <= 9, "bad digit value '%s'", digit); | |||||
| mask |= (1 << digit); | |||||
| } | |||||
| return new RangeSpecification(String.valueOf((char) mask)); | |||||
| } | |||||
| /** Returns a new range specification which matches only the given non-empty digit sequence. */ | |||||
| public static RangeSpecification from(DigitSequence s) { | |||||
| if (s.length() == 0) { | |||||
| return RangeSpecification.empty(); | |||||
| } | |||||
| char[] masks = new char[s.length()]; | |||||
| for (int n = 0; n < masks.length; n++) { | |||||
| masks[n] = (char) (1 << s.getDigit(n)); | |||||
| } | |||||
| return new RangeSpecification(new String(masks)); | |||||
| } | |||||
| /** Returns a new range specification which matches any digit sequence of the specified length. */ | |||||
| public static RangeSpecification any(int length) { | |||||
| checkArgument(length >= 0); | |||||
| if (length == 0) { | |||||
| return RangeSpecification.empty(); | |||||
| } | |||||
| char[] masks = new char[length]; | |||||
| Arrays.fill(masks, ALL_DIGITS_MASK); | |||||
| return new RangeSpecification(new String(masks)); | |||||
| } | |||||
| /** | |||||
| * Parses the string form of a range specification (e.g. "1234[57-9]xxx"). This must be | |||||
| * correctly formed, including having all ranges be well formed (e.g. not "[33]", "[3-3]" or | |||||
| * "[6-4]"). | |||||
| * | |||||
| * <p>Note that non-canonical ranges are permitted if the digits are in order (e.g. "[1234]", | |||||
| * "[4-5]" or "[0-9]" but not "[4321]"). The returned range specification is canonical (e.g. | |||||
| * {@code parse("12[34569]").toString() == "12[3-69]"}). | |||||
| * | |||||
| * <p>The empty string is parsed as the empty range specification. | |||||
| * | |||||
| * <p>The use of single ASCII underscores ("_") to group ranges and aid readability is supported | |||||
| * during parsing but is not retained in the parsed result (e.g. | |||||
| * {@code parse("12_34[5-8]_xxx_xxx").toString() == "1234[5-8]xxxxxx"}). Note that underscore may | |||||
| * not be present inside ranges (e.g. "1_4") or at the ends of the range (e.g. "123xxx_"). | |||||
| */ | |||||
| public static RangeSpecification parse(String s) { | |||||
| if (s.isEmpty()) { | |||||
| return empty(); | |||||
| } | |||||
| checkArgument(!s.startsWith("_") && !s.endsWith("_"), "cannot start/end with '_': %s", s); | |||||
| StringBuilder bitmasks = new StringBuilder(); | |||||
| boolean lastCharWasUnderscore = false; | |||||
| for (int n = 0; n < s.length(); n++) { | |||||
| char c = s.charAt(n); | |||||
| switch (c) { | |||||
| case '_': | |||||
| checkArgument(!lastCharWasUnderscore, "cannot have multiple '_' in a row: %s", s); | |||||
| lastCharWasUnderscore = true; | |||||
| // Continue the for-loop rather than breaking out the switch to avoid resetting the flag. | |||||
| continue; | |||||
| case 'x': | |||||
| bitmasks.append(ALL_DIGITS_MASK); | |||||
| break; | |||||
| case '[': | |||||
| n += 1; | |||||
| int end = s.indexOf(']', n); | |||||
| checkArgument(end != -1, "unclosed range in specification: %s", s); | |||||
| checkArgument(end > n, "empty range in specification: %s", s); | |||||
| bitmasks.append(parseRange(s, n, end)); | |||||
| n = end; | |||||
| break; | |||||
| default: | |||||
| checkArgument('0' <= c && c <= '9', | |||||
| "bad digit value '%s' in range specification: %s", c, s); | |||||
| bitmasks.append((char) (1 << (c - '0'))); | |||||
| break; | |||||
| } | |||||
| lastCharWasUnderscore = false; | |||||
| } | |||||
| return new RangeSpecification(bitmasks.toString()); | |||||
| } | |||||
| private static char parseRange(String s, int start, int end) { | |||||
| int mask = 0; | |||||
| for (int n = start; n < end;) { | |||||
| char c = s.charAt(n++); | |||||
| checkArgument('0' <= c && c <= '9', | |||||
| "bad digit value '%s' in range specification: %s", c, s); | |||||
| int shift = (c - '0'); | |||||
| // check that this bit and all above it are zero (to ensure correct ordering). | |||||
| checkArgument(mask >> shift == 0, "unordered range in specification: %s", s); | |||||
| if (n == end || s.charAt(n) != '-') { | |||||
| // Single digit not in a range. | |||||
| mask |= 1 << shift; | |||||
| continue; | |||||
| } | |||||
| n++; | |||||
| checkArgument(n < end, "unclosed range in specification: %s", s); | |||||
| c = s.charAt(n++); | |||||
| checkArgument('0' <= c && c <= '9', | |||||
| "bad digit value '%s' in range specification: %s", c, s); | |||||
| int rshift = (c - '0'); | |||||
| checkArgument(rshift > shift, "unordered range in specification: %s", s); | |||||
| // Set bits from shift to rshift inclusive (e.g. 11111 & ~11 = 11100). | |||||
| mask |= ((1 << (rshift + 1)) - 1) & ~((1 << shift) - 1); | |||||
| } | |||||
| return (char) mask; | |||||
| } | |||||
| /** | |||||
| * Returns the canonical representation of the given ranges. The number of range specifications | |||||
| * in the returned instance may be higher or lower than the number of given ranges. | |||||
| * <p> | |||||
| * NOTE: This is only used by RangeTree for generating a RangeTree from a RangeSet, and is not | |||||
| * suitable as a public API (one day we might generate the RangeTree directly and be able to | |||||
| * delete this code). | |||||
| */ | |||||
| static ImmutableList<RangeSpecification> from(RangeSet<DigitSequence> ranges) { | |||||
| List<RangeSpecification> specs = new ArrayList<>(); | |||||
| Set<Range<DigitSequence>> s = ranges.asRanges(); | |||||
| checkArgument(!s.isEmpty(), "empty range set not permitted"); | |||||
| // Make sure are ranges we use are canonicalized over the domain of DigitSequences (so Range | |||||
| // operations (e.g. isConnected()) work as expected. See Range for more on why this matters. | |||||
| Range<DigitSequence> cur = s.iterator().next().canonical(domain()); | |||||
| checkArgument(!cur.contains(DigitSequence.empty()), | |||||
| "empty digit sequence not permitted in range set"); | |||||
| for (Range<DigitSequence> next : Iterables.skip(ranges.asRanges(), 1)) { | |||||
| next = next.canonical(domain()); | |||||
| if (cur.isConnected(next)) { | |||||
| // Even though 'cur' and 'next' are both canonicalized, it's not guaranteed that they are | |||||
| // closed-open (singleton ranges are fully closed and any range containing the maximum | |||||
| // value must be closed. To "union" the two ranges we must also preserve the bound types. | |||||
| cur = Range.range( | |||||
| cur.lowerEndpoint(), cur.lowerBoundType(), | |||||
| next.upperEndpoint(), next.upperBoundType()) | |||||
| .canonical(domain()); | |||||
| continue; | |||||
| } | |||||
| addRangeSpecsOf(cur, specs); | |||||
| cur = next; | |||||
| } | |||||
| addRangeSpecsOf(cur, specs); | |||||
| return ImmutableList.sortedCopyOf(Comparator.comparing(RangeSpecification::min), specs); | |||||
| } | |||||
| /** Adds the canonical minimal range specifications for a single range to the given list. */ | |||||
| private static void addRangeSpecsOf(Range<DigitSequence> r, List<RangeSpecification> specs) { | |||||
| // Given range is already canonical but may span multiple lengths. It's easier to view this | |||||
| // as a contiguous set when finding first/last elements however to avoid worrying about bound | |||||
| // types. A contiguous set is not an expensive class to create. | |||||
| ContiguousSet<DigitSequence> s = ContiguousSet.create(r, domain()); | |||||
| DigitSequence start = s.first(); | |||||
| DigitSequence end = s.last(); | |||||
| while (start.length() < end.length()) { | |||||
| // Add <start> to "999..." for the current block length (the max domain value is all 9's). | |||||
| DigitSequence blockEnd = DigitSequence.nines(start.length()); | |||||
| addRangeSpecs(start, blockEnd, specs); | |||||
| // Reset the start to the next length up (i.e. the "000..." sequence that's one longer). | |||||
| start = blockEnd.next(); | |||||
| } | |||||
| // Finally and the range specs up to (and including) the end value. | |||||
| addRangeSpecs(start, end, specs); | |||||
| } | |||||
| // Adds canonical minimal range specifications for the range of same-length digit sequences. | |||||
| private static void addRangeSpecs( | |||||
| DigitSequence start, DigitSequence end, List<RangeSpecification> specs) { | |||||
| int length = start.length(); | |||||
| checkArgument(end.length() == length); | |||||
| // Masks contains a running total of the bitmasks we want to convert to RangeSpecifications. | |||||
| // As processing proceeds, the mask array is reused. This is because the prefix used for | |||||
| // successive range specifications is always a subset of the previous specifications and the | |||||
| // trailing part of the array always fills up with the range mask for 'x' (i.e. [0-9]). | |||||
| int[] masks = new int[length]; | |||||
| // Stage 1: | |||||
| // Starting from the last digit in the 'start' sequence, work up until we find something that | |||||
| // is not a '0'. This is the first digit that needs to be adjusted to create a range | |||||
| // specification covering it and the digits 'below' it. For example, the first specification | |||||
| // for the range ["1200".."9999"] is "1[2-9]xx". | |||||
| // Once a specification is emitted, the start value is adjusted to the next digit sequence | |||||
| // immediately above the end of the emitted range, so after emitting "1[2-9]xx", start="2000". | |||||
| // Once each range specification is emitted, we continue working 'up' the digit sequence until | |||||
| // the next calculated start value exceeds the 'end' of our range. This specification cannot | |||||
| // be emitted and signals the end of stage 1. | |||||
| setBitmasks(masks, start); | |||||
| for (int n = previousNon(0, start, length); n != -1; n = previousNon(0, start, n)) { | |||||
| int loDigit = start.getDigit(n); | |||||
| DigitSequence prefix = start.first(n); | |||||
| DigitSequence blockEnd = prefix.extendBy(DigitSequence.nines(length - n)); | |||||
| if (blockEnd.compareTo(end) > 0) { | |||||
| // The end of this block would exceed the end of the main range, so we must stop. | |||||
| break; | |||||
| } | |||||
| // The bitmasks we want is: | |||||
| // <first (n-1) digits of 'start'> [loDigit..9] <any digits mask...> | |||||
| masks[n] = bitmaskUpFrom(loDigit); | |||||
| fillBitmasksAfter(masks, n); | |||||
| specs.add(RangeSpecification.fromBitmasks(masks)); | |||||
| // Adjust the range start now we have emitted the range specification. | |||||
| start = blockEnd.next(); | |||||
| } | |||||
| // Stage 2: | |||||
| // Very similar to stage 1, but work up from the last digit in the 'end' sequence. The | |||||
| // difference now is that we look for the first digit that's not '9' and generate ranges that | |||||
| // go down to the start of the range, not up to the end. Thus for ["0000", "1299"] the first | |||||
| // specification generated is "1[0-2]xx", which is emitted at the end of the list. | |||||
| int midIdx = specs.size(); | |||||
| setBitmasks(masks, end); | |||||
| for (int n = previousNon(9, end, length); n != -1; n = previousNon(9, end, n)) { | |||||
| int hiDigit = end.getDigit(n); | |||||
| DigitSequence prefix = end.first(n); | |||||
| DigitSequence blockStart = prefix.extendBy(DigitSequence.zeros(length - n)); | |||||
| if (blockStart.compareTo(start) < 0) { | |||||
| // The start of this block would precede the start of the main range, so we must stop. | |||||
| break; | |||||
| } | |||||
| // The bitmasks we want is: | |||||
| // <first (n-1) digits of 'end'> [0..hiDigit] <any digits mask...> | |||||
| masks[n] = bitmaskDownFrom(hiDigit); | |||||
| fillBitmasksAfter(masks, n); | |||||
| specs.add(midIdx, RangeSpecification.fromBitmasks(masks)); | |||||
| // Adjust the range end now we have emitted the range specification. | |||||
| end = blockStart.previous(); | |||||
| } | |||||
| // Stage 3: Having emitted the first and last set of range specifications, it only remains to | |||||
| // emit the "center" specification in the middle of the list. This is special as neither bound | |||||
| // is the end of a block. In previous stages, all partial ranges are either "up to 9" or | |||||
| // "down to zero". For example: ["1234".."1789"] has the center range "1[3-6]xx", and | |||||
| // ["1234".."1345"] has no center range at all. | |||||
| if (start.compareTo(end) < 0) { | |||||
| // Find the last digit before start and end combine (ie, 1200, 1299 --> 12xx --> n=1). We | |||||
| // know that 'start' and 'end' are the same length and bound a range like: | |||||
| // <prefix> [X..Y] [000..999] | |||||
| // but X or Y could be 0 or 9 respectively (just not both). | |||||
| // | |||||
| // Note that we don't even both to test the first digit in the sequences because if 'start' | |||||
| // and 'end' span a full range (e.g. [000.999]) we can just use the same code to fill the | |||||
| // masks correctly anyway. | |||||
| int n = start.length(); | |||||
| while (--n > 0 && start.getDigit(n) == 0 && end.getDigit(n) == 9) {} | |||||
| // Bitwise AND the masks for [X..9] and [0..Y] to get the mask for [X..Y]. | |||||
| // Note that the "masks" array already contains the correct prefix digits up to (n-1). | |||||
| masks[n] = bitmaskUpFrom(start.getDigit(n)) & bitmaskDownFrom(end.getDigit(n)); | |||||
| fillBitmasksAfter(masks, n); | |||||
| specs.add(midIdx, RangeSpecification.fromBitmasks(masks)); | |||||
| } | |||||
| } | |||||
| // Sets the values in the given array to correspond to the digits in the given sequence. If a | |||||
| // range specification were made from the resulting array it would match only that digit sequence. | |||||
| private static void setBitmasks(int[] masks, DigitSequence s) { | |||||
| for (int n = 0; n < s.length(); n++) { | |||||
| masks[n] = 1 << s.getDigit(n); | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Creates a range specification from a given array of integer masks. The Nth element of the | |||||
| * array corresponds to the Nth element in the range specification, and mask values must be | |||||
| * non-zero and have only bits 0 to 9 set. | |||||
| */ | |||||
| private static RangeSpecification fromBitmasks(int[] bitmasks) { | |||||
| checkArgument(bitmasks.length <= DigitSequence.MAX_DIGITS, | |||||
| "range specification too large"); | |||||
| StringBuilder s = new StringBuilder(bitmasks.length); | |||||
| s.setLength(bitmasks.length); | |||||
| for (int n = 0; n < bitmasks.length; n++) { | |||||
| int mask = bitmasks[n]; | |||||
| checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "invalid bitmask: %s", mask); | |||||
| s.setCharAt(n, (char) mask); | |||||
| } | |||||
| return new RangeSpecification(s.toString()); | |||||
| } | |||||
| // Fills the bitmasks after the given index with the "all digits" mask (i.e. matching [0-9]). | |||||
| // This can accept -1 as the index since it always pre-increments before using it. | |||||
| private static void fillBitmasksAfter(int[] masks, int n) { | |||||
| // Because of the iterative way the mask array is handled, we can stop filling when we hit | |||||
| // ALL_DIGITS_MASK because everything past that must already be filled. | |||||
| while (++n < masks.length && masks[n] != ALL_DIGITS_MASK) { | |||||
| masks[n] = ALL_DIGITS_MASK; | |||||
| } | |||||
| } | |||||
| // Starting at digit-N, returns the index of the nearest preceding digit that's not equal to the | |||||
| // given value (or -1 if no such digit exists). | |||||
| private static int previousNon(int digit, DigitSequence s, int n) { | |||||
| while (--n >= 0 && s.getDigit(n) == digit) {} | |||||
| return n; | |||||
| } | |||||
| /** Returns the bitmask for the range {@code [n-9]}. */ | |||||
| private static int bitmaskUpFrom(int n) { | |||||
| return (-1 << n) & ALL_DIGITS_MASK; | |||||
| } | |||||
| /** Returns the bitmask for the range {@code [0-n]}. */ | |||||
| private static int bitmaskDownFrom(int n) { | |||||
| return ALL_DIGITS_MASK >>> (9 - n); | |||||
| } | |||||
// One bitmask per character (only bits 0..9 are used), in specification order.
private final String bitmasks;
// Smallest and largest digit sequences (inclusive) spanned by the ranges of this specification.
// The minimum is cached deliberately because disjoint ranges are sorted by it; caching the
// maximum may be less useful.
private final DigitSequence min;
private final DigitSequence max;
// Number of distinct digit sequences matched by this specification.
private final long sequenceCount;

// Creates a specification from its encoded form (one bitmask per character), validating every
// mask and deriving the minimum/maximum sequences and the sequence count from them.
private RangeSpecification(String bitmasks) {
  int size = bitmasks.length();
  checkArgument(size <= DigitSequence.MAX_DIGITS,
      "Range specification too long (%s digits)", size);
  this.bitmasks = bitmasks;
  long lowest = 0;
  long highest = 0;
  long count = 1;
  for (char encoded : bitmasks.toCharArray()) {
    int mask = encoded;
    checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "invalid bitmask: %s", mask);
    // The lowest and highest set bits give the smallest and largest digits at this position.
    int smallestDigit = numberOfTrailingZeros(mask);
    int largestDigit = (Integer.SIZE - 1) - numberOfLeadingZeros(mask);
    lowest = (10 * lowest) + smallestDigit;
    highest = (10 * highest) + largestDigit;
    // Each position multiplies the number of matched sequences by the number of digits allowed.
    count *= Integer.bitCount(mask);
  }
  this.min = new DigitSequence(size, lowest);
  this.max = new DigitSequence(size, highest);
  this.sequenceCount = count;
}
| /** | |||||
| * Returns the number of digits that this specification can match. This is the length of all | |||||
| * digit sequences which can match this specification. | |||||
| */ | |||||
| public int length() { | |||||
| return bitmasks.length(); | |||||
| } | |||||
| /** Returns the smallest digit sequence matched by this range. */ | |||||
| public DigitSequence min() { | |||||
| return min; | |||||
| } | |||||
| /** Returns the largest digit sequence matched by this range. */ | |||||
| public DigitSequence max() { | |||||
| return max; | |||||
| } | |||||
| /** Returns the total number of digit sequences matched by (contained in) this specification. */ | |||||
| public long getSequenceCount() { | |||||
| return sequenceCount; | |||||
| } | |||||
| /** | |||||
| * Returns the bitmask of the Nth range in this specification. Bit-X (0<= X <= 9) corresponds to | |||||
| * the digit with value X. As every range in a specification must match at least one digit, this | |||||
| * mask can never be zero. | |||||
| */ | |||||
| public int getBitmask(int n) { | |||||
| return bitmasks.charAt(n); | |||||
| } | |||||
| /** | |||||
| * Returns whether the given digit sequence is in one of the ranges specified by this instance. | |||||
| * This is more efficient that obtaining the associated {@code RangeSet} and checking that. | |||||
| */ | |||||
| public boolean matches(DigitSequence digits) { | |||||
| if (digits.length() != length()) { | |||||
| return false; | |||||
| } | |||||
| for (int n = 0; n < length(); n++) { | |||||
| if ((bitmasks.charAt(n) & (1 << digits.getDigit(n))) == 0) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| // Returns the next sequence in forward order which is contained by a range defined by this | |||||
| // range specification, or null if none exists. The given sequence must not be matched by this | |||||
| // specification. | |||||
| private DigitSequence nextRangeStart(DigitSequence s) { | |||||
| // Easy length based checks (this is where the fact that range specification only define ranges | |||||
| // of the same length really simplifies things). | |||||
| if (s.length() < length()) { | |||||
| return min(); | |||||
| } else if (s.length() > length()) { | |||||
| return null; | |||||
| } | |||||
| // Algorithm: | |||||
| // 1) Find the highest digit that isn't in the corresponding bitmask for the range. | |||||
| // 2) Try and increase the digit value until it's inside the next available range. | |||||
| // 3) If that fails, move back up the sequence and increment the next digit up. | |||||
| // 4) Repeat until a digit can be adjusted to start a new range, or all digits are exhausted. | |||||
| // If all digits exhausted, the sequence was above all ranges in this specification. | |||||
| // Otherwise return a new sequence using the unchanged prefix of the original sequence, the | |||||
| // newly adjusted digit and the trailing digits of the minimal sequence. | |||||
| for (int n = 0; n < length(); n++) { | |||||
| int d = s.getDigit(n); | |||||
| int mask = bitmasks.charAt(n); | |||||
| if ((mask & (1 << d)) != 0) { | |||||
| continue; | |||||
| } | |||||
| while (true) { | |||||
| // Digit 'd' is either outside the range mask (first time though the loop) or inside a | |||||
| // range. Either way we want to find the next digit above it which is inside a range. | |||||
| // First increment 'd', and then find the next set bit in the mask at or above that point. | |||||
| // Not extra check is needed at the end of ranges because numberOfTrailingZeros(0)==32 | |||||
| // which neatly ensures that the new value of 'd' must be out-of-range. | |||||
| // If mask=[3-58]: d=1-->d'=3, d=4-->d'=5, d=5-->d'=8, d=8-->d'>9 | |||||
| d++; | |||||
| d += numberOfTrailingZeros(mask >>> d); | |||||
| if (d <= 9) { | |||||
| // Found the value of the largest digit which can be adjusted to start the next range. | |||||
| // Everything higher than this digit is the same as the original sequence and everything | |||||
| // lower that this digit is the same as the corresponding digit in the minimal value. | |||||
| return s.first(n).extendBy(d).extendBy(min.last((length() - n) - 1)); | |||||
| } | |||||
| // No more bits available in this range, so go back up to the previous range. | |||||
| if (--n < 0) { | |||||
| // The sequence was above the last element in the set. | |||||
| // Example: Range Spec: 1[2-8][3-8]456, Sequence: 188457 | |||||
| return null; | |||||
| } | |||||
| d = s.getDigit(n); | |||||
| mask = bitmasks.charAt(n); | |||||
| } | |||||
| } | |||||
| // If we finish the outer loop the given sequence was in a range (which is an error). | |||||
| throw new IllegalArgumentException( | |||||
| "Digit sequence '" + s + "' is in the range specified by: " + this); | |||||
| } | |||||
| // Given a sequence inside a range defined by this specification, return the highest sequence | |||||
| // in the current range (possibly just the given sequence). | |||||
| private DigitSequence currentRangeEnd(DigitSequence s) { | |||||
| // Build up a value representing the trailing digits (which must always be 9's). | |||||
| long nines = 0; | |||||
| for (int n = length() - 1; n >= 0; n--, nines = (10 * nines) + 9) { | |||||
| int mask = bitmasks.charAt(n); | |||||
| if (mask == ALL_DIGITS_MASK) { | |||||
| continue; | |||||
| } | |||||
| // The new digit is the top of the current range that the current sequence digit is in. | |||||
| int d = nextUnsetBit(mask, s.getDigit(n)) - 1; | |||||
| DigitSequence end = | |||||
| s.first(n).extendBy(d).extendBy(new DigitSequence((length() - n) - 1, nines)); | |||||
| // Edge case for cases like "12[34][09]x" where "1239x" and "1240x" abut. This adjustment | |||||
| // will happen at most once because the second range cannot also include an upper bound | |||||
| // ending at '9', since otherwise (mask == ALL_DIGITS_MASK) at this position. The next | |||||
| // sequence must be terminated with zeros starting at the current position having "rolled | |||||
| // over" on the digit above. | |||||
| if (d == 9) { | |||||
| DigitSequence next = end.next(); | |||||
| if (matches(next)) { | |||||
| d = nextUnsetBit(mask, 0) - 1; | |||||
| end = next.first(n).extendBy(d).extendBy(new DigitSequence((length() - n) - 1, nines)); | |||||
| } | |||||
| } | |||||
| return end; | |||||
| } | |||||
| // The range specification is entirely 'x', which means it's a single range. | |||||
| return max; | |||||
| } | |||||
| /** | |||||
| * Returns a generating iterator which iterates in forward order over the disjoint ranges defined | |||||
| * by this specification. This is not actually as useful as you might expect because in a lot of | |||||
| * cases you would be dealing with a sequence of range specifications and it's not true that all | |||||
| * ranges from multiple specifications are disjoint. | |||||
| */ | |||||
| Iterable<Range<DigitSequence>> asRanges() { | |||||
| return () -> new Iterator<Range<DigitSequence>>() { | |||||
| // Start is always in a range. | |||||
| private DigitSequence start = min; | |||||
| @Override | |||||
| public boolean hasNext() { | |||||
| return start != null; | |||||
| } | |||||
| @Override | |||||
| public Range<DigitSequence> next() { | |||||
| DigitSequence end = currentRangeEnd(start); | |||||
| Range<DigitSequence> r = Range.closed(start, end).canonical(DigitSequence.domain()); | |||||
| start = nextRangeStart(end.next()); | |||||
| return r; | |||||
| } | |||||
| }; | |||||
| } | |||||
| /** | |||||
| * Returns a new range specification which is extended by the given mask value. For example: | |||||
| * <pre>{@code | |||||
| * "0123[4-6]".extendByMask(7) == "0123[4-6][0-2]" | |||||
| * }</pre> | |||||
| */ | |||||
| public RangeSpecification extendByMask(int mask) { | |||||
| checkArgument(mask > 0 && mask <= ALL_DIGITS_MASK, "bad mask value '%s'", mask); | |||||
| return new RangeSpecification(bitmasks + ((char) mask)); | |||||
| } | |||||
| /** | |||||
| * Returns a new range specification which is extended by the given specification. For example: | |||||
| * <pre>{@code | |||||
| * "0123[4-6]".extendBy("7[89]") == "0123[4-6]7[89]" | |||||
| * }</pre> | |||||
| */ | |||||
| public RangeSpecification extendBy(RangeSpecification extra) { | |||||
| return new RangeSpecification(bitmasks + extra.bitmasks); | |||||
| } | |||||
| /** | |||||
| * Returns a new range specification which is extended by a sequence of any digits of the given | |||||
| * length. For example: | |||||
| * <pre>{@code | |||||
| * "012".extendByLength(4) == "012xxxx" | |||||
| * }</pre> | |||||
| */ | |||||
| public RangeSpecification extendByLength(int length) { | |||||
| return this.extendBy(any(length)); | |||||
| } | |||||
| /** | |||||
| * Returns a range specification containing only the first {@code n} digits. If the given length | |||||
| * is the same or greater than the specification's length, this specification is returned. | |||||
| * For example: | |||||
| * <pre>{@code | |||||
| * "01[2-4]xx".first(8) == "01[2-4]xx" (same instance) | |||||
| * "01[2-4]xx".first(5) == "01[2-4]xx" (same instance) | |||||
| * "01[2-4]xx".first(3) == "01[2-4]" | |||||
| * "01[2-4]xx".first(0) == "" (the empty specification) | |||||
| * }</pre> | |||||
| */ | |||||
| public RangeSpecification first(int n) { | |||||
| checkArgument(n >= 0); | |||||
| if (n == 0) { | |||||
| return empty(); | |||||
| } | |||||
| return n < length() ? new RangeSpecification(bitmasks.substring(0, n)) : this; | |||||
| } | |||||
| /** | |||||
| * Returns a range specification containing only the last {@code n} digits. If the given length | |||||
| * is the same or greater than the specification's length, this specification is returned. | |||||
| * For example: | |||||
| * <pre>{@code | |||||
| * "01[2-4]xx".last(8) == "01[2-4]xx" (same instance) | |||||
| * "01[2-4]xx".last(5) == "01[2-4]xx" (same instance) | |||||
| * "01[2-4]xx".last(3) == "[2-4]xx" | |||||
| * "01[2-4]xx".last(0) == "" (the empty specification) | |||||
| * }</pre> | |||||
| */ | |||||
| public RangeSpecification last(int n) { | |||||
| checkArgument(n >= 0); | |||||
| if (n == 0) { | |||||
| return empty(); | |||||
| } | |||||
| return n < length() ? new RangeSpecification(bitmasks.substring(length() - n)) : this; | |||||
| } | |||||
| /** | |||||
| * Returns a range specification with any trailing "any digit" sequence removed. For example: | |||||
| * <pre>{@code | |||||
| * "0123".getPrefix() == "0123" (same instance) | |||||
| * "0123xx".getPrefix() == "0123" | |||||
| * "xxx".getPrefix() == "" (the empty specification) | |||||
| * }</pre> | |||||
| */ | |||||
| public RangeSpecification getPrefix() { | |||||
| int length = length(); | |||||
| while (length > 0 && getBitmask(length - 1) == ALL_DIGITS_MASK) { | |||||
| length--; | |||||
| } | |||||
| return first(length); | |||||
| } | |||||
| @Override | |||||
| public int compareTo(RangeSpecification other) { | |||||
| int length = Math.min(length(), other.length()); | |||||
| for (int i = 0; i < length; i++) { | |||||
| int mask = getBitmask(i); | |||||
| int otherMask = other.getBitmask(i); | |||||
| if (mask == otherMask) { | |||||
| continue; | |||||
| } | |||||
| int commonBits = mask & otherMask; | |||||
| mask -= commonBits; | |||||
| otherMask -= commonBits; | |||||
| // At least one mask is still non-zero and they don't overlap. | |||||
| // | |||||
| // The mask with the lowest set bit is the smaller mask in the ordering, since that bit | |||||
| // distinguishes a smaller prefix than can never exist in the other specification. | |||||
| // Testing the number of trailing zeros is equivalent to finding the lowest set bit. | |||||
| return Integer.compare(numberOfTrailingZeros(mask), numberOfTrailingZeros(otherMask)); | |||||
| } | |||||
| return Integer.compare(length(), other.length()); | |||||
| } | |||||
| @Override | |||||
| public boolean equals(Object o) { | |||||
| return (o instanceof RangeSpecification) && bitmasks.equals(((RangeSpecification) o).bitmasks); | |||||
| } | |||||
| @Override | |||||
| public int hashCode() { | |||||
| return bitmasks.hashCode(); | |||||
| } | |||||
| /** | |||||
| * If you want lexicographical ordering of range specifications, don't use this method, use the | |||||
| * {@code min().toString()}. This works assuming the ranges being compared are disjoint. | |||||
| */ | |||||
| @Override | |||||
| public String toString() { | |||||
| // Consider caching if it turns out that we are serializing a lot of these. | |||||
| StringBuilder s = new StringBuilder(); | |||||
| for (int n = 0; n < bitmasks.length(); n++) { | |||||
| appendMask(bitmasks.charAt(n), s); | |||||
| } | |||||
| return s.toString(); | |||||
| } | |||||
| /** Returns the string representation of a single bit-mask. */ | |||||
| public static String toString(int bitMask) { | |||||
| checkArgument(bitMask > 0 && bitMask < (1 << 10), "bad mask value: %s", bitMask); | |||||
| return appendMask(bitMask, new StringBuilder()).toString(); | |||||
| } | |||||
| static StringBuilder appendMask(int mask, StringBuilder out) { | |||||
| if (mask == ALL_DIGITS_MASK) { | |||||
| out.append('x'); | |||||
| } else if (hasOneBit(mask)) { | |||||
| out.append(asChar(numberOfTrailingZeros(mask))); | |||||
| } else { | |||||
| out.append('['); | |||||
| for (int loBit = numberOfTrailingZeros(mask); | |||||
| loBit != 32; | |||||
| loBit = numberOfTrailingZeros(mask)) { | |||||
| // Always append the loBit digit into the range. | |||||
| out.append(asChar(loBit)); | |||||
| int hiBit = nextUnsetBit(mask, loBit); | |||||
| int numBits = hiBit - loBit; | |||||
| if (numBits > 1) { | |||||
| // Stylistically prefer "[34]" to "[3-4]" for compactness. | |||||
| if (numBits > 2) { | |||||
| out.append('-'); | |||||
| } | |||||
| out.append(asChar(hiBit - 1)); | |||||
| } | |||||
| // Clear the bits we've just processed before going back round the loop. | |||||
| mask &= ~((1 << hiBit) - 1); | |||||
| } | |||||
| out.append(']'); | |||||
| } | |||||
| return out; | |||||
| } | |||||
| // Turns a value in the range [0-9] into the corresponding ASCII character. | |||||
| private static char asChar(int digit) { | |||||
| return (char) ('0' + digit); | |||||
| } | |||||
| // Determines if the given bit-mask has only one bit set. | |||||
| private static boolean hasOneBit(int mask) { | |||||
| return (mask & (mask - 1)) == 0; | |||||
| } | |||||
| private static int nextUnsetBit(int mask, int bit) { | |||||
| // Example mask transform for [013-589] if bit=3: | |||||
| // v-- bit=3 | |||||
| // 01100111011 | |||||
| // 00000000111 (1 << 3) - 1 | |||||
| // 01100111111 OR with mask | |||||
| // 10011000000 Bitwise NOT | |||||
| // ^-- return=6 | |||||
| return numberOfTrailingZeros(~(mask | ((1 << bit) - 1))); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,194 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; | |||||
| import java.util.ArrayList; | |||||
| import java.util.List; | |||||
| /** | |||||
| * Factor a range tree into a sequence of trees which attempts to minimize overall complexity in | |||||
| * the face of non-determinism. This can be used to reduce the size of any generated regular | |||||
| * expressions. | |||||
| */ | |||||
| public final class RangeTreeFactorizer { | |||||
| /** Strategies to control how merging is achieved when building factors.*/ | |||||
| public enum MergeStrategy { | |||||
| /** | |||||
| * Edges are only merged if they accept exactly the same set of digits. If the existing factor | |||||
| * contains "[0-5]" it will not be merged with the candidate edge "[0-8]". | |||||
| */ | |||||
| REQUIRE_EQUAL_EDGES, | |||||
| /** | |||||
| * Edges can be merged if the candidate edge accepts more digits than the existing edge. If the | |||||
| * existing factor contains "[0-5]" and the candidate edge is "[0-8]", the candidate edge is | |||||
| * split so that "[0-5]" is merged as normal and an additional edge "[6-8]" is branched off. | |||||
| */ | |||||
| ALLOW_EDGE_SPLITTING, | |||||
| } | |||||
| /** | |||||
| * Factors the given range tree. | |||||
| * <p> | |||||
| * Paths are processed longest-first, and a path belongs in particular "factor" if it can be | |||||
| * added without "causing a split" in the existing factor. For example, given an existing factor | |||||
| * {@code {"12[3-6]x", "45xx"}}: | |||||
| * <ul> | |||||
| * <li>The path "12[3-6]" can be added, since it is a prefix of one of the existing paths in | |||||
| * the DFA. | |||||
| * <li>The path "13xx" can be added since it forms a new branch in the DFA, which does not | |||||
| * affect any existing branches ("13..." is disjoint with "12..."). | |||||
| * <li>The path "12[34]" cannot be added since it would "split" the existing path | |||||
| * "12[3-6]x" in the DFA ("[34]" is a subset of "[3-6]"). " | |||||
| * <li>Depending on the merge strategy, the path "12[0-6]x" might be added ("[0-6]" is a | |||||
| * superset of "[3-6]"). See {@link MergeStrategy} for more information. | |||||
| * </ul> | |||||
| */ | |||||
| public static ImmutableList<RangeTree> factor(RangeTree ranges, MergeStrategy strategy) { | |||||
| // If only one length on all paths, the DFA is already "factored". | |||||
| if (ranges.getLengths().size() == 1) { | |||||
| return ImmutableList.of(ranges); | |||||
| } | |||||
| List<RangeTree> factors = new ArrayList<>(); | |||||
| // Start with the "naive" factors (splitting by length) from longest to shortest. | |||||
| for (int n : ranges.getLengths().descendingSet()) { | |||||
| factors.add(ranges.intersect(RangeTree.from(RangeSpecification.any(n)))); | |||||
| } | |||||
| // Now attempt to merge as much of each of the shorter factors as possible into the longer ones. | |||||
| // In each loop we subsume a candidate factor into previous factors, either in whole or in part. | |||||
| int index = 1; | |||||
| while (index < factors.size()) { | |||||
| // Merge (as much as possible) each "naive" factor into earlier factors. | |||||
| RangeTree r = factors.get(index); | |||||
| for (int n = 0; n < index && !r.isEmpty(); n++) { | |||||
| RangeTree merged = new RangeTreeFactorizer(factors.get(n), strategy).mergeFrom(r); | |||||
| factors.set(n, merged); | |||||
| // Calculate the ranges which haven't yet been merged into any earlier factor. | |||||
| r = r.subtract(merged); | |||||
| } | |||||
| if (r.isEmpty()) { | |||||
| // All ranges merged, so remove the original factor (index now references the next factor). | |||||
| factors.remove(index); | |||||
| } else { | |||||
| // We have some un-factorable ranges which are kept to start a new factor. | |||||
| factors.set(index, r); | |||||
| index++; | |||||
| } | |||||
| } | |||||
| return ImmutableList.copyOf(factors); | |||||
| } | |||||
| // This is modified as paths are added. | |||||
| private RangeTree factor; | |||||
| private final MergeStrategy strategy; | |||||
| RangeTreeFactorizer(RangeTree factor, MergeStrategy strategy) { | |||||
| this.factor = checkNotNull(factor); | |||||
| this.strategy = strategy; | |||||
| } | |||||
| RangeTree mergeFrom(RangeTree ranges) { | |||||
| recursivelyMerge(ranges.getInitial(), factor.getInitial(), RangeSpecification.empty()); | |||||
| return factor; | |||||
| } | |||||
| void recursivelyMerge(DfaNode srcNode, DfaNode dstNode, RangeSpecification path) { | |||||
| if (srcNode.canTerminate()) { | |||||
| factor = factor.union(RangeTree.from(path)); | |||||
| } else { | |||||
| srcNode.accept(new FactoringVisitor(dstNode, path)); | |||||
| } | |||||
| } | |||||
| private final class FactoringVisitor implements DfaVisitor { | |||||
| private final RangeSpecification path; | |||||
| private final DfaNode dstNode; | |||||
| // True if we encountered a situation when an edge we are merging (srcMask) has a partial | |||||
| // overlap with the existing edge (dstMask) (e.g. merging "[0-6]" into "[4-9]"). This is | |||||
| // distinct from the case where the existing edge is a subset of the edge being merged (e.g. | |||||
| // merging "[0-6]" into "[2-4]", where the edge being merged can be split into "[0156]" and | |||||
| // "[2-4]"). In either strategy, a partial overlap will prevent merging. | |||||
| private boolean partialOverlap = false; | |||||
| // Records the union of all edge ranges visited for the current node. This is used to determine | |||||
| // the remaining edges that must be added after visiting the existing factor (especially in the | |||||
| // case of ALLOW_EDGE_SPLITTING). | |||||
| private int allDstMask = 0; | |||||
| FactoringVisitor(DfaNode dstNode, RangeSpecification path) { | |||||
| this.dstNode = dstNode; | |||||
| this.path = path; | |||||
| } | |||||
| @Override | |||||
| public void visit(DfaNode source, DfaEdge srcEdge, DfaNode srcTarget) { | |||||
| int srcMask = srcEdge.getDigitMask(); | |||||
| dstNode.accept((s, dstEdge, dstTarget) -> { | |||||
| int dstMask = dstEdge.getDigitMask(); | |||||
| if ((strategy == REQUIRE_EQUAL_EDGES) ? (dstMask == srcMask) : (dstMask & ~srcMask) == 0) { | |||||
| // The set of digits accepted by the edge being merged (mask) is equal-to or a superset | |||||
| // of the digits of the edge in the factor we are merging into. The path is extended by | |||||
| // the destination edge because during recursion we only follow paths already in the | |||||
| // factor. | |||||
| recursivelyMerge(srcTarget, dstTarget, path.extendByMask(dstMask)); | |||||
| } else { | |||||
| partialOverlap |= (dstMask & srcMask) != 0; | |||||
| } | |||||
| allDstMask |= dstMask; | |||||
| }); | |||||
| if (!partialOverlap) { | |||||
| // Work out the digits that weren't in any of the edges of the factor we were processing | |||||
| // and merge the sub-tree under that edge into the current factor. For REQUIRE_EQUAL_EDGES | |||||
| // the extraMask is always either srcMask or 0 (since the edge was either added in full, | |||||
| // or disjoint with all the existing edges). For ALLOW_EDGE_SPLITTING it's the remaining | |||||
| // range that wasn't merged with any of the existing paths. | |||||
| int extraMask = srcMask & ~allDstMask; | |||||
| if (extraMask != 0) { | |||||
| new MergingVisitor(path).recurse(srcTarget, extraMask); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| private final class MergingVisitor implements DfaVisitor { | |||||
| private final RangeSpecification path; | |||||
| MergingVisitor(RangeSpecification path) { | |||||
| this.path = checkNotNull(path); | |||||
| } | |||||
| void recurse(DfaNode node, int mask) { | |||||
| RangeSpecification newPath = path.extendByMask(mask); | |||||
| if (node.canTerminate()) { | |||||
| factor = factor.union(RangeTree.from(newPath)); | |||||
| } else { | |||||
| node.accept(new MergingVisitor(newPath)); | |||||
| } | |||||
| } | |||||
| @Override | |||||
| public void visit(DfaNode source, DfaEdge edge, DfaNode target) { | |||||
| recurse(target, edge.getDigitMask()); | |||||
| } | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,112 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.base.CaseFormat.LOWER_CAMEL; | |||||
| import static com.google.common.base.CaseFormat.UPPER_UNDERSCORE; | |||||
| import static com.google.common.base.Preconditions.checkState; | |||||
| import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.MOBILE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PAGER; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PERSONAL_NUMBER; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PREMIUM_RATE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.SHARED_COST; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.TOLL_FREE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UAN; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.VOICEMAIL; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.VOIP; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_FIXED_LINE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_MOBILE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PAGER; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PERSONAL_NUMBER; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_PREMIUM_RATE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_SHARED_COST; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_TOLL_FREE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UAN; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UNKNOWN; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_VOICEMAIL; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_VOIP; | |||||
| import static java.util.function.Function.identity; | |||||
| import com.google.common.collect.ImmutableBiMap; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType; | |||||
| import java.util.Optional; | |||||
| import java.util.stream.Stream; | |||||
| /** Static utility for conversion of number types. */ | |||||
| public final class Types { | |||||
| private static final ImmutableBiMap<String, XmlNumberType> XML_TYPE_MAP = | |||||
| Stream.of(XmlNumberType.values()) | |||||
| .filter(t -> t != XML_UNKNOWN && t != XmlNumberType.UNRECOGNIZED) | |||||
| .collect(toImmutableBiMap(Types::toXmlName, identity())); | |||||
| // Map the subset of XmlNumberType values which correspond to valid number types. Note that while | |||||
| // FIXED_LINE and MOBILE exist in both types, and can be converted, their semantics change. | |||||
| private static final ImmutableBiMap<XmlNumberType, ValidNumberType> XML_TO_SCHEMA_TYPE_MAP = | |||||
| ImmutableBiMap.<XmlNumberType, ValidNumberType>builder() | |||||
| .put(XML_FIXED_LINE, FIXED_LINE) | |||||
| .put(XML_MOBILE, MOBILE) | |||||
| .put(XML_PAGER, PAGER) | |||||
| .put(XML_TOLL_FREE, TOLL_FREE) | |||||
| .put(XML_PREMIUM_RATE, PREMIUM_RATE) | |||||
| .put(XML_SHARED_COST, SHARED_COST) | |||||
| .put(XML_PERSONAL_NUMBER, PERSONAL_NUMBER) | |||||
| .put(XML_VOIP, VOIP) | |||||
| .put(XML_UAN, UAN) | |||||
| .put(XML_VOICEMAIL, VOICEMAIL) | |||||
| .build(); | |||||
| /** Returns the set of valid XML type names. */ | |||||
| public static ImmutableSet<String> getXmlNames() { | |||||
| return XML_TYPE_MAP.keySet(); | |||||
| } | |||||
| /** Returns the XML element name based on the given XML range type. */ | |||||
| public static String toXmlName(XmlNumberType type) { | |||||
| checkState(type.name().startsWith("XML_"), "Bad type: %s", type); | |||||
| return UPPER_UNDERSCORE.to(LOWER_CAMEL, type.name().substring(4)); | |||||
| } | |||||
| /** Returns the XML element name based on the given XML shortcode type. */ | |||||
| public static String toXmlName(XmlShortcodeType type) { | |||||
| checkState(type.name().startsWith("SC_"), "Bad type: %s", type); | |||||
| return UPPER_UNDERSCORE.to(LOWER_CAMEL, type.name().substring(3)); | |||||
| } | |||||
| /** | |||||
| * Returns the XML range type based on the given case-sensitive XML element name (e.g. | |||||
| * "fixedLine"). | |||||
| */ | |||||
| public static Optional<XmlNumberType> forXmlName(String xmlName) { | |||||
| return Optional.ofNullable(XML_TYPE_MAP.get(xmlName)); | |||||
| } | |||||
| /** Returns the {@code ValidNumberType} equivalent of the given XML range type (if it exists). */ | |||||
| public static Optional<ValidNumberType> toSchemaType(XmlNumberType rangeType) { | |||||
| return Optional.ofNullable(XML_TO_SCHEMA_TYPE_MAP.get(rangeType)); | |||||
| } | |||||
| /** Returns the {@code XmlNumberType} equivalent of the given schema range type (if it exists). */ | |||||
| public static Optional<XmlNumberType> toXmlType(ValidNumberType schemaType) { | |||||
| return Optional.ofNullable(XML_TO_SCHEMA_TYPE_MAP.inverse().get(schemaType)); | |||||
| } | |||||
| private Types() {} | |||||
| } | |||||
| @ -0,0 +1,99 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.i18n; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkState; | |||||
| import static java.util.Comparator.comparing; | |||||
| import static java.util.Comparator.naturalOrder; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.ibm.icu.util.ULocale; | |||||
| import java.util.Comparator; | |||||
| import java.util.regex.Pattern; | |||||
| /** | |||||
| * A simple type-safe identifier for CLDR regions for phone numbers. Only basic checking of regions | |||||
| * is performed, but this should be fine since the set of input regions is tightly controlled. | |||||
| * | |||||
| * <p>The metadata tooling makes only minimal use of the semantics of region codes, relying on | |||||
| * them mainly as key values, and never tries to canonicalize or modify them. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class PhoneRegion implements Comparable<PhoneRegion> { | |||||
| // We limit the non XX region codes to just "world" for this project. | |||||
| private static final Pattern VALID_CODE = Pattern.compile("[A-Z]{2}|001"); | |||||
| // Since we want "ZZ" < "001" in the ordering. | |||||
| private static Comparator<PhoneRegion> ORDERING = | |||||
| comparing(r -> r.locale().getCountry(), | |||||
| comparing(String::length).thenComparing(naturalOrder())); | |||||
| private static final PhoneRegion UNKNOWN = of("ZZ"); | |||||
| private static final PhoneRegion WORLD = of("001"); | |||||
| /** Returns the "world" region (001). */ | |||||
| public static PhoneRegion getWorld() { | |||||
| return PhoneRegion.WORLD; | |||||
| } | |||||
| /** Returns the "unknown" region (ZZ). */ | |||||
| public static PhoneRegion getUnknown() { | |||||
| return PhoneRegion.UNKNOWN; | |||||
| } | |||||
| /** | |||||
| * Returns the region identified by the given case-insensitive CLDR String representation. | |||||
| * | |||||
| * @throws IllegalArgumentException if there is no region for {@code cldrCode} | |||||
| */ | |||||
| public static PhoneRegion of(String cldrCode) { | |||||
| checkArgument(VALID_CODE.matcher(cldrCode).matches(), "invalid region code: %s", cldrCode); | |||||
| return new AutoValue_PhoneRegion(new ULocale.Builder().setRegion(cldrCode).build()); | |||||
| } | |||||
| @Override | |||||
| public int compareTo(PhoneRegion other) { | |||||
| return ORDERING.compare(this, other); | |||||
| } | |||||
| /** Returns the string representation for the region (either a two-letter or three-digit code). */ | |||||
| @Override public final String toString() { | |||||
| String s = locale().getCountry(); | |||||
| checkArgument(!s.isEmpty(), "invalid (empty) country: %s", locale()); | |||||
| return s; | |||||
| } | |||||
| // Visible for AutoValue only. | |||||
| abstract ULocale locale(); | |||||
| /** | |||||
| * Return an English identifier for the region in the form {@code "<region name> (<cldr code>)"}. | |||||
| * If the English name is not available, then {@code "Region: <cldr code>"} is returned. This | |||||
| * This string is only suitable for use in comments. | |||||
| * | |||||
| * @throws IllegalStateException if this method is called on the "world" region. | |||||
| */ | |||||
| public String getEnglishNameForXmlComments() { | |||||
| checkState(!equals(getWorld()), "cannot ask for display name of 'world' region"); | |||||
| String regionStr = locale().getCountry(); | |||||
| // Use "US" so we get "en_US", and not just "en", since the policy is to use the name as it | |||||
| // would appear in America. | |||||
| String displayCountry = locale().getDisplayCountry(ULocale.US); | |||||
| return !displayCountry.isEmpty() && !displayCountry.equals(regionStr) | |||||
| ? String.format("%s (%s)", displayCountry, regionStr) | |||||
| : String.format("Region: %s", regionStr); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,60 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.i18n; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import java.util.regex.Pattern; | |||||
| /** | |||||
| * A simple type-safe identifier for BCP 47 language tags containing only language code and an | |||||
| * optional script (e.g. "en" or "zh-Hant"). This class does no canonicalization on the values its | |||||
| * given, apart from normalizing the separator to a hyphen. | |||||
| * | |||||
| * <p>We can't really use {@code Locale} here because there's an issue whereby the JDK deliberately | |||||
| * uses deprecated language tags and would, for example, convert "id" (Indonesian) to "in", which | |||||
| * is at odds with BCP 47. See {@link java.util.Locale#forLanguageTag(String) forLanguageTag()} for | |||||
| * more information. | |||||
| * | |||||
| * <p>The metadata tooling makes only minimal use of the semantics of language codes, relying on | |||||
| * them mainly as key values, and never tries to canonicalize or modify them (i.e. it is possible | |||||
| * that a language code used for this data may end up being non-canonical). It is up to any library | |||||
| * which loads the metadata at runtime to ensure that its mappings to the data account for current | |||||
| * canonicalization. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class SimpleLanguageTag { | |||||
| // This can be extended or modified to use Locale as necessary. | |||||
| private static final Pattern SIMPLE_TAG = Pattern.compile("[a-z]{2,3}(?:[-_][A-Z][a-z]{3})?"); | |||||
| /** | |||||
| * Returns a language tag instance for the given string with minimal structural checking. If the | |||||
| * given tag uses {@code '_'} for separating language and script it's converted into {@code '-'}. | |||||
| */ | |||||
| public static SimpleLanguageTag of(String lang) { | |||||
| checkArgument(SIMPLE_TAG.matcher(lang).matches(), "invalid language tag: %s", lang); | |||||
| return new AutoValue_SimpleLanguageTag(lang.replace('_', '-')); | |||||
| } | |||||
| // Visible for AutoValue only. | |||||
| abstract String lang(); | |||||
| @Override | |||||
| public final String toString() { | |||||
| return lang(); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,94 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.auto.value.extension.memoized.Memoized; | |||||
| import com.google.common.base.Ascii; | |||||
| import com.google.common.base.CharMatcher; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatGroup; | |||||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||||
| import java.util.Optional; | |||||
| /** | |||||
| * An alternate format, used to describe less common ways we believe a phone number can be | |||||
| * formatted in a region. These can be derived from an "alias" in the formats table, or as | |||||
| * "historical" formats which are not associated with any specific current format. | |||||
| * | |||||
| * <p>Note that alternate formats can be defined with the same template, and they are merged | |||||
| * together to produce a canonical map in which the format template is the key. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class AltFormatSpec { | |||||
| // Template specifier character for an optional digit (present only in variable length formats). | |||||
| private static final CharMatcher OPT_DIGIT = CharMatcher.is('*'); | |||||
| // Template specifier character for a required digit. | |||||
| private static final CharMatcher ANY_DIGIT = CharMatcher.is('X'); | |||||
| // The only characters permitted in the specifier of an alternate format template. | |||||
| private static final CharMatcher ALLOWED_TEMPLATE_CHARS = CharMatcher.anyOf("X* "); | |||||
| /** | |||||
| * Creates an alternate format specification after validating the template and prefix. | |||||
| * | |||||
| * @param template the grouping template, whose specifier may contain only 'X', '*' and space | |||||
| * @param prefix the range prefix for the format, which may be empty | |||||
| * @param parent the ID of the format for which this is an alternative | |||||
| * @param comment an optional freeform comment | |||||
| * @throws IllegalArgumentException if the template or the prefix fails validation | |||||
| */ | |||||
| public static AltFormatSpec create( | |||||
| FormatTemplate template, RangeSpecification prefix, String parent, Optional<String> comment) { | |||||
| // As only a limited set of chars is allowed, we know things like national prefix or carrier | |||||
| // codes cannot be present. We're just interested in basic grouping like "XXX XXX**". | |||||
| String spec = template.getSpecifier(); | |||||
| checkArgument(ALLOWED_TEMPLATE_CHARS.matchesAllOf(spec) && !template.getXmlPrefix().isPresent(), | |||||
| "invalid alternate format template: %s", template); | |||||
| // Prefix must be no longer than the template's minimum length and not contain any trailing 'x'. | |||||
| checkArgument(prefix.length() <= template.minLength() && prefix.equals(prefix.getPrefix()), | |||||
| "invalid prefix '%s' for alternate format template: %s", prefix, template); | |||||
| // If variable length, the spec must have room for the prefix before the '*' characters. | |||||
| checkArgument( | |||||
| OPT_DIGIT.matchesNoneOf(spec) | |||||
| || prefix.length() <= ANY_DIGIT.countIn(spec.substring(0, OPT_DIGIT.indexIn(spec))), | |||||
| "invalid prefix '%s' for alternate format template: %s", prefix, template); | |||||
| // NOTE: The argument order here must match the declaration order of the abstract accessors | |||||
| // below, since that is what defines the generated AutoValue constructor. | |||||
| return new AutoValue_AltFormatSpec(template, prefix, parent, comment); | |||||
| } | |||||
| /** Return the alternate format template containing only simple grouping (e.g. "XXX XXX**"). */ | |||||
| public abstract FormatTemplate template(); | |||||
| /** | |||||
| * Returns the prefix for this alternate format which (along with the template length) defines | |||||
| * the bounds over which this format can apply. | |||||
| */ | |||||
| public abstract RangeSpecification prefix(); | |||||
| /** Returns the ID of the format for which this specifier is an alternative. */ | |||||
| public abstract String parentFormatId(); | |||||
| /** Returns the arbitrary comment, possibly containing newlines, for this format. */ | |||||
| public abstract Optional<String> comment(); | |||||
| /** Returns the format specifier as used in the CSV representation (e.g. "20 XXX XXX"). */ | |||||
| @Memoized | |||||
| public String specifier() { | |||||
| RangeSpecification prefix = prefix(); | |||||
| // Index of the current digit position, counted across all groups of the template. | |||||
| int digitIdx = 0; | |||||
| StringBuilder buf = new StringBuilder(); | |||||
| for (FormatGroup g : template().getGroups()) { | |||||
| for (int i = 0; i < g.maxLength(); i++, digitIdx++) { | |||||
| // Positions covered by the prefix show the prefix digit(s); remaining positions show 'X' | |||||
| // while within the group's minimum length and '*' for the optional digits after that. | |||||
| // Uppercasing is so that 'x' --> 'X' | |||||
| buf.append(digitIdx < prefix.length() | |||||
| ? Ascii.toUpperCase(RangeSpecification.toString(prefix.getBitmask(digitIdx))) | |||||
| : (i < g.minLength() ? "X" : "*")); | |||||
| } | |||||
| buf.append(" "); | |||||
| } | |||||
| // Drop the separator appended after the final group. | |||||
| buf.setLength(buf.length() - 1); | |||||
| return buf.toString(); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,146 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.CharMatcher.whitespace; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper; | |||||
| import static java.util.function.Function.identity; | |||||
| import com.google.common.annotations.VisibleForTesting; | |||||
| import com.google.common.base.Ascii; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTableCollector; | |||||
| import java.io.BufferedReader; | |||||
| import java.io.IOException; | |||||
| import java.io.Reader; | |||||
| import java.io.Writer; | |||||
| import java.nio.file.Files; | |||||
| import java.nio.file.Path; | |||||
| import java.util.ArrayList; | |||||
| import java.util.List; | |||||
| import java.util.Optional; | |||||
| import java.util.function.Consumer; | |||||
| import java.util.function.Function; | |||||
| import java.util.function.Supplier; | |||||
| import java.util.stream.Stream; | |||||
| import javax.annotation.Nullable; | |||||
| /** | |||||
| * The schema of the "AltFormats" table with rows identified by an "alternate format specifier": | |||||
| * <ol> | |||||
| * <li>{@link #PARENT}: The ID of the "main" format that this is an alternate of. | |||||
| * <li>{@link #COMMENT}: Freeform comment text. | |||||
| * </ol> | |||||
| * | |||||
| * <p>Row keys are serialized via the marshaller and produce the leading column: | |||||
| * <ol> | |||||
| * <li>{@code Format}: The alternate format specifier including prefix and grouping information | |||||
| * (e.g. "20 XXXX XXXX"). | |||||
| * </ol> | |||||
| */ | |||||
| public final class AltFormatsSchema { | |||||
| private static final String FORMAT = "Format"; | |||||
| private static final String PARENT = "Parent Format"; | |||||
| private static final String COMMENT = "Comment"; | |||||
| /** The column titles, in order, expected as the first row of the alternate formats table. */ | |||||
| public static final ImmutableList<String> HEADER = ImmutableList.of(FORMAT, PARENT, COMMENT); | |||||
| private static final CsvParser CSV_PARSER = CsvParser.withSeparator(';').trimWhitespace(); | |||||
| // Rejects any table whose header row is not exactly HEADER. | |||||
| private static final RowMapper ROW_MAPPER = | |||||
| rowMapper( | |||||
| header -> | |||||
| checkArgument(header.equals(HEADER), "unexpected alt-format header: %s", header)); | |||||
| /** | |||||
| * Reads the alternate formats stored at the given path, returning an empty list if the file | |||||
| * does not exist. Any I/O failure is rethrown as a {@link RuntimeException}. | |||||
| */ | |||||
| public static ImmutableList<AltFormatSpec> loadAltFormats(Path path) { | |||||
| if (Files.exists(path)) { | |||||
| try (Reader csv = Files.newBufferedReader(path)) { | |||||
| return importAltFormats(csv); | |||||
| } catch (IOException e) { | |||||
| throw new RuntimeException(e); | |||||
| } | |||||
| } | |||||
| return ImmutableList.of(); | |||||
| } | |||||
| /** Parses alternate formats from CSV text, unescaping each field before it is mapped. */ | |||||
| @VisibleForTesting | |||||
| static ImmutableList<AltFormatSpec> importAltFormats(Reader csv) throws IOException { | |||||
| List<AltFormatSpec> parsed = new ArrayList<>(); | |||||
| Consumer<Stream<String>> onRow = getRowCallback(parsed); | |||||
| try (BufferedReader lineReader = new BufferedReader(csv)) { | |||||
| CSV_PARSER.parse( | |||||
| lineReader.lines(), | |||||
| fields -> onRow.accept(fields.map(CsvTable::unescapeSingleLineCsvText))); | |||||
| } | |||||
| return ImmutableList.copyOf(parsed); | |||||
| } | |||||
| /** | |||||
| * Parses alternate formats from rows which are supplied one at a time. The first row must be | |||||
| * the header, and a {@code null} row signals the end of the input. | |||||
| */ | |||||
| public static ImmutableList<AltFormatSpec> importAltFormats(Supplier<List<String>> rows) { | |||||
| List<AltFormatSpec> parsed = new ArrayList<>(); | |||||
| Consumer<Stream<String>> onRow = getRowCallback(parsed); | |||||
| // The header row is mandatory, so it is consumed unconditionally. | |||||
| onRow.accept(rows.get().stream()); | |||||
| for (List<String> next = rows.get(); next != null; next = rows.get()) { | |||||
| onRow.accept(next.stream()); | |||||
| } | |||||
| return ImmutableList.copyOf(parsed); | |||||
| } | |||||
| // Returns a row consumer which parses each mapped row and appends the result to the given list. | |||||
| private static Consumer<Stream<String>> getRowCallback(List<AltFormatSpec> altFormats) { | |||||
| return ROW_MAPPER.mapTo( | |||||
| row -> { | |||||
| AltFormatSpec spec = | |||||
| parseAltFormat(row.get(FORMAT), row.get(PARENT), row.get(COMMENT)); | |||||
| altFormats.add(spec); | |||||
| }); | |||||
| } | |||||
| /** | |||||
| * Parses a single alternate format from its specifier (e.g. "1X [2-8]XXX** XXX"), the ID of | |||||
| * its parent format and an optional comment. | |||||
| */ | |||||
| public static AltFormatSpec parseAltFormat( | |||||
| String altId, String parent, @Nullable String comment) { | |||||
| // Replace every prefix digit or digit range by 'X' to recover the bare grouping template: | |||||
| // "1X [2-8]XXX** XXX" --> "XX XXXX** XXX" | |||||
| String templateSpec = altId.replaceAll("[0-9]|\\[[-0-9]+\\]", "X"); | |||||
| FormatTemplate template = FormatTemplate.parse(templateSpec); | |||||
| // Strip the trailing run of 'X', '*' and spaces, then remove whitespace and lowercase: | |||||
| // "1X [2-8]XXX** XXX" --> "1X [2-8]" --> "1X[2-8]" --> "1x[2-8]" | |||||
| // The result can be (and often is) the empty string. Parsing fails if a '*' is ever left in | |||||
| // the specification, but that really should not happen. | |||||
| String prefixPart = altId.replaceAll("[X* ]*$", ""); | |||||
| String prefixSpec = Ascii.toLowerCase(whitespace().removeFrom(prefixPart)); | |||||
| RangeSpecification prefix = RangeSpecification.parse(prefixSpec); | |||||
| return AltFormatSpec.create(template, prefix, parent, Optional.ofNullable(comment)); | |||||
| } | |||||
| /** | |||||
| * Writes the header row followed by one row per alternate format to the given collector. | |||||
| * Comments are escaped for single line CSV only if {@code toCsv} is true. | |||||
| */ | |||||
| public static void export( | |||||
| List<AltFormatSpec> altFormats, Consumer<Stream<String>> collector, boolean toCsv) { | |||||
| collector.accept(HEADER.stream()); | |||||
| Function<String, String> escapeFn; | |||||
| if (toCsv) { | |||||
| escapeFn = CsvTable::escapeForSingleLineCsv; | |||||
| } else { | |||||
| escapeFn = identity(); | |||||
| } | |||||
| for (AltFormatSpec format : altFormats) { | |||||
| String specifier = format.specifier(); | |||||
| String parentId = format.parentFormatId(); | |||||
| String commentText = format.comment().map(escapeFn).orElse(""); | |||||
| collector.accept(Stream.of(specifier, parentId, commentText)); | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Writes the alternate formats in the same CSV format as CsvTable. Nothing is written, and | |||||
| * false is returned, if there are no alternate formats. | |||||
| */ | |||||
| public static boolean exportCsv(Writer csv, List<AltFormatSpec> altFormats) { | |||||
| boolean hasRows = !altFormats.isEmpty(); | |||||
| if (hasRows) { | |||||
| CsvTableCollector collector = new CsvTableCollector(true); | |||||
| export(altFormats, collector, true); | |||||
| collector.writeCsv(csv); | |||||
| } | |||||
| return hasRows; | |||||
| } | |||||
| // Static utility class; not instantiable. | |||||
| private AltFormatsSchema() {} | |||||
| } | |||||
| @ -0,0 +1,132 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper; | |||||
| import static java.util.Comparator.comparing; | |||||
| import static java.util.function.Function.identity; | |||||
| import com.google.common.annotations.VisibleForTesting; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTableCollector; | |||||
| import java.io.BufferedReader; | |||||
| import java.io.IOException; | |||||
| import java.io.Reader; | |||||
| import java.io.Writer; | |||||
| import java.nio.file.Files; | |||||
| import java.nio.file.Path; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Comparator; | |||||
| import java.util.List; | |||||
| import java.util.function.Consumer; | |||||
| import java.util.function.Function; | |||||
| import java.util.function.Supplier; | |||||
| import java.util.stream.Stream; | |||||
| /** | |||||
| * The data schema for handling XML comments. Note that, unlike other "table" schemas, this does | |||||
| * not represent comments in the form of a CsvTable. This is because comment anchors can appear | |||||
| * multiple times in the CSV file (so there's no unique key). This is not an issue since the | |||||
| * internal data representation handles this, but it just means that code cannot be reused as much. | |||||
| */ | |||||
| public final class CommentsSchema { | |||||
| private static final String REGION = "Region"; | |||||
| private static final String LABEL = "Label"; | |||||
| private static final String COMMENT = "Comment"; | |||||
| /** The column titles, in order, expected as the first row of the comments CSV. */ | |||||
| public static final ImmutableList<String> HEADER = ImmutableList.of(REGION, LABEL, COMMENT); | |||||
| // Imported and exported comments are always ordered by their anchor. | |||||
| private static final Comparator<Comment> ORDERING = comparing(Comment::getAnchor); | |||||
| private static final CsvParser CSV_PARSER = CsvParser.withSeparator(';').trimWhitespace(); | |||||
| // Rejects any input whose header row is not exactly HEADER. | |||||
| private static final RowMapper ROW_MAPPER = | |||||
| rowMapper(h -> checkArgument(h.equals(HEADER), "unexpected comment header: %s", h)); | |||||
| /** | |||||
| * Loads the comments from a given file path, returning an empty list if the file does not | |||||
| * exist. Any I/O failure is rethrown as a {@link RuntimeException}. | |||||
| */ | |||||
| public static ImmutableList<Comment> loadComments(Path path) { | |||||
| if (!Files.exists(path)) { | |||||
| return ImmutableList.of(); | |||||
| } | |||||
| try (Reader csv = Files.newBufferedReader(path)) { | |||||
| return importComments(csv); | |||||
| } catch (IOException e) { | |||||
| throw new RuntimeException(e); | |||||
| } | |||||
| } | |||||
| /** Parses comments from CSV text (unescaping each field) and returns them in anchor order. */ | |||||
| @VisibleForTesting | |||||
| static ImmutableList<Comment> importComments(Reader csv) throws IOException { | |||||
| List<Comment> comments = new ArrayList<>(); | |||||
| Consumer<Stream<String>> rowCallback = getRowCallback(comments); | |||||
| try (BufferedReader r = new BufferedReader(csv)) { | |||||
| CSV_PARSER.parse(r.lines(), | |||||
| row -> rowCallback.accept(row.map(CsvTable::unescapeSingleLineCsvText))); | |||||
| } | |||||
| return ImmutableList.sortedCopyOf(ORDERING, comments); | |||||
| } | |||||
| /** | |||||
| * Parses comments from rows which are supplied one at a time and returns them in anchor order. | |||||
| * The first row must be the header, and a {@code null} row signals the end of the input. | |||||
| */ | |||||
| public static ImmutableList<Comment> importComments(Supplier<List<String>> rows) { | |||||
| List<Comment> comments = new ArrayList<>(); | |||||
| Consumer<Stream<String>> rowCallback = getRowCallback(comments); | |||||
| // Expect header row always. | |||||
| rowCallback.accept(rows.get().stream()); | |||||
| List<String> row; | |||||
| while ((row = rows.get()) != null) { | |||||
| rowCallback.accept(row.stream()); | |||||
| } | |||||
| return ImmutableList.sortedCopyOf(ORDERING, comments); | |||||
| } | |||||
| // Returns a row consumer which appends one comment to the given list for each mapped row that | |||||
| // has a value in the comment column; rows without one are skipped. | |||||
| private static Consumer<Stream<String>> getRowCallback(List<Comment> comments) { | |||||
| return ROW_MAPPER.mapTo(row -> { | |||||
| if (row.containsKey(COMMENT)) { | |||||
| comments.add( | |||||
| Comment.fromText( | |||||
| Anchor.of(PhoneRegion.of(row.get(REGION)), row.get(LABEL)), | |||||
| row.get(COMMENT))); | |||||
| } | |||||
| }); | |||||
| } | |||||
| /** | |||||
| * Exports comments, in anchor order, to a collector (potentially escaping the comment text for | |||||
| * CSV). The header row is always emitted first. | |||||
| */ | |||||
| public static void export( | |||||
| List<Comment> comments, Consumer<Stream<String>> collector, boolean toCsv) { | |||||
| collector.accept(HEADER.stream()); | |||||
| Function<String, String> escapeFn = toCsv ? CsvTable::escapeForSingleLineCsv : identity(); | |||||
| comments.stream() | |||||
| .sorted(ORDERING) | |||||
| .forEach(c -> collector.accept(Stream.of( | |||||
| c.getAnchor().region().toString(), c.getAnchor().label(), escapeFn.apply(c.toText())))); | |||||
| } | |||||
| /** | |||||
| * Helper method to write comments in same CSV format as CsvTable. Nothing is written, and false | |||||
| * is returned, if there are no comments. | |||||
| */ | |||||
| public static boolean exportCsv(Writer csv, List<Comment> comments) { | |||||
| if (comments.isEmpty()) { | |||||
| return false; | |||||
| } | |||||
| CsvTableCollector collector = new CsvTableCollector(true); | |||||
| export(comments, collector, true); | |||||
| collector.writeCsv(csv); | |||||
| return true; | |||||
| } | |||||
| // Static utility class; not instantiable (consistent with the other schema classes). | |||||
| private CommentsSchema() {} | |||||
| } | |||||
| @ -0,0 +1,236 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.auto.value.extension.memoized.Memoized; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.ImmutableSortedMap; | |||||
| import com.google.common.collect.ImmutableTable; | |||||
| import com.google.common.collect.Iterables; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey; | |||||
| import com.google.i18n.phonenumbers.metadata.model.MetadataTableSchema.Regions; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||||
| import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeKey; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode; | |||||
| import com.google.i18n.phonenumbers.metadata.table.DiffKey; | |||||
| import com.google.i18n.phonenumbers.metadata.table.DiffKey.Status; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeKey; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||||
| import java.io.IOException; | |||||
| import java.util.HashMap; | |||||
| import java.util.Map; | |||||
| import java.util.Optional; | |||||
| /** | |||||
| * All CSV based tables and legacy XML for a single calling code. This is the data from which all | |||||
| * legacy data can be reconstructed (metadata XML, carrier/geocode/timezone mappings). | |||||
| * | |||||
| * <p>This is loaded at once, possibly from multiple files, since conversion to legacy formats | |||||
| * often requires more than one of these data structures. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class CsvData { | |||||
| /** CSV data loading API. */ | |||||
| public interface CsvDataProvider { | |||||
| /** Loads the top-level metadata table which contains data for all supported calling codes. */ | |||||
| CsvTable<DigitSequence> loadMetadata() throws IOException; | |||||
| /** Loads the CSV data for a single calling code. */ | |||||
| CsvData loadData(DigitSequence cc) throws IOException; | |||||
| } | |||||
| /** | |||||
| * Creates a single CsvData instance, either directly or from a provider. Only the single row | |||||
| * relating to the specified calling code is retained from the given metadata table. | |||||
| * | |||||
| * <p>The data is validated before the instance is created: the calling code must be present in | |||||
| * the metadata, the regions of the metadata, ranges and shortcodes tables must be consistent, | |||||
| * and no two rows of the ranges table (or of the shortcodes table for the same region) may | |||||
| * overlap. | |||||
| */ | |||||
| public static CsvData create( | |||||
| DigitSequence cc, | |||||
| CsvTable<DigitSequence> allMetadata, | |||||
| CsvTable<RangeKey> ranges, | |||||
| CsvTable<ShortcodeKey> shortcodes, | |||||
| CsvTable<ExampleNumberKey> examples, | |||||
| CsvTable<String> formats, | |||||
| ImmutableList<AltFormatSpec> altFormats, | |||||
| CsvTable<String> operators, | |||||
| ImmutableList<Comment> comments) { | |||||
| // Row keys are unique, so we end up with at most 1 row in the filtered table. | |||||
| CsvTable<DigitSequence> ccMetadata = | |||||
| allMetadata.toBuilder().filterRows(r -> r.equals(cc)).build(); | |||||
| checkMetadata(!ccMetadata.getKeys().isEmpty(), "no such calling code %s in metadata", cc); | |||||
| checkRegions(ccMetadata, ranges, shortcodes); | |||||
| checkNoOverlappingRows(ranges); | |||||
| checkNoOverlappingShortcodeRows(shortcodes); | |||||
| // NOTE: The argument order here must match the declaration order of the abstract accessors | |||||
| // below, since that is what defines the generated AutoValue constructor. | |||||
| return new AutoValue_CsvData( | |||||
| cc, ccMetadata, ranges, shortcodes, examples, formats, altFormats, operators, comments); | |||||
| } | |||||
| // Fails if the ranges of any row intersect the union of the ranges of all preceding rows. | |||||
| private static void checkNoOverlappingRows(CsvTable<RangeKey> csv) { | |||||
| RangeTree allRanges = RangeTree.empty(); | |||||
| for (RangeKey key : csv.getKeys()) { | |||||
| RangeTree ranges = key.asRangeTree(); | |||||
| checkMetadata(allRanges.intersect(ranges).isEmpty(), "overlapping row in CSV: %s", key); | |||||
| allRanges = allRanges.union(ranges); | |||||
| } | |||||
| } | |||||
| // As above, but the accumulated ranges are tracked separately for each region, so rows for | |||||
| // different regions are never compared with each other. | |||||
| private static void checkNoOverlappingShortcodeRows(CsvTable<ShortcodeKey> csv) { | |||||
| Map<PhoneRegion, RangeTree> allRangesMap = new HashMap<>(); | |||||
| for (ShortcodeKey key : csv.getKeys()) { | |||||
| RangeTree allRegionRanges = allRangesMap.getOrDefault(key.getRegion(), RangeTree.empty()); | |||||
| RangeTree ranges = key.getRangeKey().asRangeTree(); | |||||
| checkMetadata(allRegionRanges.intersect(ranges).isEmpty(), "overlapping row in CSV: %s", key); | |||||
| allRangesMap.put(key.getRegion(), allRegionRanges.union(ranges)); | |||||
| } | |||||
| } | |||||
| // Checks that the regions in the ranges table are exactly the main region plus any extra | |||||
| // regions given in the (single row) metadata table, and that every shortcode region is one of | |||||
| // the regions in the ranges table. | |||||
| private static void checkRegions( | |||||
| CsvTable<DigitSequence> metadata, | |||||
| CsvTable<RangeKey> ranges, | |||||
| CsvTable<ShortcodeKey> shortcodes) { | |||||
| DigitSequence cc = Iterables.getOnlyElement(metadata.getKeys()); | |||||
| PhoneRegion mainRegion = metadata.getOrDefault(cc, MetadataTableSchema.MAIN_REGION); | |||||
| Regions extraRegions = metadata.getOrDefault(cc, MetadataTableSchema.EXTRA_REGIONS); | |||||
| // The distinct regions over all values of the regions column in the ranges table. | |||||
| ImmutableSet<PhoneRegion> csvRegions = ranges | |||||
| .getValues(RangesTableSchema.CSV_REGIONS).stream() | |||||
| .flatMap(r -> r.getValues().stream()) | |||||
| .collect(toImmutableSet()); | |||||
| if (extraRegions.getValues().isEmpty()) { | |||||
| // Single region calling code: the ranges table must reference the main region and no other. | |||||
| checkMetadata(csvRegions.size() == 1 && csvRegions.contains(mainRegion), | |||||
| "inconsistent regions:\nmetadata: %s\nranges table: %s", mainRegion, csvRegions); | |||||
| } else { | |||||
| checkMetadata(!extraRegions.getValues().contains(mainRegion), | |||||
| "invalid metadata: main region is duplicated in 'extra regions' column"); | |||||
| // The size check ensures the ranges table has no regions beyond main + extra regions. | |||||
| checkMetadata( | |||||
| csvRegions.contains(mainRegion) | |||||
| && csvRegions.containsAll(extraRegions.getValues()) | |||||
| && csvRegions.size() == extraRegions.getValues().size() + 1, | |||||
| "inconsistent regions:\nmetadata: %s + %s\nranges table: %s", | |||||
| mainRegion, extraRegions, csvRegions); | |||||
| } | |||||
| ImmutableSet<PhoneRegion> shortcodeRegions = | |||||
| shortcodes.getKeys().stream().map(ShortcodeKey::getRegion).collect(toImmutableSet()); | |||||
| checkMetadata(csvRegions.containsAll(shortcodeRegions), | |||||
| "unexpected regions for shortcodes:\nmetadata: %s\nshortcode regions: %s", | |||||
| csvRegions, shortcodeRegions); | |||||
| } | |||||
| /** The difference between two CSV snapshots captured as a set of CSV tables. */ | |||||
| @AutoValue | |||||
| public abstract static class Diff { | |||||
| // Returns the diff of the two tables, or empty if every key in the diff is unchanged. | |||||
| private static <K> Optional<CsvTable<DiffKey<K>>> diff(CsvTable<K> lhs, CsvTable<K> rhs) { | |||||
| CsvTable<DiffKey<K>> diff = CsvTable.diff(lhs, rhs, DiffMode.CHANGES); | |||||
| if (diff.getKeys().stream().anyMatch(k -> k.getStatus() != Status.UNCHANGED)) { | |||||
| return Optional.of(diff); | |||||
| } | |||||
| return Optional.empty(); | |||||
| } | |||||
| // Visible for AutoValue | |||||
| Diff() {} | |||||
| /** Returns the contextualized diff of the ranges table. */ | |||||
| public abstract Optional<CsvTable<DiffKey<RangeKey>>> rangesDiff(); | |||||
| /** Returns the contextualized diff of the shortcodes table. */ | |||||
| public abstract Optional<CsvTable<DiffKey<ShortcodeKey>>> shortcodesDiff(); | |||||
| /** Returns the contextualized diff of the examples table. */ | |||||
| public abstract Optional<CsvTable<DiffKey<ExampleNumberKey>>> examplesDiff(); | |||||
| /** Returns the contextualized diff of the formats table. */ | |||||
| public abstract Optional<CsvTable<DiffKey<String>>> formatsDiff(); | |||||
| /** Returns the contextualized diff of the operators table. */ | |||||
| public abstract Optional<CsvTable<DiffKey<String>>> operatorsDiff(); | |||||
| } | |||||
| /** | |||||
| * Creates the diff between two CSV data snapshots. Each table's diff is empty if that table | |||||
| * has no changes. Comments and alternate formats are not currently diffed. | |||||
| */ | |||||
| public static Diff diff(CsvData before, CsvData after) { | |||||
| // TODO: Add diffing for comments and/or alternate formats. | |||||
| return new AutoValue_CsvData_Diff( | |||||
| Diff.diff(before.getRanges(), after.getRanges()), | |||||
| Diff.diff(before.getShortcodes(), after.getShortcodes()), | |||||
| Diff.diff(before.getExamples(), after.getExamples()), | |||||
| Diff.diff(before.getFormats(), after.getFormats()), | |||||
| Diff.diff(before.getOperators(), after.getOperators())); | |||||
| } | |||||
| // Visible for AutoValue | |||||
| CsvData() {} | |||||
| /** Returns the calling code for this CSV data. */ | |||||
| public abstract DigitSequence getCallingCode(); | |||||
| /** | |||||
| * Returns the single row of the metadata table for the calling code (see | |||||
| * {@code MetadataTableSchema}). | |||||
| */ | |||||
| public abstract CsvTable<DigitSequence> getMetadata(); | |||||
| /** Returns the ranges table for the calling code (see {@code RangesTableSchema}). */ | |||||
| public abstract CsvTable<RangeKey> getRanges(); | |||||
| /** Returns the shortcode table for the calling code (see {@code ShortcodesTableSchema}). */ | |||||
| public abstract CsvTable<ShortcodeKey> getShortcodes(); | |||||
| /** Returns the examples table for the calling code (see {@code ExamplesTableSchema}). */ | |||||
| public abstract CsvTable<ExampleNumberKey> getExamples(); | |||||
| /** Returns the format table for the calling code (see {@code FormatsTableSchema}). */ | |||||
| public abstract CsvTable<String> getFormats(); | |||||
| /** | |||||
| * Returns the alternate formats for the calling code (see {@code AltFormatsSchema}). | |||||
| */ | |||||
| public abstract ImmutableList<AltFormatSpec> getAltFormats(); | |||||
| /** Returns the operator table for the calling code (see {@code OperatorsTableSchema}). */ | |||||
| public abstract CsvTable<String> getOperators(); | |||||
| /** Returns the set of comments for the calling code. */ | |||||
| public abstract ImmutableList<Comment> getComments(); | |||||
| /** Returns the ranges table converted to a {@link RangeTable} (memoized). */ | |||||
| @Memoized | |||||
| public RangeTable getRangesAsTable() { | |||||
| return RangesTableSchema.toRangeTable(getRanges()); | |||||
| } | |||||
| /** Returns the shortcodes table converted to a map of range tables by region (memoized). */ | |||||
| @Memoized | |||||
| public ImmutableSortedMap<PhoneRegion, RangeTable> getShortcodesAsTables() { | |||||
| return ShortcodesTableSchema.toShortcodeTables(getShortcodes()); | |||||
| } | |||||
| /** Returns the examples table converted to a table of numbers by region and type (memoized). */ | |||||
| @Memoized | |||||
| public ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> getExamplesAsTable() { | |||||
| return ExamplesTableSchema.toExampleTable(getExamples()); | |||||
| } | |||||
| /** Canonicalizes range tables in the CSV data. This is potentially slow for large regions. */ | |||||
| // TODO: Is there any way to reliably detect canonical CSV for sub-regions? | |||||
| public final CsvData canonicalizeRangeTables() { | |||||
| // Round-trip the ranges and shortcodes through their table forms and rebuild the instance via | |||||
| // create(), which re-runs the consistency checks. All other data is passed through as-is. | |||||
| CsvTable<RangeKey> ranges = RangesTableSchema.toCsv(getRangesAsTable()); | |||||
| CsvTable<ShortcodeKey> shortcodes = ShortcodesTableSchema.toCsv(getShortcodesAsTables()); | |||||
| return create( | |||||
| getCallingCode(), | |||||
| getMetadata(), | |||||
| ranges, | |||||
| shortcodes, | |||||
| getExamples(), | |||||
| getFormats(), | |||||
| getAltFormats(), | |||||
| getOperators(), | |||||
| getComments() | |||||
| ); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,126 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey.ORDERING; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableTable; | |||||
| import com.google.common.collect.Table; | |||||
| import com.google.common.collect.Table.Cell; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||||
| import java.util.Comparator; | |||||
| import java.util.List; | |||||
| import java.util.Optional; | |||||
| import java.util.stream.Stream; | |||||
| /** | |||||
| * The schema of the "Example Numbers" table with rows keyed by {@link ExampleNumberKey} and | |||||
| * columns: | |||||
| * <ol> | |||||
| * <li>{@link #NUMBER}: The national number | |||||
| * <li>{@link #COMMENT}: Evidence for why an example number was chosen. | |||||
| * </ol> | |||||
| * | |||||
| * <p>Row keys are serialized via the marshaller and produce leading columns: | |||||
| * <ol> | |||||
| * <li>{@code Region}: The region code of the example number. | |||||
| * <li>{@code Type}: The {@link ValidNumberType} of the example number. | |||||
| * </ol> | |||||
| */ | |||||
| public final class ExamplesTableSchema { | |||||
| /** A key for rows in the example numbers table. */ | |||||
| @AutoValue | |||||
| public abstract static class ExampleNumberKey { | |||||
| /** Orders keys by region and then by number type. */ | |||||
| public static final Comparator<ExampleNumberKey> ORDERING = | |||||
| Comparator.comparing(ExampleNumberKey::getRegion).thenComparing(ExampleNumberKey::getType); | |||||
| /** Returns the key for the example number of the given type in the given region. */ | |||||
| public static ExampleNumberKey of(PhoneRegion region, ValidNumberType type) { | |||||
| // NOTE: The argument order must match the declaration order of the abstract accessors below. | |||||
| return new AutoValue_ExamplesTableSchema_ExampleNumberKey(region, type); | |||||
| } | |||||
| /** Returns the region of the example number. */ | |||||
| public abstract PhoneRegion getRegion(); | |||||
| /** Returns the number type of the example number. */ | |||||
| public abstract ValidNumberType getType(); | |||||
| } | |||||
| /** A number column containing the digit sequence of a national number. */ | |||||
| public static final Column<DigitSequence> NUMBER = Column.create( | |||||
| DigitSequence.class, "Number", DigitSequence.empty(), DigitSequence::of); | |||||
| /** A general comment field, usually describing how an example number was determined. */ | |||||
| public static final Column<String> COMMENT = Column.ofString("Comment"); | |||||
| private static final CsvKeyMarshaller<ExampleNumberKey> MARSHALLER = new CsvKeyMarshaller<>( | |||||
| ExamplesTableSchema::write, | |||||
| ExamplesTableSchema::read, | |||||
| Optional.of(ORDERING), | |||||
| "Region", | |||||
| "Type"); | |||||
| private static final Schema COLUMNS = Schema.builder() | |||||
| .add(NUMBER) | |||||
| .add(COMMENT) | |||||
| .build(); | |||||
| /** Schema instance defining the example numbers CSV table. */ | |||||
| public static final CsvSchema<ExampleNumberKey> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS); | |||||
| /** | |||||
| * Converts a {@link Table} of example numbers into a {@link CsvTable}, using | |||||
| * {@link ExampleNumberKey}s as row keys. | |||||
| */ | |||||
| public static CsvTable<ExampleNumberKey> toCsv( | |||||
| Table<PhoneRegion, ValidNumberType, DigitSequence> table) { | |||||
| ImmutableTable.Builder<ExampleNumberKey, Column<?>, Object> out = ImmutableTable.builder(); | |||||
| out.orderRowsBy(ORDERING).orderColumnsBy(COLUMNS.ordering()); | |||||
| for (Cell<PhoneRegion, ValidNumberType, DigitSequence> c : table.cellSet()) { | |||||
| out.put(ExampleNumberKey.of(c.getRowKey(), c.getColumnKey()), NUMBER, c.getValue()); | |||||
| } | |||||
| return CsvTable.from(SCHEMA, out.build()); | |||||
| } | |||||
| /** | |||||
| * Converts a {@link Table} of example numbers into a {@link CsvTable}, using | |||||
| * {@link ExampleNumberKey}s as row keys. | |||||
| */ | |||||
| public static ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> | |||||
| toExampleTable(CsvTable<ExampleNumberKey> csv) { | |||||
| ImmutableTable.Builder<PhoneRegion, ValidNumberType, DigitSequence> out = | |||||
| ImmutableTable.builder(); | |||||
| for (ExampleNumberKey k : csv.getKeys()) { | |||||
| out.put(k.getRegion(), k.getType(), csv.getOrDefault(k, NUMBER)); | |||||
| } | |||||
| return out.build(); | |||||
| } | |||||
| private static Stream<String> write(ExampleNumberKey key) { | |||||
| return Stream.of(key.getRegion().toString(), key.getType().toString()); | |||||
| } | |||||
| private static ExampleNumberKey read(List<String> parts) { | |||||
| return ExampleNumberKey.of( | |||||
| PhoneRegion.of(parts.get(0)), ValidNumberType.valueOf(parts.get(1))); | |||||
| } | |||||
| private ExamplesTableSchema() {} | |||||
| } | |||||
| @ -0,0 +1,68 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.model.CsvData.CsvDataProvider; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||||
| import java.io.IOException; | |||||
| import java.nio.file.Path; | |||||
| /** | |||||
| * A CSV provider which reads files rooted in a given directory. The file layout should match that | |||||
| * in the CSV metadata directory ({@code googledata/third_party/i18n/phonenumbers/metadata}). | |||||
| */ | |||||
| public final class FileBasedCsvLoader implements CsvDataProvider { | |||||
| /** Returns a CSV loader which reads files from the given base directory. */ | |||||
| public static FileBasedCsvLoader using(Path dir) throws IOException { | |||||
| return new FileBasedCsvLoader(dir); | |||||
| } | |||||
| private final Path root; | |||||
| private final CsvTable<DigitSequence> metadata; | |||||
| private FileBasedCsvLoader(Path root) throws IOException { | |||||
| this.root = checkNotNull(root); | |||||
| this.metadata = MetadataTableSchema.SCHEMA.load(root.resolve("metadata.csv")); | |||||
| } | |||||
| @Override | |||||
| public CsvTable<DigitSequence> loadMetadata() { | |||||
| return metadata; | |||||
| } | |||||
| @Override | |||||
| public CsvData loadData(DigitSequence cc) throws IOException { | |||||
| Path ccDir = root.resolve(cc.toString()); | |||||
| return CsvData.create( | |||||
| cc, | |||||
| metadata, | |||||
| RangesTableSchema.SCHEMA.load(csvFile(ccDir, "ranges")), | |||||
| ShortcodesTableSchema.SCHEMA.load(csvFile(ccDir, "shortcodes")), | |||||
| ExamplesTableSchema.SCHEMA.load(csvFile(ccDir, "examples")), | |||||
| FormatsTableSchema.SCHEMA.load(csvFile(ccDir, "formats")), | |||||
| AltFormatsSchema.loadAltFormats(csvFile(ccDir, "altformats")), | |||||
| OperatorsTableSchema.SCHEMA.load(csvFile(ccDir, "operators")), | |||||
| CommentsSchema.loadComments(csvFile(ccDir, "comments")) | |||||
| ); | |||||
| } | |||||
| private static Path csvFile(Path dir, String name) { | |||||
| return dir.resolve(name + ".csv"); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,637 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkState; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.base.CharMatcher; | |||||
| import com.google.common.base.Strings; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||||
| import java.util.ArrayList; | |||||
| import java.util.List; | |||||
| import java.util.Optional; | |||||
| import java.util.function.ToIntFunction; | |||||
| import java.util.stream.Collectors; | |||||
| import java.util.stream.IntStream; | |||||
| /** | |||||
| * A specifier for the three types of format available in a formatting rule, "national", | |||||
| * "international" and "carrier specific". Each format is represented by a single string which acts | |||||
| * as a format template, and from which the necessary XML regular expressions can be recovered. | |||||
| * | |||||
| * <p>The basic syntax of a specifier is something like {@code "XX XXX-XXXX"}, where '{@code X}' | |||||
| * represents a digit from the phone number being formatted. When converted into the legacy XML | |||||
| * syntax, a national specifier with this format would represent the "pattern" attribute | |||||
| * {@code "(\d{2})(\d{3})(\d{4})"} and the "format" element {@code "$1 $2-$3"}. | |||||
| * | |||||
| * <p>By adding the '{@code *}' character, one group of variable length may be defined. Thus | |||||
| * {@code "XX XXX-XX**"} represents the pattern {@code "(\d{2})(\d{3})(\d{2,4})"}. | |||||
| * | |||||
| * <p>If the national prefix should be present, for either national or carrier specific formatting, | |||||
| * it is represented by the '{@code #}' symbol. Similarly, for carrier specific formatting, the | |||||
| * '{@code @}' symbol represents the carrier code placeholder (and must be present exactly once in | |||||
| * any carrier specific format specifier). | |||||
| * | |||||
| * <p>By analyzing the unique prefixes of both national and carrier specific specifiers, the XML | |||||
| * syntax can be derived. In a fairly simple example, the format specifiers: | |||||
| * <ul> | |||||
| * <li>national: {@code "(#XX) XXX-XXXX"} | |||||
| * <li>carrier: {@code "#@ XX XXX-XXXX"} | |||||
| * <li>international: {@code "XX XXX XXXX"} | |||||
| * </ul> | |||||
| * would result in: | |||||
| * <ul> | |||||
| * <li>pattern: {@code "(\d{2})(\d{3})(\d{4})"} | |||||
| * <li>national_prefix_formatting_rule: {@code "($NP$FG)"} | |||||
| * <li>carrier_specific_formatting_rule: {@code "$NP$CC $FG"} | |||||
| * <li>format: {@code "$1 $2-$3"} | |||||
| * <li>international_format: {@code "$1 $2 $3"} | |||||
| * </ul> | |||||
| * The derived "pattern" groups must be the same between all specifiers, while the "national" and | |||||
| * "carrier" specifiers must share a common suffix after the "first group". This is a limitation of | |||||
| * the XML representation which must be preserved here. | |||||
| * | |||||
| * <p>If no carrier specific format specifier is present, the extraction of a format rule will | |||||
| * still occur (since the formatting rule also affects "as you type" formatting). Thus: | |||||
| * <ul> | |||||
| * <li>national: {@code "(XX) XXX"} | |||||
| * </ul> | |||||
| * will result in: | |||||
| * <ul> | |||||
| * <li>format: {@code "$1 $2"} | |||||
| * <li>national_prefix_formatting_rule: {@code "($FG)"} | |||||
| * </ul> | |||||
| * and not: | |||||
| * <ul> | |||||
| * <li>format: {@code "($1) $2"} | |||||
| * </ul> | |||||
| * | |||||
| * <p>An international format specifier must exist if international formatting is possible (even if | |||||
| * it is identical to the national format specifier). If no international specifier exists, then | |||||
| * the range of phone numbers associated with this format must be a subset of the "no international | |||||
| * dialling" range, and the derived XML element "intlFormat" will contain the value "NA". | |||||
| * | |||||
| * <p>If literal characters such as "*" are required to be present in the format string, they can | |||||
| * be escaped via a '{@code \}' (backslash) character. The set of characters that might need | |||||
| * escaping is '{@code X}', '{@code *}', '{@code #}' and '{@code @}'. Note that the dollar symbol | |||||
| * '{@code $}' is special, and is prohibited from ever appearing in a format specifier (even though | |||||
| * it's not strictly part of the syntax). | |||||
| * | |||||
| * <p>A {@code FormatSpec} also defines the ranges of numbers for which this format applies. This | |||||
| * is a {@link RangeTree}, rather than a {@code PrefixTree}, since length matters (different | |||||
| * formats are sometimes distinguished purely on the basis of number length). The possible lengths | |||||
| * of the range tree must match the possible lengths of all defined specifier strings. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class FormatSpec { | |||||
| /** | |||||
| * Returns a format specifier from the serialized fields. Note that the given non-local | |||||
| * specifiers must share certain properties (e.g. same number of format groups, same min/max | |||||
| * length, same trailing group format). Some of this is necessary due to limitations in how | |||||
| * formats are represented in the legacy XML schema (e.g. between national and carrier specific | |||||
| * formats). Exceptions are raised when any of these properties are violated. | |||||
| * | |||||
| * @param nationalSpec the national format specifier string (can contain \-escaped characters). | |||||
| * @param carrierSpec the optional carrier format specifier string. | |||||
| * @param intlSpec the optional international format specifier string. | |||||
| * @param localSpec additional local format specifier string. | |||||
| * @param nationalPrefixOptional allows the national prefix omitted during parsing even if | |||||
| * present in the format, or given during parsing when not present in the format. | |||||
| * @param comment a free-from comment for this specifier. | |||||
| */ | |||||
| public static FormatSpec of( | |||||
| String nationalSpec, | |||||
| Optional<String> carrierSpec, | |||||
| Optional<String> intlSpec, | |||||
| Optional<String> localSpec, | |||||
| boolean nationalPrefixOptional, | |||||
| Optional<Comment> comment) { | |||||
| FormatTemplate national = FormatTemplate.parse(nationalSpec); | |||||
| checkArgument(!national.hasCarrierCode(), | |||||
| "national format specifier must not contain carrier code: %s", nationalSpec); | |||||
| Optional<FormatTemplate> carrier = carrierSpec.map(s -> parseCarrierSpec(s, national)); | |||||
| Optional<FormatTemplate> intl = intlSpec.map(s -> parseIntlSpec(s, national)); | |||||
| Optional<FormatTemplate> local = localSpec.map(s -> parseLocalSpec(s, national)); | |||||
| int minLength = national.minLength(); | |||||
| int maxLength = national.maxLength(); | |||||
| return new AutoValue_FormatSpec( | |||||
| national, carrier, intl, local, minLength, maxLength, nationalPrefixOptional, comment); | |||||
| } | |||||
| /** | |||||
| * Returns a local format specifier for the given template. Local specifiers only have a national | |||||
| * template and national prefix prohibited. | |||||
| */ | |||||
| public static FormatSpec localFormat(FormatTemplate local) { | |||||
| checkArgument(!local.hasNationalPrefix(), | |||||
| "a local template must not have national prefix: %s", local); | |||||
| return new AutoValue_FormatSpec( | |||||
| local, | |||||
| Optional.empty(), | |||||
| Optional.empty(), | |||||
| Optional.empty(), | |||||
| local.minLength(), | |||||
| local.maxLength(), | |||||
| false, | |||||
| Optional.empty()); | |||||
| } | |||||
| /** Returns the national format template (e.g. "#XX XXX XXXX"). */ | |||||
| public abstract FormatTemplate national(); | |||||
| /** Returns the carrier specific format template (e.g. "#@ XX XXX XXXX"), if one was given. */ | |||||
| public abstract Optional<FormatTemplate> carrier(); | |||||
| /** Returns the international format template (e.g. "XX-XXX-XXXX"), if one was given. */ | |||||
| public abstract Optional<FormatTemplate> international(); | |||||
| /** | |||||
| * Returns the local format template (e.g. "XXX-XXXX"). Local formats must correspond to the | |||||
| * "Area Code Length" values in at least some of the ranges to which they are assigned. | |||||
| */ | |||||
| public abstract Optional<FormatTemplate> local(); | |||||
| /** Returns the minimum number of digits which this format matches. */ | |||||
| public abstract int minLength(); | |||||
| /** Returns the maximum number of digits which this format matches. */ | |||||
| public abstract int maxLength(); | |||||
| /** | |||||
| * Returns whether, for formats without a national prefix specified, it is still possible to | |||||
| * trigger this format by adding a national prefix (even though it is not shown). Formats for | |||||
| * which this method returns {@code true} are grouped alongside formats with an explicit national | |||||
| * prefix (since they must be ordered carefully with respect to each other to account for their | |||||
| * "leading digits"). | |||||
| */ | |||||
| public abstract boolean nationalPrefixOptional(); | |||||
| /** Returns the free-form comment associated with this format specifier. */ | |||||
| public abstract Optional<Comment> comment(); | |||||
| /** | |||||
| * Returns the length based bounds for this format (e.g. all digit sequences between the minimum | |||||
| * and maximum lengths). | |||||
| */ | |||||
| public RangeTree getLengthBasedBounds() { | |||||
| return RangeTree.from(IntStream.rangeClosed(minLength(), maxLength()) | |||||
| .mapToObj(RangeSpecification::any)); | |||||
| } | |||||
| @Override | |||||
| public final String toString() { | |||||
| StringBuilder out = new StringBuilder("FormatSpec{national=").append(national()); | |||||
| carrier().ifPresent(t -> out.append(", carrier=").append(t)); | |||||
| local().ifPresent(t -> out.append(", local=").append(t)); | |||||
| international().ifPresent(t -> out.append(", international=").append(t)); | |||||
| out.append(", minLength=").append(minLength()); | |||||
| out.append(", maxLength=").append(maxLength()); | |||||
| comment().ifPresent(c -> out.append(", comment='").append(c).append('\'')); | |||||
| return out.append('}').toString(); | |||||
| } | |||||
| // ---- RULE PARSING AND CONVERSION METHODS ---- | |||||
| private static FormatTemplate parseCarrierSpec(String spec, FormatTemplate national) { | |||||
| FormatTemplate carrier = FormatTemplate.parse(spec); | |||||
| checkArgument(carrier.hasCarrierCode(), | |||||
| "carrier format specifier must contain carrier code: %s", spec); | |||||
| // This verifies the groups have the same lengths, but does not check for same formatting. | |||||
| checkArgument(carrier.isCompatibleWith(national), | |||||
| "carrier format specifier must have compatible groups: %s - %s", | |||||
| national.getSpecifier(), spec); | |||||
| // This is really ugly, since carrier formats must share the same format in the legacy XML, but | |||||
| // can have different formatting rules for the first group. The best way to test this is just | |||||
| // compare the XML output directly instead of trying to reason about groups, since group replace | |||||
| // also needs to be taken into account. | |||||
| checkArgument(carrier.getXmlFormat().equals(national.getXmlFormat()), | |||||
| "carrier format specifier must have equal trailing groups: %s - %s", | |||||
| national.getSpecifier(), spec); | |||||
| // Artificial check (currently true everywhere and likely to never be broken). If this is ever | |||||
| // relaxed, the nationalPrefixForParsing regeneration code will need changing to take account | |||||
| // of ordering (e.g. generate "(<CC>)?<NP>" instead of "<NP>(<CC>)?"). | |||||
| checkArgument(!carrier.hasNationalPrefix() || spec.indexOf('#') < spec.indexOf('@'), | |||||
| "national prefix must precede carrier code in carrier format: %s", spec); | |||||
| return carrier; | |||||
| } | |||||
| private static FormatTemplate parseIntlSpec(String spec, FormatTemplate national) { | |||||
| FormatTemplate intl = FormatTemplate.parse(spec); | |||||
| // In theory this could be relaxed, but then when the spec is written it cannot just call | |||||
| // getFormat(). For now, it's always true the international formats don't have "fancy" | |||||
| // formatting around the first group (i.e. never "(XXX) XXX XXX") which makes sense since | |||||
| // international formats cannot be assumed to be read by people with local knowledge. | |||||
| // TODO: To reactivate this check after we are sure that first digit of | |||||
| // SN of MX is no more 1 and need not to be swallowed when formatting i.e after parsing change. | |||||
| // Context: We have disabled the following check to fix a MX formatting issue i.e using this | |||||
| // logic {X>} to remove the mobile token(1) in international format, which is the first digit of | |||||
| // the mobile subscriber number. More details in b/111967450. In general, international | |||||
| // format should not have such special formatting. Can be fixed as part of b/138727490. | |||||
| // checkArgument(!intl.getXmlPrefix().isPresent(), | |||||
| // "international format specifier must not have separate prefix: %s", spec); | |||||
| checkArgument( | |||||
| !intl.hasNationalPrefix(), | |||||
| "international format specifier must not contain national prefix: %s", | |||||
| spec); | |||||
| checkArgument(!intl.hasCarrierCode(), | |||||
| "international format specifier must not contain carrier code: %s", spec); | |||||
| checkArgument(intl.isCompatibleWith(national), | |||||
| "international format specifier must have compatible groups: %s - %s", | |||||
| national.getSpecifier(), spec); | |||||
| return intl; | |||||
| } | |||||
| private static FormatTemplate parseLocalSpec(String spec, FormatTemplate national) { | |||||
| FormatTemplate local = FormatTemplate.parse(spec); | |||||
| checkArgument(!local.getXmlPrefix().isPresent(), | |||||
| "local format specifier must not have separate prefix: %s", spec); | |||||
| checkArgument(!local.hasNationalPrefix(), | |||||
| "local format specifier must not contain national prefix: %s", spec); | |||||
| checkArgument(!local.hasCarrierCode(), | |||||
| "local format specifier must not contain carrier code: %s", spec); | |||||
| checkArgument(local.minLength() < national.minLength(), | |||||
| "local format specifier must be shorter than the national format: %s - %s", | |||||
| national.getSpecifier(), spec); | |||||
| return local; | |||||
| } | |||||
| // ---- TEMPLATE CLASSES ---- | |||||
| /** | |||||
| * A single template corresponding to a format specifier such as {@code "(# XXX) XXX-XXXX"}. | |||||
| * A template represents one of the types of format (national, international, carrier specific) | |||||
| * and enforces as much structural correctness as possible. | |||||
| * | |||||
| * <p>Templates bridge between the specifier syntax and the XML syntax, with its split prefixes | |||||
| * and confusing semantics. As such, there's a lot of slightly subtle business logic in the | |||||
| * parsing of templates that, over time, might need to adapt to real world changes (e.g. suffix | |||||
| * separators and precise expectations of format structure). | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract static class FormatTemplate { | |||||
| // This could be extended, but must never overlap with the escape characters used in the | |||||
| // "skeleton" string. It must also always be limited to the Basic Multilingual Plane (BMP). | |||||
| // It's really important that '$' is never a meta-character in this syntax, since we escape | |||||
| // strings like "$FG" which would otherwise be broken. | |||||
| private static final CharMatcher VALID_TEMPLATE_CHARS = | |||||
| CharMatcher.ascii().and(CharMatcher.javaIsoControl().negate()).and(CharMatcher.isNot('$')); | |||||
| private static final CharMatcher VALID_METACHARS = CharMatcher.anyOf("#@X*{>}\\"); | |||||
| // Need to include '$' as a separator, since groups can abut. | |||||
| private static final CharMatcher SUFFIX_SEPARATOR = CharMatcher.anyOf(". /-$"); | |||||
| private static final char NATIONAL_PREFIX = '#'; | |||||
| private static final char CARRIER_CODE = '@'; | |||||
| private static final char REQUIRED_DIGIT = 'X'; | |||||
| private static final char OPTIONAL_DIGIT = '*'; | |||||
| private static final char SUBSTITUTION_START = '{'; | |||||
| private static final char SUBSTITUTION_MAP = '>'; | |||||
| private static final char SUBSTITUTION_END = '}'; | |||||
| private static final String ESCAPED_NATIONAL_PREFIX = "$NP"; | |||||
| private static final String ESCAPED_CARRIER_CODE = "$CC"; | |||||
| static FormatTemplate parse(String spec) { | |||||
| checkArgument(VALID_TEMPLATE_CHARS.matchesAllOf(spec), | |||||
| "illegal characters in template: %s", spec); | |||||
| List<FormatGroup> groups = new ArrayList<>(); | |||||
| StringBuilder skeleton = new StringBuilder(); | |||||
| boolean hasNationalPrefix = false; | |||||
| boolean hasCarrierCode = false; | |||||
| boolean hasVariableLengthGroup = false; | |||||
| // Used to avoid abutting groups (i.e. "XXX**XX"). | |||||
| boolean canStartGroup = true; | |||||
| for (int n = 0; n < spec.length(); n++) { | |||||
| char c = spec.charAt(n); | |||||
| if (c == REQUIRED_DIGIT) { | |||||
| checkArgument(canStartGroup, "illegal group start: %s", spec); | |||||
| FormatGroup group = extractGroup(spec, n); | |||||
| checkArgument(!(hasVariableLengthGroup && group.isVariableLength()), | |||||
| "multiple variable length groups not allowed: %s", spec); | |||||
| hasVariableLengthGroup = group.isVariableLength(); | |||||
| groups.add(group); | |||||
| skeleton.append(escapeGroupNumber(groups.size())); | |||||
| // Move to the last character of the group (since we increment again as we loop). | |||||
| n += group.maxLength() - 1; | |||||
| canStartGroup = false; | |||||
| continue; | |||||
| } | |||||
| if (c == SUBSTITUTION_START) { | |||||
| // Expect {GROUP>REPLACEMENT} where group can have optional digits (but normally won't). | |||||
| checkArgument(canStartGroup, "illegal group start: %s", spec); | |||||
| checkArgument(spec.charAt(n + 1) == REQUIRED_DIGIT, | |||||
| "illegal group replacement start: %s", spec); | |||||
| FormatGroup group = extractGroup(spec, n + 1); | |||||
| checkArgument(!(hasVariableLengthGroup && group.isVariableLength()), | |||||
| "multiple variable length groups not allowed: %s", spec); | |||||
| hasVariableLengthGroup = group.isVariableLength(); | |||||
| // Now expect mapping character and substitution string. | |||||
| n += group.maxLength() + 1; | |||||
| checkArgument(spec.charAt(n) == SUBSTITUTION_MAP, | |||||
| "illegal group replacement (expected %s): '%s'", SUBSTITUTION_MAP, spec); | |||||
| int end = spec.indexOf(SUBSTITUTION_END, n + 1); | |||||
| checkArgument(end != -1, "missing group replacement end: %s", spec); | |||||
| groups.add(group.withReplacement(spec.substring(n + 1, end))); | |||||
| skeleton.append(escapeGroupNumber(groups.size())); | |||||
| // Unlike the "normal" case above, you can start another group immediately after this | |||||
| // (since the {,} make it unambiguous). | |||||
| n = end; | |||||
| continue; | |||||
| } | |||||
| canStartGroup = true; | |||||
| if (c == NATIONAL_PREFIX) { | |||||
| checkArgument(!hasNationalPrefix, "multiple national prefixes not allowed: %s", spec); | |||||
| hasNationalPrefix = true; | |||||
| skeleton.append(ESCAPED_NATIONAL_PREFIX); | |||||
| continue; | |||||
| } | |||||
| if (c == CARRIER_CODE) { | |||||
| checkArgument(!hasCarrierCode, "multiple carrier codes not allowed: %s", spec); | |||||
| hasCarrierCode = true; | |||||
| skeleton.append(ESCAPED_CARRIER_CODE); | |||||
| continue; | |||||
| } | |||||
| if (c == '\\') { | |||||
| // Blows up if trailing '\', but that's fine. | |||||
| c = spec.charAt(++n); | |||||
| checkArgument(VALID_METACHARS.matches(c), "invalid escaped character '%s': %s", c, spec); | |||||
| } else { | |||||
| checkArgument(c != OPTIONAL_DIGIT, "unexpected optional marker: %s", spec); | |||||
| } | |||||
| skeleton.append(c); | |||||
| } | |||||
| checkArgument(!groups.isEmpty(), "format specifiers must have at least one group: %s", spec); | |||||
| // Find the first group which has a replacement (one must exist). This is important for | |||||
| // determining where the prefix and suffix should be split when considering hoisting the | |||||
| // prefix into a format rule (see getSuffixStart() / getXmlPrefix() / getXmlFormat()). | |||||
| int fgIndex = 0; | |||||
| while (fgIndex < groups.size() && groups.get(fgIndex).replacement().isPresent()) { | |||||
| fgIndex++; | |||||
| } | |||||
| checkArgument(fgIndex < groups.size(), "cannot replace all groups in a template: %s", spec); | |||||
| return new AutoValue_FormatSpec_FormatTemplate( | |||||
| spec, | |||||
| hasNationalPrefix, | |||||
| hasCarrierCode, | |||||
| ImmutableList.copyOf(groups), | |||||
| fgIndex, | |||||
| skeleton.toString()); | |||||
| } | |||||
| /** | |||||
| * Returns the specifier string (e.g. "# XXX-XXXX") which is the serialized form of the | |||||
| * template (this is the string from which the template was parsed). | |||||
| */ | |||||
| public abstract String getSpecifier(); | |||||
| /** Returns whether this template contains the national prefix placeholder ('#'). */ | |||||
| public abstract boolean hasNationalPrefix(); | |||||
| /** Returns whether this template contains the carrier code placeholder ('@'). */ | |||||
| public abstract boolean hasCarrierCode(); | |||||
| /** Returns the groups in this template, in the order in which they appear in the specifier. */ | |||||
| public abstract ImmutableList<FormatGroup> getGroups(); | |||||
| /** | |||||
| * Returns the zero-based index of the first group without a replacement (at least one must). | |||||
| */ | |||||
| public abstract int getFirstAvailableGroupIndex(); | |||||
| // This is an internal representation of the format string used by the XML. It differs in that | |||||
| // it isn't split into prefix and suffix (as required in some situations for the XML). As such | |||||
| // it only contains "$NP", "$CC", "$<N>", but never "$FG". All valid specifier skeletons must | |||||
| // contain "$1"..."$<N>" rather than any replacement strings. | |||||
| abstract String skeleton(); | |||||
| /** Returns the minimum number of digits which can be matched by this template. */ | |||||
| public int minLength() { | |||||
| return getLength(this, FormatGroup::minLength); | |||||
| } | |||||
| /** Returns the maximum number of digits which can be matched by this template (see also {@link #minLength()}). */ | |||||
| public int maxLength() { | |||||
| return getLength(this, FormatGroup::maxLength); | |||||
| } | |||||
| /** | |||||
| * Returns the maximum number of digits which can be formatted as a single block by this | |||||
| * template. If no more than this number of digits are entered, they will be formatted as a | |||||
| * single block by this template. | |||||
| * | |||||
| * <p>This is useful when calculating the leading digits of a format since it might be | |||||
| * acceptable to match shortcodes to some formats if they would still format the shortcode | |||||
| * within the first block. This simplifies the leading digits in some cases. | |||||
| */ | |||||
| public int getBlockFormatLength() { | |||||
| // If only one group everything is a block, otherwise take the minimum length of the first | |||||
| // group. | |||||
| return (getGroups().size() == 1) ? maxLength() : getGroups().get(0).minLength(); | |||||
| } | |||||
| /** Returns a regex to capture the groups for this template (e.g. "(\d{3})(\d{4,5})") */ | |||||
| public String getXmlCapturingPattern() { | |||||
| return getGroups().stream() | |||||
| .map(FormatGroup::toRegex) | |||||
| .collect(Collectors.joining(")(", "(", ")")); | |||||
| } | |||||
| /** | |||||
| * Returns the format string for use in the XML (e.g. "$1 $2-$3"). | |||||
| * | |||||
| * <p>For example given the following templates: | |||||
| * <ul> | |||||
| * <li>{@code "XXX XXX-XXX"} ==> {@code "$1 $2-$3"} | |||||
| * <li>{@code "(#XXX) XXX-XXX"} ==> {@code "$1 $2-$3"} (the prefix is hoisted) | |||||
| * <li>{@code "#{XXX>123} XXX-XXX"} ==> {@code "$2-$3"} ($1 was replaced and hoisted) | |||||
| * <li>{@code "{X>}XXX-XXX"} ==> {@code "$2-$3"} ($1 was removed) | |||||
| * </ul> | |||||
| */ | |||||
| public String getXmlFormat() { | |||||
| int fgIndex = getFirstAvailableGroupIndex(); | |||||
| // Always replace the prefix with $N (which is what $FG maps to). This might be a no-op. | |||||
| String format = "$" + (fgIndex + 1) + skeleton().substring(getSuffixStart()); | |||||
| // Finally do any group replacement from the skeleton after the "first available group". | |||||
| // | |||||
| // Note that this code isn't exercised in data at the moment (2018) but is here to avoid | |||||
| // needing to place artificial limitations on where group replacement can occur. | |||||
| for (int n = fgIndex + 1; n < getGroups().size(); n++) { | |||||
| Optional<String> replacement = getGroups().get(n).replacement(); | |||||
| if (replacement.isPresent()) { | |||||
| format = format.replace("$" + (n + 1), replacement.get()); | |||||
| } | |||||
| } | |||||
| return format; | |||||
| } | |||||
| /** | |||||
| * Returns the format prefix for use in the XML formatting rules (e.g. "($NP $FG)"). If the | |||||
| * calculated prefix is just "$FG" then nothing is returned (since that's a no-op value). | |||||
| * | |||||
| * <p>For example given the following templates: | |||||
| * <ul> | |||||
| * <li>{@code "XXX XXX-XXX"} ==> XML prefix is empty | |||||
| * <li>{@code "(#XXX) XXX-XXX"} ==> {@code "($NP$FG)"} | |||||
| * <li>{@code "#{XXX>123} XXX-XXX"} ==> {@code "$NP123 $FG"} | |||||
| * <li>{@code "{X>}XXX-XXX"} ==> XML prefix is empty (but the format will not contain $1) | |||||
| * </ul> | |||||
| */ | |||||
| public Optional<String> getXmlPrefix() { | |||||
| String prefix = skeleton().substring(0, getSuffixStart()); | |||||
| // We know that "$<fgIndex + 1>" (substitutions are 1-indexed) is in the prefix and | |||||
| // should be replaced with "$FG", and everything before that has a replacement. | |||||
| int fgIndex = getFirstAvailableGroupIndex(); | |||||
| for (int n = 0; n < fgIndex; n++) { | |||||
| // Everything before the "first available group" must have a replacement (by definition). | |||||
| prefix = prefix.replace("$" + (n + 1), getGroups().get(n).replacement().get()); | |||||
| } | |||||
| prefix = prefix.replace("$" + (fgIndex + 1), "$FG"); | |||||
| checkState(prefix.contains("$FG"), | |||||
| "XML prefix must always contain '$FG' (this must be a code error): %s", prefix); | |||||
| // After all this work we could still end up with a no-op substitution! | |||||
| return prefix.equals("$FG") ? Optional.empty() : Optional.of(prefix); | |||||
| } | |||||
| /** | |||||
| * Returns whether all groups have the same "structure" (i.e. min/max length). They can | |||||
| * differ in terms of having replacements however. | |||||
| */ | |||||
| boolean isCompatibleWith(FormatTemplate other) { | |||||
| if (getGroups().size() != other.getGroups().size()) { | |||||
| return false; | |||||
| } | |||||
| for (int n = 0; n < getGroups().size(); n++) { | |||||
| if (!getGroups().get(n).isCompatibleWith(other.getGroups().get(n))) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| private int getSuffixStart() { | |||||
| // This is only safe because "\$1" cannot be present ('$' cannot be escaped). | |||||
| int suffixStart = SUFFIX_SEPARATOR.indexIn(skeleton(), skeleton().indexOf("$1") + 1); | |||||
| // If no suffix start found, the entire skeleton is the prefix. | |||||
| if (suffixStart == -1) { | |||||
| suffixStart = skeleton().length(); | |||||
| } | |||||
| // Now account for the fact that the first group (and others) could have replacements, which | |||||
| // pushes the suffix start to just after the "first available group" (which is what becomes | |||||
| // $FG). If the first available group is "$1" then we just get suffixStart. | |||||
| int fgNumber = getFirstAvailableGroupIndex() + 1; | |||||
| checkState(fgNumber < 10, "invalid first group number: %s", fgNumber); | |||||
| return Math.max(suffixStart, skeleton().indexOf("$" + fgNumber) + 2); | |||||
| } | |||||
| @Override | |||||
| public final String toString() { | |||||
| return getSpecifier(); | |||||
| } | |||||
| private static int getLength(FormatTemplate template, ToIntFunction<FormatGroup> lengthFn) { | |||||
| return template.getGroups().stream().mapToInt(lengthFn).sum(); | |||||
| } | |||||
| private static FormatGroup extractGroup(String template, int start) { | |||||
| // We know that 'start' references a group start (i.e. 'X') so length must be at least 1. | |||||
| int endRequired = findEndOf(REQUIRED_DIGIT, template, start); | |||||
| int endGroup = findEndOf(OPTIONAL_DIGIT, template, endRequired); | |||||
| return FormatGroup.of(endRequired - start, endGroup - start); | |||||
| } | |||||
| private static int findEndOf(char c, String template, int start) { | |||||
| int endRequired = CharMatcher.isNot(c).indexIn(template, start); | |||||
| return endRequired != -1 ? endRequired : template.length(); | |||||
| } | |||||
| private static String escapeGroupNumber(int n) { | |||||
| checkArgument(n >= 1 && n <= 9, "bad group number: %s", n); | |||||
| return "$" + n; | |||||
| } | |||||
| } | |||||
| /** Represents contiguous digit groups in a format (e.g. "XXX" or "XXX***"). */ | |||||
| @AutoValue | |||||
| public abstract static class FormatGroup { | |||||
| private static FormatGroup of(int min, int max) { | |||||
| checkArgument(max >= min, "bad group lengths: %s, %s", min, max); | |||||
| return new AutoValue_FormatSpec_FormatGroup(min, max, Optional.empty()); | |||||
| } | |||||
| private FormatGroup withReplacement(String s) { | |||||
| return new AutoValue_FormatSpec_FormatGroup(minLength(), maxLength(), Optional.of(s)); | |||||
| } | |||||
| /** Returns the minimum number of digits in this group. */ | |||||
| public abstract int minLength(); | |||||
| /** Returns the maximum number of digits in this group. */ | |||||
| public abstract int maxLength(); | |||||
| /** Returns the optional, arbitrary (possibly empty) replacement string for this group. */ | |||||
| abstract Optional<String> replacement(); | |||||
| /** | |||||
| * Returns if this group can match a variable number of digits. Only one group in any format | |||||
| * specifier can have variable length. | |||||
| */ | |||||
| private boolean isVariableLength() { | |||||
| return maxLength() > minLength(); | |||||
| } | |||||
| /** | |||||
| * Returns whether two groups have the same "structure" (i.e. min/max lengths), but does not | |||||
| * compare replacement values. Used only for internal checks. | |||||
| */ | |||||
| private boolean isCompatibleWith(FormatGroup other) { | |||||
| return minLength() == other.minLength() && maxLength() == other.maxLength(); | |||||
| } | |||||
| private String toRegex() { | |||||
| if (maxLength() > minLength()) { | |||||
| return String.format("\\d{%d,%d}", minLength(), maxLength()); | |||||
| } else if (minLength() > 1) { | |||||
| return String.format("\\d{%d}", minLength()); | |||||
| } else { | |||||
| return "\\d"; | |||||
| } | |||||
| } | |||||
| @Override | |||||
| public final String toString() { | |||||
| String group = | |||||
| Strings.repeat("X", minLength()) + Strings.repeat("*", maxLength() - minLength()); | |||||
| return replacement().map(r -> String.format("{%s>%s}", group, r)).orElse(group); | |||||
| } | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,96 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||||
| import java.util.Optional; | |||||
| /** | |||||
| * The schema of the "Formats" table with rows keyed by ID, and columns: | |||||
| * <ol> | |||||
| * <li>{@link #NATIONAL}: Required national format (may contain '#' for national prefix). | |||||
| * <li>{@link #CARRIER}: Optional carrier format (may contain '#' and '@' for carrier | |||||
| * specifier). Must be compatible with the national format (same suffix). | |||||
| * <li>{@link #INTERNATIONAL}: International format (must not contain '#' or '@'). | |||||
| * <li>{@link #LOCAL}: Local format (must not contain '#' or '@', and must correspond to assigned | |||||
| * area code lengths if present). | |||||
| * <li>{@link #COMMENT}: Freeform comment text. | |||||
| * </ol> | |||||
| * | |||||
| * <p>Rows keys are serialized via the marshaller and produce the leading column: | |||||
| * <ol> | |||||
| * <li>{@code Id}: The format ID. | |||||
| * </ol> | |||||
| */ | |||||
| public final class FormatsTableSchema { | |||||
| public static final Column<String> NATIONAL = Column.ofString("National"); | |||||
| public static final Column<String> CARRIER = Column.ofString("Carrier"); | |||||
| public static final Column<String> INTERNATIONAL = Column.ofString("International"); | |||||
| public static final Column<String> LOCAL = Column.ofString("Local"); | |||||
| public static final Column<Boolean> NATIONAL_PREFIX_OPTIONAL = | |||||
| Column.ofBoolean("National Prefix Optional"); | |||||
| /** An arbitrary optional text comment. */ | |||||
| public static final Column<String> COMMENT = Column.ofString("Comment"); | |||||
| private static final CsvKeyMarshaller<String> MARSHALLER = CsvKeyMarshaller.ofSortedString("Id"); | |||||
| private static final Schema COLUMNS = | |||||
| Schema.builder() | |||||
| .add(NATIONAL) | |||||
| .add(CARRIER) | |||||
| .add(INTERNATIONAL) | |||||
| .add(LOCAL) | |||||
| .add(NATIONAL_PREFIX_OPTIONAL) | |||||
| .add(COMMENT) | |||||
| .build(); | |||||
| /** Schema instance defining the operators CSV table. */ | |||||
| public static final CsvSchema<String> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS); | |||||
| /** Converts a CSV table into a map of format specifiers. */ | |||||
| public static ImmutableMap<String, FormatSpec> toFormatSpecs(CsvTable<String> formats) { | |||||
| ImmutableMap.Builder<String, FormatSpec> specs = ImmutableMap.builder(); | |||||
| for (String id : formats.getKeys()) { | |||||
| specs.put( | |||||
| id, | |||||
| FormatSpec.of( | |||||
| formats.getOrDefault(id, NATIONAL), | |||||
| toOptional(formats.getOrDefault(id, CARRIER)), | |||||
| toOptional(formats.getOrDefault(id, INTERNATIONAL)), | |||||
| toOptional(formats.getOrDefault(id, LOCAL)), | |||||
| formats.getOrDefault(id, NATIONAL_PREFIX_OPTIONAL), | |||||
| toComment(formats.getOrDefault(id, COMMENT)))); | |||||
| } | |||||
| return specs.build(); | |||||
| } | |||||
| private static Optional<String> toOptional(String s) { | |||||
| return s.isEmpty() ? Optional.empty() : Optional.of(s); | |||||
| } | |||||
| private static Optional<Comment> toComment(String s) { | |||||
| return s.isEmpty() ? Optional.empty() : Optional.of(Comment.fromText(s)); | |||||
| } | |||||
| private FormatsTableSchema() {} | |||||
| } | |||||
| @ -0,0 +1,36 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import com.google.errorprone.annotations.FormatMethod; | |||||
| /** | |||||
| * Represents an error related to CSV metadata, either structural issues in the CSV or semantic | |||||
| * errors in the XML representation. MetadataExceptions should only correspond to problems fixable | |||||
| * by editing the CSV data. | |||||
| */ | |||||
| public final class MetadataException extends RuntimeException { | |||||
| @FormatMethod | |||||
| public static void checkMetadata(boolean cond, String msg, Object... args) { | |||||
| if (!cond) { | |||||
| throw new MetadataException(String.format(msg, args)); | |||||
| } | |||||
| } | |||||
| public MetadataException(String message) { | |||||
| super(message); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,168 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static java.util.Comparator.naturalOrder; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.Timezones; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.table.MultiValue; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||||
| import java.util.Arrays; | |||||
| import java.util.Comparator; | |||||
| import java.util.Optional; | |||||
| import java.util.stream.Stream; | |||||
| /** | |||||
| * The schema of the "Metadata" table with rows keyed by {@link DigitSequence} and columns: | |||||
| * | |||||
| * <ol> | |||||
| * <li>{@link #MAIN_REGION}: The primary region associated with a calling code. | |||||
| * <li>{@link #EXTRA_REGIONS}: A list of additional regions shared by the calling code. | |||||
| * <li>{@link #NATIONAL_PREFIX}: The (optional) prefix used when dialling national numbers. | |||||
| * <li>{@link #IDD_PREFIX}: The default international dialling (IDD) prefix. | |||||
| * <li>{@link #TIMEZONE}: The default timezone name(s) for a calling code. Multiple timezones | |||||
| * can be specific if separated by {@code '&'}. | |||||
| * <li>{@link #MOBILE_PORTABLE_REGIONS}: A list of regions in which mobile numbers are portable | |||||
| * between operators. | |||||
| * <li>{@link #NATIONAL_PREFIX_OPTIONAL}: True if the national prefix is optional throughout the | |||||
| * numbering plan (e.g. a prefix is defined, but does not have to be present when numbers are | |||||
| * used). | |||||
| * </ol> | |||||
| * | |||||
| * <p>Rows keys are serialized via the marshaller and produce the leading column: | |||||
| * <ol> | |||||
| * <li>{@code Calling Code}: The country calling code. | |||||
| * </ol> | |||||
| */ | |||||
| public final class MetadataTableSchema { | |||||
| /** Values in the "REGIONS" column are a sorted list of region codes. */ | |||||
| public static final class Regions extends MultiValue<PhoneRegion, Regions> { | |||||
| private static final Regions EMPTY = new Regions(ImmutableSet.of()); | |||||
| public static Column<Regions> column(String name) { | |||||
| return Column.create(Regions.class, name, EMPTY, Regions::new); | |||||
| } | |||||
| public static Regions of(PhoneRegion... regions) { | |||||
| return new Regions(Arrays.asList(regions)); | |||||
| } | |||||
| public static Regions of(Iterable<PhoneRegion> regions) { | |||||
| return new Regions(regions); | |||||
| } | |||||
| private Regions(Iterable<PhoneRegion> regions) { | |||||
| super(regions, ',', naturalOrder(), true); | |||||
| } | |||||
| private Regions(String s) { | |||||
| super(s, PhoneRegion::of, ',', naturalOrder(), true); | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Values in the "NATIONAL_PREFIX" column are an (unsorted) list of prefixes, with the preferred | |||||
| * prefix first. | |||||
| */ | |||||
| public static final class DigitSequences extends MultiValue<DigitSequence, DigitSequences> { | |||||
| private static final DigitSequences EMPTY = new DigitSequences(ImmutableSet.of()); | |||||
| public static Column<DigitSequences> column(String name) { | |||||
| return Column.create(DigitSequences.class, name, EMPTY, DigitSequences::new); | |||||
| } | |||||
| public static DigitSequences of(DigitSequence... numbers) { | |||||
| return new DigitSequences(Arrays.asList(numbers)); | |||||
| } | |||||
| private DigitSequences(Iterable<DigitSequence> numbers) { | |||||
| super(numbers, ',', naturalOrder(), false); | |||||
| } | |||||
| private DigitSequences(String s) { | |||||
| super(s, DigitSequence::of, ',', naturalOrder(), false); | |||||
| } | |||||
| } | |||||
| /** The primary region associated with a calling code (e.g. "US" for NANPA). */ | |||||
| public static final Column<PhoneRegion> MAIN_REGION = | |||||
| Column.create(PhoneRegion.class, "Main Region", PhoneRegion.getUnknown(), PhoneRegion::of); | |||||
| /** A comma separated list of expected regions for the calling code. */ | |||||
| public static final Column<Regions> EXTRA_REGIONS = Regions.column("Extra Regions"); | |||||
| /** | |||||
| * A list of prefixes used when dialling national numbers (e.g. "0" for "US"). If more than one | |||||
| * prefix is given, the first prefix is assumed to be "preferred" and the others are considered | |||||
| * alternatives. Having multiple prefixes is useful if a country switches between prefixes and | |||||
| * a period of "parallel running" is needed. | |||||
| */ | |||||
| public static final Column<DigitSequences> NATIONAL_PREFIX = | |||||
| DigitSequences.column("National Prefix"); | |||||
| /** | |||||
| * The default international dialling (IDD) prefix. This is a string, rather than a digit | |||||
| * sequence, because it can optionally contain a single '~' character to indicate a pause while | |||||
| * dialling (e.g. "8~10" in Russia). This is stripped everywhere except when used to populate | |||||
| * the "preferredInternationalPrefix" attribute in the libphonenumber XML file. | |||||
| */ | |||||
| public static final Column<String> IDD_PREFIX = Column.ofString("IDD Prefix"); | |||||
| /** | |||||
| * The default value for the "Timezone" column in the ranges table (in many regions, this is a | |||||
| * single constant value). | |||||
| */ | |||||
| public static final Column<Timezones> TIMEZONE = RangesTableSchema.TIMEZONE; | |||||
| /** A comma separated list of regions in which mobile numbers are portable between carriers. */ | |||||
| public static final Column<Regions> MOBILE_PORTABLE_REGIONS = | |||||
| Regions.column("Mobile Portable Regions"); | |||||
| /** Describes whether the "national prefix" is optional when parsing a national number. */ | |||||
| public static final Column<Boolean> NATIONAL_PREFIX_OPTIONAL = | |||||
| Column.ofBoolean("National Prefix Optional"); | |||||
| /** The preferred prefix for specifying extensions to numbers (e.g. "ext" for "1234 ext 56"). */ | |||||
| public static final Column<String> EXTENSION_PREFIX = Column.ofString("Extension Prefix"); | |||||
| private static final CsvKeyMarshaller<DigitSequence> MARSHALLER = new CsvKeyMarshaller<>( | |||||
| k -> Stream.of(k.toString()), | |||||
| p -> DigitSequence.of(p.get(0)), | |||||
| Optional.of(Comparator.comparing(Object::toString)), | |||||
| "Calling Code"); | |||||
| private static final Schema COLUMNS = Schema.builder() | |||||
| .add(MAIN_REGION) | |||||
| .add(EXTRA_REGIONS) | |||||
| .add(NATIONAL_PREFIX) | |||||
| .add(IDD_PREFIX) | |||||
| .add(TIMEZONE) | |||||
| .add(MOBILE_PORTABLE_REGIONS) | |||||
| .add(NATIONAL_PREFIX_OPTIONAL) | |||||
| .add(EXTENSION_PREFIX) | |||||
| .build(); | |||||
| /** Schema instance defining the metadata CSV table. */ | |||||
| public static final CsvSchema<DigitSequence> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS); | |||||
| private MetadataTableSchema() {} | |||||
| } | |||||
| @ -0,0 +1,750 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkState; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.AREA_CODE_LENGTH; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.FORMAT; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.NATIONAL_ONLY; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.PER_REGION_COLUMNS; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema.REGIONS; | |||||
| import static java.lang.Boolean.TRUE; | |||||
| import static java.util.Comparator.comparing; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.base.Joiner; | |||||
| import com.google.common.base.Splitter; | |||||
| import com.google.common.collect.ContiguousSet; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.ImmutableSortedMap; | |||||
| import com.google.common.collect.ImmutableSortedSet; | |||||
| import com.google.common.collect.ImmutableTable; | |||||
| import com.google.common.collect.Ordering; | |||||
| import com.google.common.collect.Sets; | |||||
| import com.google.common.collect.Table; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Comparator; | |||||
| import java.util.List; | |||||
| import java.util.Map; | |||||
| import java.util.Optional; | |||||
| import java.util.Set; | |||||
| import java.util.TreeSet; | |||||
| /** | |||||
| * An abstraction of all the phone number metadata known about for a single calling code. | |||||
| * <p> | |||||
| * Note that there is no builder for NumberingScheme. The expectation is that CSV tables and other | |||||
| * primary sources will be used to build numbering schemes at a single point in the business logic. | |||||
| * Handling incremental modification of a builder, or partially built schemes just isn't something | |||||
| * that's expected to be needed (though there is {@code TestNumberingScheme} for use in unit tests). | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class NumberingScheme { | |||||
| // Bitmask for [1-9] (bits 1..9 set, bit 0 clear). | |||||
| private static final int NOT_ZERO_MASK = 0x3FE; | |||||
| /** Top level information about a numbering scheme. */ | |||||
| @AutoValue | |||||
| public abstract static class Attributes { | |||||
| /** Returns a new attributes instance for the given data. */ | |||||
| public static Attributes create( | |||||
| DigitSequence cc, | |||||
| PhoneRegion mainRegion, | |||||
| Set<PhoneRegion> extraRegions, | |||||
| ImmutableSet<DigitSequence> nationalPrefix, | |||||
| RangeTree carrierPrefixes, | |||||
| String defaultIddPrefix, | |||||
| RangeTree allIddRanges, | |||||
| String extensionPrefix, | |||||
| Set<PhoneRegion> mobilePortableRegions) { | |||||
| // In theory there could be IDD prefix for a non-geographic region (and this check could be | |||||
| // removed) but it's not something we've ever seen and don't have any expectation of. | |||||
| checkMetadata(!mainRegion.equals(PhoneRegion.getWorld()) || allIddRanges.isEmpty(), | |||||
| "[%s] IDD prefixes must not be present for non-geographic regions", cc); | |||||
| checkMetadata(mainRegion.equals(PhoneRegion.getWorld()) || !allIddRanges.isEmpty(), | |||||
| "[%s] IDD prefixes must be present for all geographic regions", cc); | |||||
| checkMetadata(nationalPrefix.stream().noneMatch(allIddRanges::contains), | |||||
| "[%s] National prefix %s and IDD prefixes (%s) must be disjoint", | |||||
| cc, nationalPrefix, allIddRanges); | |||||
| checkMetadata(nationalPrefix.stream().noneMatch(carrierPrefixes::contains), | |||||
| "[%s] National prefix %s and carrier prefixes (%s) must be disjoint", | |||||
| cc, nationalPrefix, carrierPrefixes); | |||||
| // Allow exactly one '~' to separate the prefix digits to indicate a pause during dialling | |||||
| // (this check could be relaxed in future, but it's currently true for all data). | |||||
| checkMetadata(defaultIddPrefix.isEmpty() || defaultIddPrefix.matches("[0-9]+(?:~[0-9]+)?"), | |||||
| "[%s] Invalid IDD prefix: %s", cc, defaultIddPrefix); | |||||
| DigitSequence iddPrefix = DigitSequence.of(defaultIddPrefix.replace("~", "")); | |||||
| checkMetadata(iddPrefix.isEmpty() || allIddRanges.contains(iddPrefix), | |||||
| "[%s] IDD ranges must contain the default prefix: %s", cc, iddPrefix); | |||||
| checkMetadata(!extraRegions.contains(mainRegion), | |||||
| "[%s] duplicated main region '%s' in extra regions: %s", | |||||
| cc, mainRegion, extraRegions); | |||||
| // Main region comes first in iteration order, remaining regions are ordered naturally. | |||||
| ImmutableSet.Builder<PhoneRegion> set = ImmutableSet.builder(); | |||||
| set.add(mainRegion); | |||||
| extraRegions.stream().sorted().forEach(set::add); | |||||
| ImmutableSet<PhoneRegion> allRegions = set.build(); | |||||
| checkMetadata(allRegions.containsAll(mobilePortableRegions), | |||||
| "invalid mobile portable regions: %s", mobilePortableRegions); | |||||
| return new AutoValue_NumberingScheme_Attributes( | |||||
| cc, | |||||
| allRegions, | |||||
| nationalPrefix, | |||||
| carrierPrefixes, | |||||
| defaultIddPrefix, | |||||
| allIddRanges, | |||||
| !extensionPrefix.isEmpty() ? Optional.of(extensionPrefix) : Optional.empty(), | |||||
| ImmutableSortedSet.copyOf(Ordering.natural(), mobilePortableRegions)); | |||||
| } | |||||
| /** Returns the unique calling code of this numbering scheme. */ | |||||
| public abstract DigitSequence getCallingCode(); | |||||
| /** | |||||
| * Returns the regions represented by this numbering scheme. The main region is always present | |||||
| * and listed first, and remaining regions are listed in "natural" order. | |||||
| */ | |||||
| public abstract ImmutableSet<PhoneRegion> getRegions(); | |||||
| /** | |||||
| * Returns the "main" region for this numbering scheme. The notion of a main region for a | |||||
| * country calling code is slightly archaic and mostly comes from the way in which the legacy | |||||
| * XML data is structured. However there are a few places in the public API where the "main" | |||||
| * region is returned in situations of ambiguity, so it can be useful to know it. | |||||
| */ | |||||
| public final PhoneRegion getMainRegion() { | |||||
| return getRegions().asList().get(0); | |||||
| } | |||||
| /** | |||||
| * Returns all possible national prefixes which can be used when dialling national numbers. In | |||||
| * most cases this set just contains the preferred prefix, but alternate values may be present | |||||
| * when a region switches between prefixes or for other reasons. Any "non preferred" prefixes | |||||
| * are recognized only during parsing, and otherwise ignored. | |||||
| * | |||||
| * <p>If there is a preferred prefix, it is listed first, otherwise the set is empty. | |||||
| */ | |||||
| public abstract ImmutableSet<DigitSequence> getNationalPrefixes(); | |||||
| /** | |||||
| * Returns the (possibly empty) prefix used when dialling national numbers (e.g. "0" for "US"). | |||||
| * Not all regions require a prefix for national dialling. | |||||
| */ | |||||
| public DigitSequence getPreferredNationalPrefix() { | |||||
| ImmutableSet<DigitSequence> prefixes = getNationalPrefixes(); | |||||
| return prefixes.isEmpty() ? DigitSequence.empty() : prefixes.iterator().next(); | |||||
| } | |||||
| /** | |||||
| * Returns all carrier prefixes for national dialling. This range must not contain the national | |||||
| * prefix. | |||||
| */ | |||||
| public abstract RangeTree getCarrierPrefixes(); | |||||
| /** | |||||
| * Returns the (possible empty) default international dialling (IDD) prefix, possibly | |||||
| * containing a '~' to indicate a pause during dialling (e.g. "8~10" for Russia). | |||||
| */ | |||||
| public abstract String getDefaultIddPrefix(); | |||||
| /** | |||||
| * Returns all IDD prefixes which may be used for international dialling. If the default prefix | |||||
| * is not empty it must be contained in this range. | |||||
| */ | |||||
| public abstract RangeTree getIddPrefixes(); | |||||
| /** Returns the preferred label to use for indicating extensions for numbers. */ | |||||
| public abstract Optional<String> getExtensionPrefix(); | |||||
| /** Returns the regions in which mobile numbers are portable between carriers. */ | |||||
| public abstract ImmutableSet<PhoneRegion> getMobilePortableRegions(); | |||||
| } | |||||
| /** | |||||
| * Creates a numbering scheme from a range table and example numbers. No rules are applied to the | |||||
| * data in the tables, and they are assumed to be complete. | |||||
| */ | |||||
| public static NumberingScheme from( | |||||
| Attributes attributes, | |||||
| RangeTable xmlTable, | |||||
| Map<PhoneRegion, RangeTable> shortcodeMap, | |||||
| Map<String, FormatSpec> formats, | |||||
| ImmutableList<AltFormatSpec> altFormats, | |||||
| Table<PhoneRegion, ValidNumberType, DigitSequence> exampleNumbers, | |||||
| List<Comment> comments) { | |||||
| checkPossibleRegions(attributes.getRegions(), xmlTable); | |||||
| checkNationalOnly(attributes, xmlTable); | |||||
| checkUnambiguousIdd(attributes, xmlTable, formats); | |||||
| ImmutableSortedMap<PhoneRegion, RangeTable> shortcodes = | |||||
| checkShortCodeConsistency(shortcodeMap, xmlTable); | |||||
| return new AutoValue_NumberingScheme( | |||||
| attributes, | |||||
| xmlTable, | |||||
| shortcodes, | |||||
| checkFormatConsistency(attributes, formats, xmlTable, shortcodes), | |||||
| checkAltFormatConsistency(altFormats, formats, xmlTable), | |||||
| checkExampleNumbers(attributes.getRegions(), xmlTable, exampleNumbers), | |||||
| addSyntheticComments(comments, attributes)); | |||||
| } | |||||
| // Adds the first comments for main and auxiliary regions, giving the English name and detailing | |||||
| // auxiliary region information if necessary. | |||||
| private static ImmutableList<Comment> addSyntheticComments( | |||||
| List<Comment> comments, Attributes attributes) { | |||||
| PhoneRegion mainRegion = attributes.getMainRegion(); | |||||
| if (!mainRegion.equals(PhoneRegion.getWorld())) { | |||||
| List<Comment> modified = new ArrayList<>(getRegionNameComments(mainRegion)); | |||||
| List<PhoneRegion> auxRegions = | |||||
| attributes.getRegions().asList().subList(1, attributes.getRegions().size()); | |||||
| if (!auxRegions.isEmpty()) { | |||||
| String comment = String.format("Main region for '%s'", Joiner.on(',').join(auxRegions)); | |||||
| modified.add(Comment.create(Comment.anchor(mainRegion), ImmutableList.of(comment))); | |||||
| for (PhoneRegion r : auxRegions) { | |||||
| modified.addAll(getRegionNameComments(r)); | |||||
| String auxComment = | |||||
| String.format("Calling code and formatting shared with '%s'", mainRegion); | |||||
| modified.add(Comment.create(Comment.anchor(r), ImmutableList.of(auxComment))); | |||||
| } | |||||
| } | |||||
| // Do this last, since order matters (because anchors are not unique) and we want the | |||||
| // synthetic comments to come first. | |||||
| modified.addAll(comments); | |||||
| comments = modified; | |||||
| } | |||||
| return ImmutableList.copyOf(comments); | |||||
| } | |||||
| private static List<Comment> getRegionNameComments(PhoneRegion region) { | |||||
| ImmutableList<String> enName = ImmutableList.of(region.getEnglishNameForXmlComments()); | |||||
| return ImmutableList.of( | |||||
| Comment.create(Comment.anchor(region), enName), | |||||
| Comment.create(Comment.shortcodeAnchor(region), enName)); | |||||
| } | |||||
| private static void checkPossibleRegions(Set<PhoneRegion> regions, RangeTable xmlTable) { | |||||
| ImmutableSet<PhoneRegion> actual = REGIONS.extractGroupColumns(xmlTable.getColumns()).keySet(); | |||||
| // Allow no region column in the table if there's only one region (since it's implicit). | |||||
| checkState((actual.isEmpty() && regions.size() == 1) || actual.equals(regions), | |||||
| "regions added to range table do not match the expected numbering scheme regions\n" | |||||
| + "expected: %s\n" | |||||
| + "actual: %s\n", | |||||
| regions, actual); | |||||
| } | |||||
| // An assumption has generally been that if a range is "national only" then it either: | |||||
| // a) belongs to only one region (the one it's national only for) | |||||
| // b) belongs to at least the main region (since in some schemes ranges mostly just overlap all | |||||
| // possible regions). | |||||
| // Thus we preclude the possibility of having a "national only" number that appears in multiple | |||||
| // regions, but not the main region. | |||||
| // | |||||
| // If this check is ever removed (because there is real data where this is not the case), then | |||||
| // the code which generates the "<noInternationalDialling>" patterns will have to be revisited. | |||||
| private static void checkNationalOnly(Attributes attributes, RangeTable xmlTable) { | |||||
| RangeTree allNationalOnly = xmlTable.getRanges(NATIONAL_ONLY, true); | |||||
| if (allNationalOnly.isEmpty()) { | |||||
| return; | |||||
| } | |||||
| ImmutableList<PhoneRegion> regions = attributes.getRegions().asList(); | |||||
| PhoneRegion main = regions.get(0); | |||||
| // Anything assigned to the main region can be ignored as we allow it to have multiple regions. | |||||
| // Now we have to ensure that these ranges are assigned to exactly one auxiliary region. | |||||
| RangeTree remaining = | |||||
| allNationalOnly.subtract(xmlTable.getRanges(REGIONS.getColumn(main), true)); | |||||
| if (remaining.isEmpty()) { | |||||
| return; | |||||
| } | |||||
| DigitSequence cc = attributes.getCallingCode(); | |||||
| for (PhoneRegion r : regions.subList(1, regions.size())) { | |||||
| RangeTree auxNationalOnly = | |||||
| xmlTable.getRanges(REGIONS.getColumn(r), true).intersect(allNationalOnly); | |||||
| // Anything already removed from "remaining" was already accounted for by another region. | |||||
| checkMetadata(remaining.containsAll(auxNationalOnly), | |||||
| "[%s] %s has national-only ranges which overlap other regions: %s", | |||||
| cc, r, auxNationalOnly.subtract(remaining)); | |||||
| remaining = remaining.subtract(auxNationalOnly); | |||||
| } | |||||
| // This is not data issue since it should have been checked already, this is bug. | |||||
| checkState(remaining.isEmpty(), "[%s] ranges not assigned to any region: %s", cc, remaining); | |||||
| } | |||||
| /** | |||||
| * Ensures no national range can start with an IDD (international dialling code of any kind). | |||||
| * This is slightly more complex than just looking for any IDD prefix at the start of a range | |||||
| * because of cases like India, where "00800..." is a valid range and does start with IDD. | |||||
| * | |||||
| * <p>We allow this because: | |||||
| * <ol> | |||||
| * <li>The number is required to have the national prefix in front, so must be dialled as | |||||
| * {@code 000800...} (according to the Indian numbering plan) | |||||
| * <li>and {@code 000...} is not a valid sequence that would lead to dialing into another region, | |||||
| * because all calling codes start with {@code [1-9]}. | |||||
| * </ol> | |||||
| */ | |||||
| private static void checkUnambiguousIdd( | |||||
| Attributes attributes, RangeTable xmlTable, Map<String, FormatSpec> formats) { | |||||
| // It can be empty for non-geographic (world) numbering schemes. | |||||
| if (attributes.getIddPrefixes().isEmpty()) { | |||||
| return; | |||||
| } | |||||
| // All IDDs extended by one non-zero digit. These are the prefixes which if dialled may end | |||||
| // up in another region, so they cannot be allowed at the start of any national number. | |||||
| RangeTree iddPlusOneDigit = attributes.getIddPrefixes().map(r -> r.extendByMask(NOT_ZERO_MASK)); | |||||
| // We only care about ranges up to this length, which can speed things up. | |||||
| int maxPrefixLength = iddPlusOneDigit.getLengths().last(); | |||||
| // Now prefix any ranges which could be dialled with a national prefix with all possible | |||||
| // national prefixes, based on how they are formatted (and assume that no format means no | |||||
| // national prefix). | |||||
| RangeTree withNationalPrefix = RangeTree.empty(); | |||||
| RangeTree withoutNationalPrefix = xmlTable.getRanges(FORMAT, FORMAT.defaultValue()); | |||||
| for (String fid : formats.keySet()) { | |||||
| FormatSpec spec = formats.get(fid); | |||||
| // Only bother with ranges up to the maximum prefix length we care about. | |||||
| RangeTree r = xmlTable.getRanges(FORMAT, fid).slice(0, maxPrefixLength); | |||||
| if (spec.nationalPrefixOptional()) { | |||||
| withNationalPrefix = withNationalPrefix.union(r); | |||||
| withoutNationalPrefix = withoutNationalPrefix.union(r); | |||||
| } else if (spec.national().hasNationalPrefix()) { | |||||
| withNationalPrefix = withNationalPrefix.union(r); | |||||
| } else { | |||||
| withoutNationalPrefix = withoutNationalPrefix.union(r); | |||||
| } | |||||
| } | |||||
| // Only here due to lambdas requiring an effectively final field (this makes me sad). | |||||
| RangeTree withNationalPrefixCopy = withNationalPrefix; | |||||
| RangeTree allDiallablePrefixes = | |||||
| withoutNationalPrefix | |||||
| .union(attributes.getNationalPrefixes().stream() | |||||
| .map(RangeSpecification::from) | |||||
| .map(p -> withNationalPrefixCopy.prefixWith(p)) | |||||
| .reduce(RangeTree.empty(), RangeTree::union)); | |||||
| // These are prefixes which are claimed to be nationally diallable but overlap with the IDD. | |||||
| RangeTree iddOverlap = PrefixTree.from(iddPlusOneDigit).retainFrom(allDiallablePrefixes); | |||||
| checkMetadata(iddOverlap.isEmpty(), | |||||
| "[%s] ranges cannot start with IDD: %s", attributes.getCallingCode(), iddOverlap); | |||||
| } | |||||
| /** | |||||
| * Ensures the shortcodes are disjoint from main ranges and consistent with each other by format | |||||
| * (since format information isn't held separately for each shortcode table). | |||||
| */ | |||||
| private static ImmutableSortedMap<PhoneRegion, RangeTable> checkShortCodeConsistency( | |||||
| Map<PhoneRegion, RangeTable> shortcodeMap, RangeTable table) { | |||||
| ImmutableSortedMap<PhoneRegion, RangeTable> shortcodes = | |||||
| ImmutableSortedMap.copyOf(shortcodeMap); | |||||
| shortcodes.forEach((region, shortcodeTable) -> { | |||||
| RangeTree overlap = table.getAllRanges().intersect(shortcodeTable.getAllRanges()); | |||||
| checkMetadata(overlap.isEmpty(), | |||||
| "Shortcode and national numbers overlap for %s: %s", region, overlap); | |||||
| }); | |||||
| return shortcodes; | |||||
| } | |||||
// Schema of the temporary table built by checkFormatConsistency(): it holds only the area code
// length and format ID columns.
private static final Schema FORMAT_SCHEMA =
    Schema.builder().add(AREA_CODE_LENGTH).add(FORMAT).build();
| // We actually explicitly permit duplicate formats (for now) since the XML has them. Later, once | |||||
| // everything is settled, it might be possible to add a check here. | |||||
| private static ImmutableMap<String, FormatSpec> checkFormatConsistency( | |||||
| Attributes attributes, | |||||
| Map<String, FormatSpec> formatMap, | |||||
| RangeTable table, | |||||
| Map<PhoneRegion, RangeTable> shortcodes) { | |||||
| DigitSequence cc = attributes.getCallingCode(); | |||||
| RangeTable.Builder allFormats = RangeTable.builder(FORMAT_SCHEMA); | |||||
| allFormats.copyNonDefaultValues(AREA_CODE_LENGTH, table, OverwriteMode.ALWAYS); | |||||
| allFormats.copyNonDefaultValues(FORMAT, table, OverwriteMode.ALWAYS); | |||||
| // Throws a RangeException (IllegalArgumentException) if inconsistent write occurs. | |||||
| shortcodes.values() | |||||
| .forEach(t -> allFormats.copyNonDefaultValues(FORMAT, t, OverwriteMode.SAME)); | |||||
| RangeTable formatTable = allFormats.build(); | |||||
| ImmutableMap<String, FormatSpec> formats = ImmutableMap.copyOf(formatMap); | |||||
| // TODO: Make this "equals" eventually (since it currently sees "synthetic" IDs). | |||||
| checkMetadata( | |||||
| formats.keySet().containsAll(formatTable.getAssignedValues(FORMAT)), | |||||
| "[%s] mismatched format IDs: %s", | |||||
| cc, Sets.symmetricDifference(formatTable.getAssignedValues(FORMAT), formats.keySet())); | |||||
| // If any of the checks relating to carrier formats are relaxed here, it might be necessary to | |||||
| // re-evaluate the logic around regeneration of nationalPrefixForParsing (so be careful!). | |||||
| boolean carrierTemplatesExist = false; | |||||
| boolean nationalPrefixExistsForFormatting = false; | |||||
| boolean nationalPrefixSometimesOptional = false; | |||||
| for (String id : formats.keySet()) { | |||||
| FormatSpec spec = formats.get(id); | |||||
| RangeTree assigned = allFormats.getRanges(FORMAT, id); | |||||
| checkMetadata(!assigned.isEmpty(), | |||||
| "[%s] format specifier '%s' not assigned to any range: %s", cc, id, spec); | |||||
| checkFormatLengths(cc, spec, assigned); | |||||
| checkLocalFormatLengths(cc, formatTable, spec, assigned); | |||||
| carrierTemplatesExist |= spec.carrier().isPresent(); | |||||
| nationalPrefixExistsForFormatting |= | |||||
| spec.national().hasNationalPrefix() | |||||
| || spec.carrier().map(FormatTemplate::hasNationalPrefix).orElse(false); | |||||
| nationalPrefixSometimesOptional |= spec.nationalPrefixOptional(); | |||||
| } | |||||
| checkMetadata(attributes.getCarrierPrefixes().isEmpty() || carrierTemplatesExist, | |||||
| "[%s] carrier prefixes exist but no formats have carrier templates: %s", | |||||
| cc, formats.values()); | |||||
| checkMetadata(!attributes.getNationalPrefixes().isEmpty() || !nationalPrefixExistsForFormatting, | |||||
| "[%s] if no national prefix exists, it cannot be specified in any format template: %s", | |||||
| cc, formats.values()); | |||||
| checkMetadata(!attributes.getNationalPrefixes().isEmpty() || !nationalPrefixSometimesOptional, | |||||
| "[%s] if no national prefix exists, it cannot be optional for formatting: %s", | |||||
| cc, formats.values()); | |||||
| return formats; | |||||
| } | |||||
| // Checks that the ranges to which formats are assigned don't have lengths outside the possible | |||||
| // lengths of that format (e.g. we don't have "12xx" assigned to the format "XXX-XXX"). | |||||
| private static void checkFormatLengths(DigitSequence cc, FormatSpec spec, RangeTree assigned) { | |||||
| TreeSet<Integer> unexpected = new TreeSet<>(assigned.getLengths()); | |||||
| unexpected.removeAll(ContiguousSet.closed(spec.minLength(), spec.maxLength())); | |||||
| if (!unexpected.isEmpty()) { | |||||
| RangeTree bad = RangeTree.empty(); | |||||
| for (int n : unexpected) { | |||||
| bad = bad.union(assigned.intersect(RangeTree.from(RangeSpecification.any(n)))); | |||||
| } | |||||
| throw new IllegalArgumentException(String.format( | |||||
| "[%s] format %s assigned to ranges of invalid length: %s", cc, spec, bad)); | |||||
| } | |||||
| } | |||||
| // Checks that the local lengths for ranges (as determined by area code length) is compatible | |||||
| // with the assigned local format specifier. Note that it is allowed to have an area code length | |||||
| // of zero and still be assigned a format with a local specifier (the specifier may be shared | |||||
| // with other ranges which do have an area code length). | |||||
| private static void checkLocalFormatLengths( | |||||
| DigitSequence cc, RangeTable formatTable, FormatSpec spec, RangeTree assigned) { | |||||
| if (!spec.local().isPresent()) { | |||||
| return; | |||||
| } | |||||
| ImmutableSet<Integer> lengths = | |||||
| formatTable.subTable(assigned, AREA_CODE_LENGTH).getAssignedValues(AREA_CODE_LENGTH); | |||||
| FormatTemplate local = spec.local().get(); | |||||
| // Format specifiers either vary length in the area code or the local number, but not both. | |||||
| int localLength = local.minLength(); | |||||
| int localVariance = local.maxLength() - local.minLength(); | |||||
| if (localVariance == 0) { | |||||
| // If there's no length variation in the "local" part, it means the area code length can | |||||
| // be variable. | |||||
| ContiguousSet<Integer> acls = | |||||
| ContiguousSet.closed(spec.minLength() - localLength, spec.maxLength() - localLength); | |||||
| checkMetadata(acls.containsAll(lengths), | |||||
| "[%s] area code lengths '%s' not supported by format: %s", cc, acls, spec); | |||||
| } else { | |||||
| // If the length variation of the format is in the trailing "local" part, we expect the a | |||||
| // unique area code length (only one "group" in the format can be variable). | |||||
| checkMetadata((spec.maxLength() - spec.minLength()) == localVariance, | |||||
| "[%s] invalid local format (bad length) in format specifier %s", cc, spec); | |||||
| int acl = spec.minLength() - localLength; | |||||
| checkMetadata(lengths.size() == 1 && lengths.contains(acl), | |||||
| "[%s] implied area code length(s) %s does not match expected length (%s) of format: %s", | |||||
| cc, lengths, acl, spec); | |||||
| } | |||||
| } | |||||
| private static ImmutableList<AltFormatSpec> checkAltFormatConsistency( | |||||
| ImmutableList<AltFormatSpec> altFormats, | |||||
| Map<String, FormatSpec> formats, | |||||
| RangeTable xmlTable) { | |||||
| for (AltFormatSpec altFormat : altFormats) { | |||||
| String parentId = altFormat.parentFormatId(); | |||||
| FormatSpec parent = formats.get(parentId); | |||||
| checkMetadata(parent != null, "unknown parent format ID in alternate format: %s", altFormat); | |||||
| Set<Integer> altLengths = getLengths(altFormat.template()); | |||||
| checkMetadata(getLengths(parent.national()).containsAll(altLengths), | |||||
| "alternate format lengths must be bounded by parent format lengths: %s", altFormat); | |||||
| // Only care about the parent ranges which have the same length(s) as the alt format. | |||||
| RangeTree lengthMask = RangeTree.from(altLengths.stream().map(RangeSpecification::any)); | |||||
| RangeTree ranges = xmlTable.getRanges(FORMAT, parentId).intersect(lengthMask); | |||||
| RangeTree captured = PrefixTree.from(altFormat.prefix()).retainFrom(ranges); | |||||
| checkMetadata(!captured.isEmpty(), | |||||
| "alternate format must capture some of the parent format ranges: %s", altFormat); | |||||
| int prefixLength = altFormat.prefix().length(); | |||||
| if (prefixLength > 0) { | |||||
| // A really ugly, but useful check to find if there's a better prefix. Specifically, it | |||||
| // determines if the given prefix is "over-capturing" ranges (e.g. prefix is "1[2-8]" but | |||||
| // only "1[3-6]" exists in the parent format's assigned ranges). Since this is an odd, non | |||||
| // set-like operation, it's just done "manually" using bit masks. It's not a union of the | |||||
| // paths, it's a "squashing" (since it results in the smallest single range specification). | |||||
| // | |||||
| // Start with all the paths trimmed to the prefix length (e.g. "123", "145", "247"). All | |||||
| // range specifications in the slice are the same length as the prefix we started with. | |||||
| RangeTree slice = captured.slice(prefixLength); | |||||
| // Now union the digit masks at each depth for all paths in the slice (in theory there | |||||
| // could be a "squash" operation on RangeSpecification to do all this). | |||||
| int[] masks = new int[prefixLength]; | |||||
| slice.asRangeSpecifications().forEach(s -> { | |||||
| for (int n = 0; n < prefixLength; n++) { | |||||
| masks[n] |= s.getBitmask(n); | |||||
| } | |||||
| }); | |||||
| // Now reconstruct the single "squashed" range specification (e.g. "[12][24][357]"). | |||||
| RangeSpecification minSpec = RangeSpecification.empty(); | |||||
| for (int n = 0; n < prefixLength; n++) { | |||||
| minSpec = minSpec.extendByMask(masks[n]); | |||||
| } | |||||
| checkMetadata(minSpec.equals(altFormat.prefix()), | |||||
| "alternate format prefix '%s' is too broad, it should be '%s' for: %s", | |||||
| altFormat.prefix(), minSpec, altFormat); | |||||
| } | |||||
| } | |||||
| return altFormats; | |||||
| } | |||||
| private static Set<Integer> getLengths(FormatTemplate t) { | |||||
| return ContiguousSet.closed(t.minLength(), t.maxLength()); | |||||
| } | |||||
| // Checks that example numbers are valid numbers in the ranges for their type. | |||||
| private static ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> checkExampleNumbers( | |||||
| Set<PhoneRegion> regions, | |||||
| RangeTable table, | |||||
| Table<PhoneRegion, ValidNumberType, DigitSequence> exampleNumbers) { | |||||
| for (PhoneRegion r : regions) { | |||||
| RangeTable regionTable = | |||||
| table.subTable(table.getRanges(REGIONS.getColumn(r), TRUE), XmlRangesSchema.TYPE); | |||||
| Map<ValidNumberType, DigitSequence> regionExamples = exampleNumbers.row(r); | |||||
| ImmutableSet<ValidNumberType> types = regionTable.getAssignedValues(XmlRangesSchema.TYPE); | |||||
| checkMetadata(types.equals(regionExamples.keySet()), | |||||
| "mismatched types for example numbers in region %s\nExpected: %s\nActual: %s", | |||||
| r, types, regionExamples); | |||||
| for (ValidNumberType t : types) { | |||||
| DigitSequence exampleNumber = regionExamples.get(t); | |||||
| RangeTree ranges = regionTable.getRanges(XmlRangesSchema.TYPE, t); | |||||
| // Special case, since we permit example numbers for fixed line/mobile to be valid for the | |||||
| // combined range as well. | |||||
| // | |||||
| // This logic smells, since it reveals information about the XML structure (in which fixed | |||||
| // line and mobile ranges can overlap). However if we insist that a fixed line examples are | |||||
| // in the "fixed line only" range, we end up with problems if (mobile == fixed line), since | |||||
| // there is no "fixed line only" range (but there is an example number in the XML). | |||||
| if (t == ValidNumberType.MOBILE || t == ValidNumberType.FIXED_LINE) { | |||||
| ranges = ranges.union( | |||||
| regionTable.getRanges(XmlRangesSchema.TYPE, ValidNumberType.FIXED_LINE_OR_MOBILE)); | |||||
| } | |||||
| checkMetadata(ranges.contains(exampleNumber), | |||||
| "invalid example number '%s' of type %s in region %s", exampleNumber, t, r); | |||||
| } | |||||
| } | |||||
| return ImmutableTable.copyOf(exampleNumbers); | |||||
| } | |||||
/**
 * Returns the attributes of this numbering scheme, which include its calling code and its
 * regions.
 */
public abstract Attributes getAttributes();
| // TODO: Inline the wrapper methods below. | |||||
| /** Returns the unique calling code of this numbering scheme. */ | |||||
| public DigitSequence getCallingCode() { | |||||
| return getAttributes().getCallingCode(); | |||||
| } | |||||
| /** | |||||
| * Returns the regions represented by this numbering scheme. The main region is always present | |||||
| * and listed first, and remaining regions are listed in "natural" order. | |||||
| */ | |||||
| public ImmutableSet<PhoneRegion> getRegions() { | |||||
| return getAttributes().getRegions(); | |||||
| } | |||||
/**
 * Returns a range table containing per-range attributes according to
 * {@link XmlRangesSchema#COLUMNS}. This table covers all regions of the scheme; use
 * {@link #getTableFor(PhoneRegion)} to obtain the ranges of a single region.
 */
public abstract RangeTable getTable();
| /** | |||||
| * Returns a RangeTable restricted to the given region, which conforms to the | |||||
| * {@link XmlRangesSchema} schema, with the exception that no region columns exist. | |||||
| */ | |||||
| public final RangeTable getTableFor(PhoneRegion region) { | |||||
| checkArgument(getRegions().contains(region), | |||||
| "invalid region '%s' for calling code '%s'", region, getCallingCode()); | |||||
| return getTable() | |||||
| .subTable(getTable().getRanges(REGIONS.getColumn(region), TRUE), PER_REGION_COLUMNS); | |||||
| } | |||||
/**
 * Returns the shortcode range tables of this numbering scheme, keyed by region. A region of
 * this scheme need not have an entry (see {@link #getShortcodesFor(PhoneRegion)}).
 */
public abstract ImmutableSortedMap<PhoneRegion, RangeTable> getShortcodes();
| /** Returns the RangeTable for the shortcodes of the given region. */ | |||||
| public final Optional<RangeTable> getShortcodesFor(PhoneRegion region) { | |||||
| checkArgument(getRegions().contains(region), | |||||
| "invalid region '%s' for calling code '%s'", region, getCallingCode()); | |||||
| return Optional.ofNullable(getShortcodes().get(region)); | |||||
| } | |||||
/**
 * Returns the map of format ID to format specifier. When the scheme is created via
 * {@code from()}, every format in this map has been checked to be assigned to at least one
 * range.
 */
public abstract ImmutableMap<String, FormatSpec> getFormats();
/**
 * Returns a list of alternate formats which are also expected for this numbering scheme. When
 * the scheme is created via {@code from()}, the parent format ID of each alternate format has
 * been checked to be a known format ID.
 */
public abstract ImmutableList<AltFormatSpec> getAlternateFormats();
/**
 * Returns a table of example numbers for each region code and number type. When the scheme is
 * created via {@code from()}, each region of the scheme has been checked to have examples for
 * exactly the number types which have ranges in that region.
 */
public abstract ImmutableTable<PhoneRegion, ValidNumberType, DigitSequence> getExampleNumbers();
/**
 * Returns all comments known about by this numbering scheme. Internal method, callers should
 * always use {@link #getComments(Anchor)} instead.
 *
 * <p>For schemes created via {@code from()}, any synthetic region comments come before the
 * comments which were passed in (order matters because anchors are not unique).
 */
abstract ImmutableList<Comment> getAllComments();
| /** Returns comments with a specified anchor for this numbering scheme. */ | |||||
| public ImmutableList<Comment> getComments(Anchor anchor) { | |||||
| checkArgument(getAttributes().getRegions().contains(anchor.region()), | |||||
| "invalid region: %s", anchor.region()); | |||||
| return getAllComments().stream() | |||||
| .filter(c -> c.getAnchor().equals(anchor)) | |||||
| .collect(toImmutableList()); | |||||
| } | |||||
| /** | |||||
| * An encapsulation of a comment to be associated with an element in the XML. Rather than have | |||||
| * many APIs for setting/getting comments on a {@link NumberingScheme}, the approach taken here | |||||
| * is to let comments describe for themselves where they go but keep them in one big bucket. | |||||
| * <p> | |||||
| * This simplifies a lot of the intermediate APIs in the builders, but is less efficient (since | |||||
| * finding comments is now a linear search). If this is ever an issue, they should be mapped by | |||||
| * key, using a {@code ListMultimap<String, Comment>} (since comments are also ordered by their | |||||
| * number). | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract static class Comment { | |||||
| private static final Joiner JOIN_LINES = Joiner.on('\n'); | |||||
| private static final Splitter SPLIT_LINES = Splitter.on('\n'); | |||||
| /** An anchor defining which element, in which territory, a comment should be attached to. */ | |||||
| @AutoValue | |||||
| public abstract static class Anchor implements Comparable<Anchor> { | |||||
| // Special anchor for comments that are not stored in the comment table, but are attached to | |||||
| // data directly (e.g. formats). | |||||
| private static final Anchor ANONYMOUS = of(PhoneRegion.getUnknown(), ""); | |||||
| private static final Comparator<Anchor> ORDERING = | |||||
| comparing(Anchor::region).thenComparing(Anchor::label); | |||||
| /** Creates a comment anchor from a region and xml type. */ | |||||
| static Anchor of(PhoneRegion region, String label) { | |||||
| // TODO: Add check for valid label. | |||||
| return anchor(region, label); | |||||
| } | |||||
| /** The region of the territory this comment should be attached to. */ | |||||
| public abstract PhoneRegion region(); | |||||
| /** | |||||
| * The type in the territory this comment should be attached to. If missing, attach this | |||||
| * comment to the main comment block for the territory. | |||||
| */ | |||||
| public abstract String label(); | |||||
| @Override | |||||
| public int compareTo(Anchor that) { | |||||
| return ORDERING.compare(this, that); | |||||
| } | |||||
| } | |||||
| // Private since we want to funnel people through type safe factory methods. | |||||
| private static Anchor anchor(PhoneRegion region, String label) { | |||||
| return new AutoValue_NumberingScheme_Comment_Anchor(region, label); | |||||
| } | |||||
| /** Returns a key identifying a comment for a region. */ | |||||
| public static Anchor anchor(PhoneRegion region) { | |||||
| return anchor(region, "XML"); | |||||
| } | |||||
| /** Returns a key identifying a comment for the validation range of a given type in a region. */ | |||||
| public static Anchor anchor(PhoneRegion region, XmlNumberType xmlType) { | |||||
| return anchor(region, xmlType.toString()); | |||||
| } | |||||
| /** | |||||
| * Returns a key identifying a comment for the validation range of a given shortcode type in | |||||
| * a region. | |||||
| */ | |||||
| public static Anchor shortcodeAnchor(PhoneRegion region) { | |||||
| return anchor(region, "SC"); | |||||
| } | |||||
| /** | |||||
| * Returns a key identifying a comment for the validation range of a given shortcode type in | |||||
| * a region. | |||||
| */ | |||||
| public static Anchor shortcodeAnchor(PhoneRegion region, XmlShortcodeType xmlType) { | |||||
| return anchor(region, xmlType.toString()); | |||||
| } | |||||
| /** Creates a comment the applies to data identified by the specified key. */ | |||||
| public static Comment create(Anchor anchor, List<String> lines) { | |||||
| return new AutoValue_NumberingScheme_Comment(anchor, ImmutableList.copyOf(lines)); | |||||
| } | |||||
| /** Creates a comment the applies to data identified by the specified key. */ | |||||
| public static Comment createAnonymous(List<String> lines) { | |||||
| return new AutoValue_NumberingScheme_Comment(Anchor.ANONYMOUS, ImmutableList.copyOf(lines)); | |||||
| } | |||||
| public static Comment fromText(Anchor anchor, String text) { | |||||
| return create(anchor, SPLIT_LINES.splitToList(text)); | |||||
| } | |||||
| public static Comment fromText(String text) { | |||||
| return createAnonymous(SPLIT_LINES.splitToList(text)); | |||||
| } | |||||
| /** | |||||
| * Returns the key which defines what this comment relates to (and thus where it should appear | |||||
| * in the XML file). | |||||
| */ | |||||
| public abstract Anchor getAnchor(); | |||||
| /** The lines of a single mulit-line comment. */ | |||||
| // TODO: Switch to a single string (with newlines) which is what's done elsewhere. | |||||
| public abstract ImmutableList<String> getLines(); | |||||
| public String toText() { | |||||
| return JOIN_LINES.join(getLines()); | |||||
| } | |||||
| // Visible for AutoValue. | |||||
| Comment() {} | |||||
| } | |||||
// Visible for AutoValue: package-private (rather than private) so that the generated
// AutoValue_NumberingScheme subclass can invoke it.
NumberingScheme() {}
| } | |||||
| @ -0,0 +1,63 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | |||||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||||
| import static java.util.function.Function.identity; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.MetadataKey; | |||||
| import java.util.List; | |||||
| /** | |||||
| * Collection of numbering schemes, mapped primarily by calling code, but available via other | |||||
| * mappings (e.g. metadata key) for convenience. | |||||
| * | |||||
| * <p>Instances are created via {@link #from(List)}; the constructor is package-private and only | |||||
| * intended for the AutoValue generated subclass. | |||||
| */ | |||||
| // TODO: Delete this (it's hardly used and very little more than a simple collection). | |||||
| @AutoValue | |||||
| public abstract class NumberingSchemes { | |||||
| /** | |||||
| * Aggregates a list of numbering schemes into a single collection which mirrors the structure and | |||||
| * mapping of the libphonenumber XML metadata file. | |||||
| * | |||||
| * <p>Each scheme is keyed by its calling code, and the returned key set holds one | |||||
| * {@link MetadataKey} per (region, calling code) pair taken from the regions of every scheme. | |||||
| * | |||||
| * <p>NOTE(review): assuming {@code toImmutableMap} is the Guava collector, two schemes sharing a | |||||
| * calling code will make this method throw rather than merge them; confirm against the import. | |||||
| * | |||||
| * @param schemes the numbering schemes to aggregate. | |||||
| * @return a new collection containing the given schemes. | |||||
| */ | |||||
| public static NumberingSchemes from(List<NumberingScheme> schemes) { | |||||
| ImmutableMap<DigitSequence, NumberingScheme> map = | |||||
| schemes.stream().collect(toImmutableMap(NumberingScheme::getCallingCode, identity())); | |||||
| ImmutableSet<MetadataKey> allKeys = map.values().stream() | |||||
| .flatMap(s -> s.getRegions().stream().map(r -> MetadataKey.create(r, s.getCallingCode()))) | |||||
| .collect(toImmutableSet()); | |||||
| return new AutoValue_NumberingSchemes(map, allKeys); | |||||
| } | |||||
| /** Returns a mapping of top-level numbering schemes by calling code. */ | |||||
| // TODO: Rename to getSchemeMap() since it's confusing, or add a direct getter. | |||||
| public abstract ImmutableMap<DigitSequence, NumberingScheme> getSchemes(); | |||||
| /** | |||||
| * Returns the set of all calling codes for top-level schemes in this collection. This is the key | |||||
| * set of the map returned by {@link #getSchemes()}. | |||||
| */ | |||||
| public ImmutableSet<DigitSequence> getCallingCodes() { | |||||
| return getSchemes().keySet(); | |||||
| } | |||||
| /** Returns the set of all metadata keys for regional schemes in this collection. */ | |||||
| public abstract ImmutableSet<MetadataKey> getKeys(); | |||||
| // Visible for AutoValue. | |||||
| NumberingSchemes() {} | |||||
| } | |||||
| @ -0,0 +1,88 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.ColumnGroup; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||||
| /** | |||||
| * The schema of the "Operators" table with rows keyed by operator ID and columns: | |||||
| * <ol> | |||||
| * <li>{@link #SELECTION_CODES}: Operator selection codes for national dialling. | |||||
| * <li>{@link #IDD_PREFIXES}: International direct dialling codes. | |||||
| * <li>{@link #NAMES}: A group of columns containing the name of the operator, potentially in | |||||
| * multiple languages. Note that English translations for all operators need not be present. | |||||
| * </ol> | |||||
| * | |||||
| * <p>Rows keys are serialized via the marshaller and produce the leading column: | |||||
| * <ol> | |||||
| * <li>{@code Id}: The operator ID. | |||||
| * </ol> | |||||
| * | |||||
| * <p>The default IDD prefix should not be in this table, but is instead stored in the top-level | |||||
| * {@link MetadataTableSchema#IDD_PREFIX} column. | |||||
| * | |||||
| * <p>Note that there is a special case in which we need to store a selection code or IDD code, but | |||||
| * it does not belong to an operator with an assigned range (e.g. it's a universally available code). | |||||
| * In these situations, you should ensure that the operator ID starts with "__" (double underscore) | |||||
| * to prevent consistency checks from complaining about unassigned operators. You can also omit a | |||||
| * name for the row, but should probably add a comment. | |||||
| */ | |||||
| public final class OperatorsTableSchema { | |||||
| /** | |||||
| * A comma separated list of "selection codes" (as range specifications) which are added to | |||||
| * national numbers (not always as a prefix) to select an operator for national dialling. | |||||
| * This will often contain many of the same values as {@link #IDD_PREFIXES} but need not be | |||||
| * identical. | |||||
| * | |||||
| * <p>Note that while a single operator may have more than one code associated with it, the same | |||||
| * code cannot appear in more than one row in this table. | |||||
| */ | |||||
| public static final Column<String> SELECTION_CODES = Column.ofString("Domestic Selection Codes"); | |||||
| /** | |||||
| * A comma separated list of "International Direct Dialing" codes (as range specifications) which | |||||
| * are prefixes for international dialling. This will often contain many of the same prefixes as | |||||
| * {@link #SELECTION_CODES} but need not be identical. | |||||
| * | |||||
| * <p>Note that while a single operator may have more than one code associated with it, the same | |||||
| * code cannot appear in more than one row in this table. | |||||
| */ | |||||
| public static final Column<String> IDD_PREFIXES = Column.ofString("International Dialling Codes"); | |||||
| /** The "Name:XXX" column group in the operator table, with one name column per language. */ | |||||
| public static final ColumnGroup<SimpleLanguageTag, String> NAMES = | |||||
| ColumnGroup.byLanguage(Column.ofString("Name")); | |||||
| public static final Column<String> COMMENT = RangesTableSchema.COMMENT; | |||||
| private static final CsvKeyMarshaller<String> MARSHALLER = CsvKeyMarshaller.ofSortedString("Id"); | |||||
| private static final Schema COLUMNS = Schema.builder() | |||||
| .add(SELECTION_CODES) | |||||
| .add(IDD_PREFIXES) | |||||
| .add(NAMES) | |||||
| .add(COMMENT) | |||||
| .build(); | |||||
| /** Schema instance defining the operators CSV table. */ | |||||
| public static final CsvSchema<String> SCHEMA = CsvSchema.of(MARSHALLER, COLUMNS); | |||||
| private OperatorsTableSchema() {} | |||||
| } | |||||
| @ -0,0 +1,396 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.collect.DiscreteDomain.integers; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | |||||
| import static java.util.Comparator.comparing; | |||||
| import static java.util.function.Function.identity; | |||||
| import static java.util.stream.Collectors.joining; | |||||
| import com.google.common.base.Splitter; | |||||
| import com.google.common.collect.ContiguousSet; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableRangeSet; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.ImmutableSortedSet; | |||||
| import com.google.common.collect.Range; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag; | |||||
| import com.google.i18n.phonenumbers.metadata.model.MetadataTableSchema.Regions; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Enums.Provenance; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Change; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.ColumnGroup; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.MultiValue; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeKey; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||||
| import java.time.ZoneId; | |||||
| import java.util.List; | |||||
| import java.util.NavigableSet; | |||||
| import java.util.Optional; | |||||
| import java.util.TreeSet; | |||||
| import java.util.stream.Stream; | |||||
| /** | |||||
| * The schema of the standard "Ranges" table with rows keyed by {@link RangeKey} and columns: | |||||
| * <ol> | |||||
| * <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not | |||||
| * the same as XmlNumberType or ValidNumberType). All ranges should be assigned a type. | |||||
| * <li>{@link #TARIFF}: The expected cost of numbers in a range (combining TYPE and TARIFF | |||||
| * can yield the internal ValidNumberType). All ranges should be assigned a tariff. | |||||
| * <li>{@link #AREA_CODE_LENGTH}: The length of an optional prefix which may be removed from | |||||
| * numbers in a range for local dialling. Local only lengths are derived using this column. | |||||
| * <li>{@link #NATIONAL_ONLY}: True if numbers in a range cannot be dialled from outside its | |||||
| * region. The "noInternationalDialling" ranges are derived from this column. | |||||
| * <li>{@link #SMS}: True if numbers in a range are expected to support SMS. | |||||
| * <li>{@link #OPERATOR}: The expected operator (carrier) ID for a range (or empty if no carrier | |||||
| * is known). | |||||
| * <li>{@link #FORMAT}: The expected format ID for a range (or empty if no formatting should be | |||||
| * applied). | |||||
| * <li>{@link #TIMEZONE}: The timezone names for a range (or empty to imply the default | |||||
| * timezones). Multiple timezones can be specified if separated by {@code '&'}. | |||||
| * <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where a range is | |||||
| * set {@code true} if that range is valid within the region {@code XX}. | |||||
| * <li>{@link #GEOCODES}: A group of String columns in the form "Geocode:XXX" containing the | |||||
| * geocode string for a range, where {@code XXX} is the language code of the string. | |||||
| * <li>{@link #PROVENANCE}: Indicates the most important reason for a range to be valid. | |||||
| * <li>{@link #COMMENT}: Free text field usually containing evidence related to the provenance. | |||||
| * </ol> | |||||
| * | |||||
| * <p>Rows keys are serialized via the marshaller and produce leading columns: | |||||
| * <ol> | |||||
| * <li>{@code Prefix}: The prefix (RangeSpecification) for the ranges in a row (e.g. "12[3-6]"). | |||||
| * <li>{@code Length}: A set of lengths for the ranges in a row (e.g. "9", "8,9" or "5,7-9"). | |||||
| * </ol> | |||||
| */ | |||||
| public final class RangesTableSchema { | |||||
| /** | |||||
| * External number type enum. This is technically much better than ValidNumberType since it | |||||
| * splits type and cost properly. Unfortunately the internal logic of the phonenumber library | |||||
| * doesn't really cope with this, which is why we convert to {@code XmlRangesSchema} before | |||||
| * creating legacy data structures. | |||||
| * | |||||
| * <p>This enum can be modified as new types are requested from data providers, providing the | |||||
| * type mapping to ValidNumberType is updated appropriately. Note that until it's clear that | |||||
| * mapping types such as {@link #M2M} to {@link ValidNumberType#UNKNOWN} will work okay, we | |||||
| * should be very careful about using the additional types. Additional types need to be removed | |||||
| * before the generated table can be turned into a {@link NumberingScheme}. | |||||
| * | |||||
| * <p>Only the values listed in the internal type map have a same-named {@link ValidNumberType}; | |||||
| * for {@link #UNKNOWN}, {@link #M2M} and {@link #ISP} the {@link #toValidNumberType()} method | |||||
| * returns an empty {@code Optional}. | |||||
| */ | |||||
| public enum ExtType { | |||||
| /** Default value not permitted in real data. */ | |||||
| UNKNOWN, | |||||
| /** Maps to {@link ValidNumberType#FIXED_LINE}. */ | |||||
| FIXED_LINE, | |||||
| /** Maps to {@link ValidNumberType#MOBILE}. */ | |||||
| MOBILE, | |||||
| /** Maps to {@link ValidNumberType#FIXED_LINE_OR_MOBILE}. */ | |||||
| FIXED_LINE_OR_MOBILE, | |||||
| /** Maps to {@link ValidNumberType#VOIP}. */ | |||||
| VOIP, | |||||
| /** Maps to {@link ValidNumberType#PAGER}. */ | |||||
| PAGER, | |||||
| /** Maps to {@link ValidNumberType#PERSONAL_NUMBER}. */ | |||||
| PERSONAL_NUMBER, | |||||
| /** Maps to {@link ValidNumberType#UAN}. */ | |||||
| UAN, | |||||
| /** Maps to {@link ValidNumberType#VOICEMAIL}. */ | |||||
| VOICEMAIL, | |||||
| /** Machine-to-machine numbers (additional type for future support). */ | |||||
| M2M, | |||||
| /** ISP dial-up numbers (additional type for future support). */ | |||||
| ISP; | |||||
| private static final ImmutableMap<ExtType, ValidNumberType> TYPE_MAP = | |||||
| Stream.of( | |||||
| ExtType.FIXED_LINE, | |||||
| ExtType.MOBILE, | |||||
| ExtType.FIXED_LINE_OR_MOBILE, | |||||
| ExtType.PAGER, | |||||
| ExtType.PERSONAL_NUMBER, | |||||
| ExtType.UAN, | |||||
| ExtType.VOICEMAIL, | |||||
| ExtType.VOIP) | |||||
| .collect(toImmutableMap(identity(), v -> ValidNumberType.valueOf(v.name()))); | |||||
| public Optional<ValidNumberType> toValidNumberType() { | |||||
| return Optional.ofNullable(TYPE_MAP.get(this)); | |||||
| } | |||||
| } | |||||
| /** | |||||
| * External tariff enum. By splitting tariff information out from the "line type", we can | |||||
| * represent a much wider (and more realistic) set of combinations for number ranges. When | |||||
| * combined with {@link ExtType}, this maps back to {@code ValidNumberType}. | |||||
| * | |||||
| * <p>{@link #STANDARD_RATE} is absent from the internal tariff map, so for that value the | |||||
| * {@link #toValidNumberType()} method returns an empty {@code Optional}; every other tariff | |||||
| * yields the same-named {@link ValidNumberType}. | |||||
| */ | |||||
| public enum ExtTariff { | |||||
| /** Does not affect ValidNumberType mapping. */ | |||||
| STANDARD_RATE, | |||||
| /** Maps to {@link ValidNumberType#TOLL_FREE}. */ | |||||
| TOLL_FREE, | |||||
| /** Maps to {@link ValidNumberType#SHARED_COST}. */ | |||||
| SHARED_COST, | |||||
| /** Maps to {@link ValidNumberType#PREMIUM_RATE}. */ | |||||
| PREMIUM_RATE; | |||||
| private static final ImmutableMap<ExtTariff, ValidNumberType> TARIFF_MAP = | |||||
| Stream.of(ExtTariff.TOLL_FREE, ExtTariff.SHARED_COST, ExtTariff.PREMIUM_RATE) | |||||
| .collect(toImmutableMap(identity(), v -> ValidNumberType.valueOf(v.name()))); | |||||
| public Optional<ValidNumberType> toValidNumberType() { | |||||
| return Optional.ofNullable(TARIFF_MAP.get(this)); | |||||
| } | |||||
| } | |||||
| /** | |||||
| * The value in the "TIMEZONE" column, which is effectively a list of timezone strings. | |||||
| * | |||||
| * <p>Values are serialized with {@code '&'} as the separator and parsed back into {@link ZoneId} | |||||
| * instances via {@link ZoneId#of(String)}. The column default is the value built from the empty | |||||
| * string. NOTE(review): the comparator and trailing {@code true} passed to {@link MultiValue} | |||||
| * presumably request de-duplicated values sorted by zone ID; confirm against MultiValue. | |||||
| */ | |||||
| public static final class Timezones extends MultiValue<ZoneId, Timezones> { | |||||
| public static Column<Timezones> column(String name) { | |||||
| return Column.create(Timezones.class, name, new Timezones(""), Timezones::new); | |||||
| } | |||||
| public Timezones(Iterable<ZoneId> ids) { | |||||
| super(ids, '&', comparing(ZoneId::getId), true); | |||||
| } | |||||
| public Timezones(String s) { | |||||
| super(s, ZoneId::of, '&', comparing(ZoneId::getId), true); | |||||
| } | |||||
| } | |||||
| public static final Column<ExtType> TYPE = Column.of(ExtType.class, "Type", ExtType.UNKNOWN); | |||||
| public static final Column<ExtTariff> TARIFF = | |||||
| Column.of(ExtTariff.class, "Tariff", ExtTariff.STANDARD_RATE); | |||||
| /** | |||||
| * The "Area Code Length" column in the range table, denoting the length of a prefix which can | |||||
| * be removed from all numbers in a range to obtain locally diallable numbers. If an | |||||
| * "area code" is not optional for dialling, then no value should be set here. | |||||
| */ | |||||
| public static final Column<Integer> AREA_CODE_LENGTH = | |||||
| Column.ofUnsignedInteger("Area Code Length"); | |||||
| /** Denotes ranges which cannot be dialled internationally. */ | |||||
| public static final Column<Boolean> NATIONAL_ONLY = Column.ofBoolean("National Only"); | |||||
| /** Denotes ranges which can reasonably be expected to receive SMS. */ | |||||
| public static final Column<Boolean> SMS = Column.ofBoolean("Sms"); | |||||
| /** The ID of the primary/original operator assigned to a range. */ | |||||
| public static final Column<String> OPERATOR = Column.ofString("Operator"); | |||||
| /** The ID of the format assigned to a range. */ | |||||
| public static final Column<String> FORMAT = Column.ofString("Format"); | |||||
| /** An '&'-separated list of timezone IDs associated with this range. */ | |||||
| public static final Column<Timezones> TIMEZONE = Timezones.column("Timezone"); | |||||
| /** The "Region:XX" column group in the range table. */ | |||||
| public static final ColumnGroup<PhoneRegion, Boolean> REGIONS = | |||||
| ColumnGroup.byRegion(Column.ofBoolean("Region")); | |||||
| /** | |||||
| * The "Regions" column in the CSV table. This single multi-value column stands in for the | |||||
| * {@link #REGIONS} column group when a range table is serialized as CSV. | |||||
| */ | |||||
| public static final Column<Regions> CSV_REGIONS = Regions.column("Regions"); | |||||
| /** The "Geocode:XXX" column group in the range table. */ | |||||
| public static final ColumnGroup<SimpleLanguageTag, String> GEOCODES = | |||||
| ColumnGroup.byLanguage(Column.ofString("Geocode")); | |||||
| /** The provenance column indicating why a range is considered valid. */ | |||||
| public static final Column<Provenance> PROVENANCE = | |||||
| Column.of(Provenance.class, "Provenance", Provenance.UNKNOWN); | |||||
| /** An arbitrary text comment, usually (at least) supplying information about the provenance. */ | |||||
| public static final Column<String> COMMENT = Column.ofString("Comment"); | |||||
| /** | |||||
| * Marshaller for constructing CsvTable from RangeTable. Keys are written and read via | |||||
| * {@link #write} and {@link #read} as the two leading columns "Prefix" and "Length". | |||||
| */ | |||||
| private static final CsvKeyMarshaller<RangeKey> MARSHALLER = new CsvKeyMarshaller<>( | |||||
| RangesTableSchema::write, | |||||
| RangesTableSchema::read, | |||||
| Optional.of(RangeKey.ORDERING), | |||||
| "Prefix", | |||||
| "Length"); | |||||
| /** The non-key columns of a range table. */ | |||||
| public static final Schema TABLE_COLUMNS = | |||||
| Schema.builder() | |||||
| .add(TYPE) | |||||
| .add(TARIFF) | |||||
| .add(AREA_CODE_LENGTH) | |||||
| .add(NATIONAL_ONLY) | |||||
| .add(SMS) | |||||
| .add(OPERATOR) | |||||
| .add(FORMAT) | |||||
| .add(TIMEZONE) | |||||
| .add(REGIONS) | |||||
| .add(GEOCODES) | |||||
| .add(PROVENANCE) | |||||
| .add(COMMENT) | |||||
| .build(); | |||||
| /** | |||||
| * The columns for the serialized CSV table. Note that the "REGIONS" column group is replaced | |||||
| * by the CSV regions multi-value. This allows region codes to be serialized in a single column | |||||
| * (which is far nicer when looking at data in a spreadsheet). In the range table, this is | |||||
| * normalized into the boolean column group (because that's far nicer to work with). | |||||
| */ | |||||
| private static final Schema CSV_COLUMNS = | |||||
| Schema.builder() | |||||
| .add(TYPE) | |||||
| .add(TARIFF) | |||||
| .add(AREA_CODE_LENGTH) | |||||
| .add(NATIONAL_ONLY) | |||||
| .add(SMS) | |||||
| .add(OPERATOR) | |||||
| .add(FORMAT) | |||||
| .add(TIMEZONE) | |||||
| .add(CSV_REGIONS) | |||||
| .add(GEOCODES) | |||||
| .add(PROVENANCE) | |||||
| .add(COMMENT) | |||||
| .build(); | |||||
| /** Schema instance defining the ranges CSV table. */ | |||||
| public static final CsvSchema<RangeKey> SCHEMA = CsvSchema.of(MARSHALLER, CSV_COLUMNS); | |||||
| /** | |||||
| * Converts a {@link RangeTable} to a {@link CsvTable}, using {@link RangeKey}s as row keys and | |||||
| * preserving the original table columns. The {@link CsvSchema} of the returned table is not | |||||
| * guaranteed to be the {@link #SCHEMA} instance if the given table had different columns. | |||||
| * | |||||
| * <p>Assignments to columns of the {@link #REGIONS} group are not copied as-is. For each row, | |||||
| * the regions whose column is assigned {@code true} are collected into one | |||||
| * {@link #CSV_REGIONS} value, which is omitted when no region is set. Every other assignment is | |||||
| * copied unchanged. | |||||
| * | |||||
| * @param table the range table to convert. | |||||
| * @return the CSV table, with one row per {@link RangeKey} decomposed from each change. | |||||
| */ | |||||
| @SuppressWarnings("unchecked") | |||||
| public static CsvTable<RangeKey> toCsv(RangeTable table) { | |||||
| CsvTable.Builder<RangeKey> csv = CsvTable.builder(SCHEMA); | |||||
| ImmutableSet<Column<Boolean>> regionColumns = | |||||
| REGIONS.extractGroupColumns(table.getColumns()).values(); | |||||
| TreeSet<PhoneRegion> regions = new TreeSet<>(); | |||||
| for (Change c : table.toChanges()) { | |||||
| for (RangeKey k : RangeKey.decompose(c.getRanges())) { | |||||
| regions.clear(); | |||||
| c.getAssignments().forEach(a -> { | |||||
| // We special case the regions column, converting a group of boolean columns into a | |||||
| // multi-value of region codes. If the column is in the group, it must hold Booleans. | |||||
| if (regionColumns.contains(a.column())) { | |||||
| if (a.value().map(((Column<Boolean>) a.column())::cast).orElse(Boolean.FALSE)) { | |||||
| regions.add(REGIONS.getKey(a.column())); | |||||
| } | |||||
| } else { | |||||
| csv.put(k, a); | |||||
| } | |||||
| }); | |||||
| // We can do this out-of-sequence because the table will order its columns. | |||||
| if (!regions.isEmpty()) { | |||||
| csv.put(k, CSV_REGIONS, Regions.of(regions)); | |||||
| } | |||||
| } | |||||
| } | |||||
| return csv.build(); | |||||
| } | |||||
| /** | |||||
| * Converts a {@link RangeKey} based {@link CsvTable} to a {@link RangeTable}. The returned table | |||||
| * is built against {@link #TABLE_COLUMNS}. | |||||
| * | |||||
| * <p>A {@link #CSV_REGIONS} value is expanded into a {@code true} assignment for the matching | |||||
| * {@link #REGIONS} column of each listed region; every other value is assigned unchanged. | |||||
| * | |||||
| * <p>Each row is applied with {@link OverwriteMode#NEVER}. NOTE(review): presumably this rejects | |||||
| * rows whose ranges would overwrite an earlier assignment; confirm against RangeTable. | |||||
| * | |||||
| * @param csv the CSV table to convert. | |||||
| * @return the range table holding the assignments of every CSV row. | |||||
| */ | |||||
| public static RangeTable toRangeTable(CsvTable<RangeKey> csv) { | |||||
| RangeTable.Builder out = RangeTable.builder(TABLE_COLUMNS); | |||||
| for (RangeKey k : csv.getKeys()) { | |||||
| Change.Builder change = Change.builder(k.asRangeTree()); | |||||
| csv.getRow(k).forEach((c, v) -> { | |||||
| // We special case the regions column, converting a comma separated list of region codes | |||||
| // into a series of boolean column assignments. | |||||
| if (c.equals(CSV_REGIONS)) { | |||||
| CSV_REGIONS.cast(v).getValues().forEach(r -> change.assign(REGIONS.getColumn(r), true)); | |||||
| } else { | |||||
| change.assign(c, v); | |||||
| } | |||||
| }); | |||||
| out.apply(change.build(), OverwriteMode.NEVER); | |||||
| } | |||||
| return out.build(); | |||||
| } | |||||
| // Shared by ShortcodesTableSchema: emits the key prefix followed by its formatted length set. | |||||
| public static Stream<String> write(RangeKey key) { | |||||
| return Stream.of(key.getPrefix().toString(), formatLength(key.getLengths())); | |||||
| } | |||||
| // Shared by ShortcodesTableSchema: expects the prefix at index 0 and the length set at index 1. | |||||
| public static RangeKey read(List<String> parts) { | |||||
| return RangeKey.create(RangeSpecification.parse(parts.get(0)), parseLengths(parts.get(1))); | |||||
| } | |||||
| /** | |||||
| * Formats a non-empty set of lengths as a comma separated list. Each length is first turned into | |||||
| * a canonical single-value range and the ranges are unioned, so that consecutive lengths end up | |||||
| * in one range; each resulting range is then rendered by formatRange (e.g. 9 or 8,9 or 5,7-9). | |||||
| * | |||||
| * @throws IllegalArgumentException if the given set is empty. | |||||
| */ private static String formatLength(ImmutableSortedSet<Integer> lengthSet) { | |||||
| checkArgument(!lengthSet.isEmpty()); | |||||
| ImmutableRangeSet<Integer> r = | |||||
| ImmutableRangeSet.unionOf( | |||||
| lengthSet.stream() | |||||
| .map(n -> Range.singleton(n).canonical(integers())) | |||||
| .collect(toImmutableList())); | |||||
| return r.asRanges().stream().map(RangesTableSchema::formatRange).collect(joining(",")); | |||||
| } | |||||
| /** | |||||
| * Formats one contiguous run of lengths: a single value is written as itself, a run of two | |||||
| * values as first,last and any longer run as first-last. | |||||
| */ private static String formatRange(Range<Integer> r) { | |||||
| ContiguousSet<Integer> s = ContiguousSet.create(r, integers()); | |||||
| switch (s.size()) { | |||||
| case 1: | |||||
| return String.valueOf(s.first()); | |||||
| case 2: | |||||
| return s.first() + "," + s.last(); | |||||
| default: | |||||
| return s.first() + "-" + s.last(); | |||||
| } | |||||
| } | |||||
| private static final Splitter COMMA_SPLITTER = Splitter.on(',').trimResults(); | |||||
| private static final Splitter RANGE_SPLITTER = Splitter.on('-').trimResults().limit(2); | |||||
| /** | |||||
| * Parses a comma separated list whose elements are either a single length or a lo-hi range | |||||
| * (both ends included) into a sorted set of lengths. | |||||
| * | |||||
| * <p>Elements must be strictly ascending: a range needs lo to be less than hi, and every element | |||||
| * must start above the largest length parsed so far. Whitespace around elements is trimmed. | |||||
| * | |||||
| * @throws IllegalArgumentException if a range is inverted or empty, or if elements overlap or | |||||
| * are out of order. | |||||
| * @throws NumberFormatException if an element is not a valid number. | |||||
| */ private static NavigableSet<Integer> parseLengths(String s) { | |||||
| NavigableSet<Integer> lengths = new TreeSet<>(); | |||||
| for (String lengthOrRange : COMMA_SPLITTER.split(s)) { | |||||
| if (lengthOrRange.contains("-")) { | |||||
| List<String> lohi = RANGE_SPLITTER.splitToList(lengthOrRange); | |||||
| int lo = parseInt(lohi.get(0)); | |||||
| int hi = parseInt(lohi.get(1)); | |||||
| checkArgument(lo < hi, "Invalid range: %s-%s", lo, hi); | |||||
| checkArgument(lengths.isEmpty() || lo > lengths.last(), "Overlapping ranges: %s", s); | |||||
| lengths.addAll(ContiguousSet.closed(lo, hi)); | |||||
| } else { | |||||
| int length = parseInt(lengthOrRange); | |||||
| checkArgument(lengths.isEmpty() || length > lengths.last(), "Overlapping ranges: %s", s); | |||||
| lengths.add(length); | |||||
| } | |||||
| } | |||||
| return lengths; | |||||
| } | |||||
| /** | |||||
| * Parses a base-10 number which must not carry a minus sign. | |||||
| * | |||||
| * <p>NOTE(review): {@link Integer#parseUnsignedInt(String, int)} accepts values above | |||||
| * {@link Integer#MAX_VALUE} and returns them as negative ints; presumably harmless for number | |||||
| * lengths, but confirm whether such input should be rejected instead. | |||||
| */ private static int parseInt(String s) { | |||||
| return Integer.parseUnsignedInt(s, 10); | |||||
| } | |||||
| private RangesTableSchema() {} | |||||
| } | |||||
| @ -0,0 +1,228 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType.EMERGENCY; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType.EXPANDED_EMERGENCY; | |||||
| import static java.util.function.Function.identity; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableBiMap; | |||||
| import com.google.common.collect.ImmutableSortedMap; | |||||
| import com.google.common.collect.Maps; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Enums.Provenance; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlShortcodeType; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Change; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvKeyMarshaller; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeKey; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||||
| import java.util.Comparator; | |||||
| import java.util.LinkedHashMap; | |||||
| import java.util.List; | |||||
| import java.util.Map; | |||||
| import java.util.Optional; | |||||
| import java.util.stream.Stream; | |||||
| /** | |||||
| * The schema of the standard "Shortcodes" table with rows keyed by {@link RangeKey} and columns: | |||||
| * <ol> | |||||
| * <li>{@link #TYPE}: The semantic type of numbers in a range. All ranges should be assigned a | |||||
| * type. | |||||
| * <li>{@link #TARIFF}: The expected cost of numbers in a range. All ranges should be assigned a | |||||
| * tariff. | |||||
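| * <li>{@link #SMS}: True if numbers in a range are expected to support SMS. | |||||
| * <li>{@link #CARRIER_SPECIFIC}: True if numbers in a range are carrier specific. | |||||
| * <li>{@link #SUBREGION}: True if numbers in a range are expected to be only diallable from a | |||||
| * geographic subregion (rather than the whole region). | |||||
| * <li>{@link #FORMAT}: The ID of the format assigned to a range. | |||||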
| * <li>{@link #PROVENANCE}: Indicates the most important reason for a range to be valid. | |||||
| * <li>{@link #COMMENT}: Free text field usually containing evidence related to the provenance. | |||||
| * </ol> | |||||
| * | |||||
| * <p>Rows keys are serialized via the marshaller and produce leading columns: | |||||
| * <ol> | |||||
| * <li>{@code Region}: The region code for which this range applies. | |||||
| * <li>{@code Prefix}: The prefix (RangeSpecification) for the ranges in a row (e.g. "12[3-6]"). | |||||
| * <li>{@code Length}: A set of lengths for the ranges in a row (e.g. "9", "8,9" or "5,7-9"). | |||||
| * </ol> | |||||
| * | |||||
| * <p>Note that the region must be part of the key, since some shortcodes have different types | |||||
| * between different regions. | |||||
| */ | |||||
| public final class ShortcodesTableSchema { | |||||
| /** | |||||
| * The row key of the shortcode table, specifying region and range key. This permits all | |||||
| * shortcodes to be stored in a single table (which is very helpful in NANPA, where there are | |||||
| * many regions, most with only a tiny amount of shortcode information). | |||||
| * | |||||
| * <p>Keys are ordered by region first and then by {@link RangeKey#ORDERING}. When marshalled, | |||||
| * the region is written as the leading column, followed by the columns written for the range | |||||
| * key by {@link RangesTableSchema#write}. | |||||
| * | |||||
| * <p>Keys are created via {@link #create}, which rejects the unknown region. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract static class ShortcodeKey { | |||||
| private static final Comparator<ShortcodeKey> ORDERING = Comparator | |||||
| .comparing(ShortcodeKey::getRegion) | |||||
| .thenComparing(ShortcodeKey::getRangeKey, RangeKey.ORDERING); | |||||
| private static final CsvKeyMarshaller<ShortcodeKey> MARSHALLER = new CsvKeyMarshaller<>( | |||||
| ShortcodeKey::write, | |||||
| ShortcodeKey::read, | |||||
| Optional.of(ShortcodeKey.ORDERING), | |||||
| "Region", | |||||
| "Prefix", | |||||
| "Length"); | |||||
| private static Stream<String> write(ShortcodeKey key) { | |||||
| return Stream.concat( | |||||
| Stream.of(key.getRegion().toString()), | |||||
| RangesTableSchema.write(key.getRangeKey())); | |||||
| } | |||||
| private static ShortcodeKey read(List<String> parts) { | |||||
| return ShortcodeKey.create( | |||||
| PhoneRegion.of(parts.get(0)), | |||||
| RangesTableSchema.read(parts.subList(1, parts.size()))); | |||||
| } | |||||
| public static ShortcodeKey create(PhoneRegion region, RangeKey rangeKey) { | |||||
| checkArgument(!region.equals(PhoneRegion.getUnknown()), "region must be valid"); | |||||
| return new AutoValue_ShortcodesTableSchema_ShortcodeKey(region, rangeKey); | |||||
| } | |||||
| public abstract PhoneRegion getRegion(); | |||||
| public abstract RangeKey getRangeKey(); | |||||
| } | |||||
| /** | |||||
| * Shortcode type enum, giving the semantic category of a shortcode range. The | |||||
| * {@link #UNKNOWN} value is the column default and is not permitted in real data. | |||||
| */ | |||||
| public enum ShortcodeType { | |||||
| /** Default value not permitted in real data. */ | |||||
| UNKNOWN, | |||||
| /** | |||||
| * General purpose non-governmental services including commercial or charity services. This is | |||||
| * the default type for shortcodes if no other category is more applicable. | |||||
| */ | |||||
| COMMERCIAL, | |||||
| /** | |||||
| * Non-emergency, government run public services (e.g. directory enquiries). | |||||
| */ | |||||
| PUBLIC_SERVICE, | |||||
| /** | |||||
| * Public services which provide important non-emergency information for health or safety | |||||
| * (e.g. https://www.police.uk/contact/101/). | |||||
| */ | |||||
| EXPANDED_EMERGENCY, | |||||
| /** | |||||
| * Primary public emergency numbers (i.e. police, fire or ambulance) which are available to | |||||
| * everyone. Numbers in this category must be toll-free and not carrier specific. Mobile phone | |||||
| * manufacturers will often allow these numbers to be dialled from a locked device, so it's | |||||
| * important that they work for everyone. | |||||
| */ | |||||
| EMERGENCY; | |||||
| } | |||||
| private static final ImmutableBiMap<ExtTariff, XmlShortcodeType> XML_TARIFF_MAP = | |||||
| Stream.of(ExtTariff.TOLL_FREE, ExtTariff.STANDARD_RATE, ExtTariff.PREMIUM_RATE) | |||||
| .collect(toImmutableBiMap(identity(), v -> XmlShortcodeType.valueOf("SC_" + v.name()))); | |||||
| private static final ImmutableBiMap<ShortcodeType, XmlShortcodeType> XML_TYPE_MAP = | |||||
| Stream.of(EXPANDED_EMERGENCY, EMERGENCY) | |||||
| .collect(toImmutableBiMap(identity(), v -> XmlShortcodeType.valueOf("SC_" + v.name()))); | |||||
| /** | |||||
| * Return the known mapping from the schema shortcode types to the XML type. Only | |||||
| * {@link ShortcodeType#EXPANDED_EMERGENCY} and {@link ShortcodeType#EMERGENCY} are mapped; | |||||
| * every other type yields an empty {@code Optional}. | |||||
| */ | |||||
| public static Optional<XmlShortcodeType> getXmlType(ShortcodeType type) { | |||||
| return Optional.ofNullable(XML_TYPE_MAP.get(type)); | |||||
| } | |||||
| /** | |||||
| * Return the mapping from the schema tariff to the XML type. | |||||
| * | |||||
| * @throws IllegalArgumentException if the tariff has no shortcode equivalent, which is the case | |||||
| * for {@link ExtTariff#SHARED_COST} since it is absent from the tariff map. | |||||
| */ | |||||
| public static XmlShortcodeType getXmlType(ExtTariff tariff) { | |||||
| XmlShortcodeType xmlType = XML_TARIFF_MAP.get(tariff); | |||||
| checkArgument(xmlType != null, "shortcodes do not support tariff: %s", tariff); | |||||
| return xmlType; | |||||
| } | |||||
| public static final Column<ShortcodeType> TYPE = | |||||
| Column.of(ShortcodeType.class, "Type", ShortcodeType.UNKNOWN); | |||||
| public static final Column<ExtTariff> TARIFF = RangesTableSchema.TARIFF; | |||||
| public static final Column<Boolean> SMS = RangesTableSchema.SMS; | |||||
| public static final Column<Boolean> CARRIER_SPECIFIC = Column.ofBoolean("Carrier Specific"); | |||||
| public static final Column<Boolean> SUBREGION = Column.ofBoolean("Subregion"); | |||||
| public static final Column<String> FORMAT = RangesTableSchema.FORMAT; | |||||
| public static final Column<Provenance> PROVENANCE = RangesTableSchema.PROVENANCE; | |||||
| public static final Column<String> COMMENT = RangesTableSchema.COMMENT; | |||||
| private static final Schema COLUMNS = | |||||
| Schema.builder() | |||||
| .add(TYPE) | |||||
| .add(TARIFF) | |||||
| .add(SMS) | |||||
| .add(CARRIER_SPECIFIC) | |||||
| .add(SUBREGION) | |||||
| .add(FORMAT) | |||||
| .add(PROVENANCE) | |||||
| .add(COMMENT) | |||||
| .build(); | |||||
| /** Schema instance defining the "Shortcodes" CSV table. */ | |||||
| public static final CsvSchema<ShortcodeKey> SCHEMA = | |||||
| CsvSchema.of(ShortcodeKey.MARSHALLER, COLUMNS); | |||||
| /** | |||||
| */ | |||||
| public static CsvTable<ShortcodeKey> toCsv(Map<PhoneRegion, RangeTable> tables) { | |||||
| CsvTable.Builder<ShortcodeKey> csv = CsvTable.builder(SCHEMA); | |||||
| tables.forEach((r, t) -> { | |||||
| for (Change c : t.toChanges()) { | |||||
| for (RangeKey k : RangeKey.decompose(c.getRanges())) { | |||||
| csv.put(ShortcodeKey.create(r, k), c.getAssignments()); | |||||
| } | |||||
| } | |||||
| }); | |||||
| return csv.build(); | |||||
| } | |||||
| /** | |||||
| * Maps a single shortcode CSV table into a map of region specific range tables. Note that the | |||||
| * ranges in these tables do not need to be consistent across regions (e.g. "toll free" in one | |||||
| * might be "premium rate" in the other). | |||||
| */ | |||||
| public static ImmutableSortedMap<PhoneRegion, RangeTable> toShortcodeTables( | |||||
| CsvTable<ShortcodeKey> csv) { | |||||
| // Retain order of regions in the CSV table (not natural region order). | |||||
| Map<PhoneRegion, RangeTable.Builder> builderMap = new LinkedHashMap<>(); | |||||
| for (ShortcodeKey k : csv.getKeys()) { | |||||
| // Basically the same as for RangesTableSchema, except that we deal with region codes in the | |||||
| // key. | |||||
| Change.Builder change = Change.builder(k.getRangeKey().asRangeTree()); | |||||
| csv.getRow(k).forEach(change::assign); | |||||
| PhoneRegion region = k.getRegion(); | |||||
| RangeTable.Builder table = builderMap.get(region); | |||||
| if (table == null) { | |||||
| table = RangeTable.builder(COLUMNS); | |||||
| builderMap.put(region, table); | |||||
| } | |||||
| table.apply(change.build(), OverwriteMode.NEVER); | |||||
| } | |||||
| return ImmutableSortedMap.copyOf(Maps.transformValues(builderMap, RangeTable.Builder::build)); | |||||
| } | |||||
// Static members only; not instantiable.
private ShortcodesTableSchema() {
}
| } | |||||
| @ -0,0 +1,154 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.MetadataException.checkMetadata; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode.NEVER; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff; | |||||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.ColumnGroup; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Schema; | |||||
| import java.util.Optional; | |||||
| /** | |||||
| * A schema describing the columns which are required for creating a {@link NumberingScheme}. | |||||
| * <ol> | |||||
| * <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not the same as | |||||
| * an {@code XmlNumberType}). All ranges should be assigned a validation type. | |||||
| * <li>{@link #AREA_CODE_LENGTH}: The length of an optional prefix which may be removed from | |||||
| * numbers in a range for local dialling. Local only lengths are derived using this column. | |||||
| * <li>{@link #NATIONAL_ONLY}: True if numbers in a range cannot be dialled from outside its | |||||
| * region. The "noInternationalDialling" ranges are derived from this column. | |||||
| * <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where ranges are | |||||
| * set {@code true} that range is valid within the region {@code XX}. | |||||
| * </ol> | |||||
| * | |||||
| * <p>This schema is sufficient for generating {@link NumberingScheme} instances, but isn't what we | |||||
| * expect to import data from (which is why it doesn't have a {@code CsvKeyMarshaller} associated | |||||
| * with it. That's covered by the {@code RangesTableSchema}. | |||||
| */ | |||||
| public final class XmlRangesSchema { | |||||
| /** | |||||
| * The internal "Type" column in the range table This is present in the schema and used is a lot | |||||
| * of places, but it is not what the type/tariff data is imported as (it's derived from other | |||||
| * columns). | |||||
| */ | |||||
| public static final Column<ValidNumberType> TYPE = | |||||
| Column.of(ValidNumberType.class, "Type", UNKNOWN); | |||||
| /** | |||||
| * The "Area Code Length" column in the range table, denoting the length of a prefix which can | |||||
| * be removed from all numbers in a range to obtain locally diallable numbers. If an | |||||
| * "area code" is not optional for dialling, then no value should be set here. | |||||
| */ | |||||
| public static final Column<Integer> AREA_CODE_LENGTH = RangesTableSchema.AREA_CODE_LENGTH; | |||||
| /** Denotes ranges which cannot be dialled internationally. */ | |||||
| public static final Column<Boolean> NATIONAL_ONLY = RangesTableSchema.NATIONAL_ONLY; | |||||
| /** Format specifier IDs. */ | |||||
| public static final Column<String> FORMAT = RangesTableSchema.FORMAT; | |||||
| /** The "Region:XX" column group in the range table. */ | |||||
| public static final ColumnGroup<PhoneRegion, Boolean> REGIONS = RangesTableSchema.REGIONS; | |||||
| /** The standard columns required for generating a {@link NumberingScheme}. */ | |||||
| public static final Schema COLUMNS = | |||||
| Schema.builder() | |||||
| .add(TYPE) | |||||
| .add(AREA_CODE_LENGTH) | |||||
| .add(NATIONAL_ONLY) | |||||
| .add(FORMAT) | |||||
| .add(REGIONS) | |||||
| .build(); | |||||
| /** Columns for per-region tables (just {@link #COLUMNS} without {@link #REGIONS}). */ | |||||
| public static final Schema PER_REGION_COLUMNS = | |||||
| Schema.builder() | |||||
| .add(TYPE) | |||||
| .add(AREA_CODE_LENGTH) | |||||
| .add(NATIONAL_ONLY) | |||||
| .add(FORMAT) | |||||
| .build(); | |||||
| public static RangeTable fromExternalTable(RangeTable src) { | |||||
| checkArgument(RangesTableSchema.TABLE_COLUMNS.isSubSchemaOf(src.getSchema()), | |||||
| "unexpected schema for source table, should be subschema of %s", | |||||
| RangesTableSchema.TABLE_COLUMNS); | |||||
| RangeTree unknown = src.getRanges(RangesTableSchema.TYPE, ExtType.UNKNOWN); | |||||
| checkMetadata(unknown.isEmpty(), "source table contains unknown type for ranges\n%s", unknown); | |||||
| checkSourceColumn(src, RangesTableSchema.TYPE); | |||||
| checkSourceColumn(src, RangesTableSchema.TARIFF); | |||||
| // We can copy most columns verbatim. | |||||
| RangeTable.Builder dst = RangeTable.builder(COLUMNS); | |||||
| copyColumn(src, dst, AREA_CODE_LENGTH); | |||||
| copyColumn(src, dst, NATIONAL_ONLY); | |||||
| copyColumn(src, dst, FORMAT); | |||||
| REGIONS.extractGroupColumns(src.getColumns()).values().forEach(c -> copyColumn(src, dst, c)); | |||||
| // But the type column must be inferred from a combination of the external type and tariff. | |||||
| // Tariff takes precedence, so we do type first and then overwrite ranges for tariff. | |||||
| // We also capture unsupported ranges as they must be ignored in this conversion. | |||||
| RangeTree unsupportedRanges = RangeTree.empty(); | |||||
| for (ExtType extType : src.getAssignedValues(RangesTableSchema.TYPE)) { | |||||
| RangeTree ranges = src.getRanges(RangesTableSchema.TYPE, extType); | |||||
| Optional<ValidNumberType> t = extType.toValidNumberType(); | |||||
| if (t.isPresent()) { | |||||
| dst.assign(TYPE, t.get(), ranges, OverwriteMode.NEVER); | |||||
| } else { | |||||
| unsupportedRanges = unsupportedRanges.union(ranges); | |||||
| } | |||||
| } | |||||
| // Because we know that both the type and tariff columns have assignments for every range (and | |||||
| // there's no "unknown" values for these) we can just ignore "standard rate" tariff ranges | |||||
| // since they must have had a type assigned above already. | |||||
| for (ExtTariff extTariff : src.getAssignedValues(RangesTableSchema.TARIFF)) { | |||||
| // Ignore unsupported ranges here (since otherwise they could add ranges based only on the | |||||
| // tariff, which would be wrong). For example, a toll free ISP number range should NOT be | |||||
| // in the table as TOLL_FREE, since ISP numbers should not be in the table at all (until | |||||
| // such time as they are a fully supported type). | |||||
| RangeTree ranges = | |||||
| src.getRanges(RangesTableSchema.TARIFF, extTariff).subtract(unsupportedRanges); | |||||
| extTariff.toValidNumberType() | |||||
| .ifPresent(t -> dst.assign(TYPE, t, ranges, OverwriteMode.ALWAYS)); | |||||
| } | |||||
| return dst.build(); | |||||
| } | |||||
| private static void checkSourceColumn(RangeTable table, Column<?> col) { | |||||
| checkMetadata(table.getAssignedRanges(col).equals(table.getAllRanges()), | |||||
| "table is missing assignments in column %s for ranges\n%s", | |||||
| col, table.getAllRanges().subtract(table.getAssignedRanges(col))); | |||||
| } | |||||
| private static void copyColumn(RangeTable src, RangeTable.Builder dst, Column<?> col) { | |||||
| if (src.getColumns().contains(col)) { | |||||
| src.getAssignedValues(col).forEach(v -> dst.assign(col, v, src.getRanges(col, v), NEVER)); | |||||
| } | |||||
| } | |||||
| private XmlRangesSchema() {} | |||||
| } | |||||
| @ -0,0 +1,92 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.base.Splitter; | |||||
| import java.util.List; | |||||
| import java.util.Optional; | |||||
| import javax.annotation.Nullable; | |||||
| /** | |||||
| * A single assignment of a column to a value. This can be used to change values in a | |||||
| * {@code RangeTable} and well as query for ranges with its value. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class Assignment<T extends Comparable<T>> { | |||||
| private static final Splitter SPLITTER = Splitter.on("=").limit(2).trimResults(); | |||||
| /** | |||||
| * Parses a string of the form {@code "<column>=<value>"} to create an assignment using the given | |||||
| * schema. The named column must exist in the schema, and the associated value must be a valid | |||||
| * value within that column. | |||||
| * <p> | |||||
| * Whitespace before and after the column or value is ignored. If the value is omitted, then an | |||||
| * unassignment is returned. | |||||
| */ | |||||
| public static Assignment<?> parse(String s, Schema schema) { | |||||
| List<String> parts = SPLITTER.splitToList(s); | |||||
| checkArgument(parts.size() == 2, "invalid assigment string: %s", s); | |||||
| Column<?> column = schema.getColumn(parts.get(0)); | |||||
| return create(column, column.parse(parts.get(1))); | |||||
| } | |||||
| // Type capture around AutoValue is a little painful, so this static helper ... helps. | |||||
| private static <T extends Comparable<T>> Assignment<T> create(Column<T> c, @Nullable Object v) { | |||||
| T value = c.cast(v); | |||||
| return new AutoValue_Assignment<>(c, Optional.ofNullable(value)); | |||||
| } | |||||
| /** | |||||
| * Returns an assignment in the given column for the specified, non null, value. | |||||
| * <p> | |||||
| * Note that an assignment for the default value of a column will return an explicit assignment | |||||
| * for that value, rather than an "unassignment" in that column; so | |||||
| * {@code Assignment.of(c, c.defaultValue())} is not equal to {@code unassign(c)}, even though | |||||
| * they may have the same effect when applied to a range table, and may even have the same | |||||
| * {@link #toString()} representation (in the case of String columns). | |||||
| */ | |||||
| public static <T extends Comparable<T>> Assignment<T> of(Column<T> c, Object v) { | |||||
| return new AutoValue_Assignment<>(c, Optional.of(c.cast(v))); | |||||
| } | |||||
| @SuppressWarnings("unchecked") | |||||
| public static <T extends Comparable<T>> Assignment<T> ofOptional(Column<T> c, Optional<?> v) { | |||||
| // Casting the value makes the optional cast below safe. | |||||
| v.ifPresent(c::cast); | |||||
| return new AutoValue_Assignment<>(c, (Optional<T>) v); | |||||
| } | |||||
| /** | |||||
| * Returns an unassignment in the given column. The {@link #value()} of this assignment is empty. | |||||
| */ | |||||
| public static <T extends Comparable<T>> Assignment<T> unassign(Column<T> c) { | |||||
| return new AutoValue_Assignment<>(c, Optional.empty()); | |||||
| } | |||||
| /** The column in which the assignment applies. */ | |||||
| public abstract Column<T> column(); | |||||
| /** The value in the column, or empty to signify unassignment. */ | |||||
| public abstract Optional<T> value(); | |||||
| @Override | |||||
| public final String toString() { | |||||
| return String.format("%s=%s", column().getName(), value().map(Object::toString).orElse("")); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,131 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import java.util.Arrays; | |||||
| import java.util.LinkedHashMap; | |||||
| import java.util.Map; | |||||
| import java.util.Optional; | |||||
| /** | |||||
| * A change which can be applied to a range table. Changes are applied sequentially to build a | |||||
| * range table and new changes overwrite existing mappings. Changes are additive, and cannot be | |||||
| * used to remove ranges from a table (but they can unassign previous assignments). | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class Change { | |||||
| private static final Change EMPTY = of(RangeTree.empty(), ImmutableList.of()); | |||||
| /** A builder for changes that supports assigning and unassigning column values for a range. */ | |||||
| public static final class Builder { | |||||
| private final RangeTree ranges; | |||||
| private final Map<Column<?>, Assignment<?>> assignments = new LinkedHashMap<>(); | |||||
| private Builder(RangeTree ranges) { | |||||
| this.ranges = checkNotNull(ranges); | |||||
| } | |||||
| /** | |||||
| * Assigns the optional value in the given column for the ranges of this builder (an empty | |||||
| * value has the effect of unassigning the value in the table that this change is applied to). | |||||
| */ | |||||
| public Builder assign(Assignment<?> assignment) { | |||||
| checkArgument(assignments.put(assignment.column(), assignment) == null, | |||||
| "Column already assigned: %s", assignment.column()); | |||||
| return this; | |||||
| } | |||||
| /** Assigns the non-null value in the given column for the ranges of this builder. */ | |||||
| public Builder assign(Column<?> column, Object value) { | |||||
| return assign(Assignment.of(column, value)); | |||||
| } | |||||
| /** Unassigns any values in the given column for the ranges of this builder. */ | |||||
| public Builder unassign(Column<?> column) { | |||||
| return assign(Assignment.unassign(column)); | |||||
| } | |||||
| /** Builds an immutable change from the current state of this builder. */ | |||||
| public Change build() { | |||||
| return Change.of(ranges, assignments.values()); | |||||
| } | |||||
| } | |||||
| public static Builder builder(RangeTree ranges) { | |||||
| return new Builder(ranges); | |||||
| } | |||||
| /** Returns the empty change which has no effect when applied to any table. */ | |||||
| public static Change empty() { | |||||
| return EMPTY; | |||||
| } | |||||
| /** Builds a change from a set of assignments (columns must be unique). */ | |||||
| public static Change of(RangeTree ranges, Iterable<Assignment<?>> assignments) { | |||||
| ImmutableList<Assignment<?>> a = ImmutableList.copyOf(assignments); | |||||
| checkArgument(a.size() == a.stream().map(Assignment::column).distinct().count(), | |||||
| "cannot supply different assignments for the same column: %s", a); | |||||
| return new AutoValue_Change(ranges, a); | |||||
| } | |||||
| /** | |||||
| * Returns the ranges affected by this change. These ranges are added to the table and | |||||
| * optionally assigned category values according to {@link #getAssignments()}. No other ranges | |||||
| * will be affected by this change. | |||||
| */ | |||||
| public abstract RangeTree getRanges(); | |||||
| /** | |||||
| * Returns a list of assignments to be applied for this change. Note that the set of columns for | |||||
| * these assignments is itself also a set (i.e. no two assignments in a change ever share the | |||||
| * same column). | |||||
| */ | |||||
| public abstract ImmutableList<Assignment<?>> getAssignments(); | |||||
| /** Returns whether this change contains any of the specified values in a given column. */ | |||||
| @SafeVarargs | |||||
| public final <T extends Comparable<T>> boolean hasAssignment(Column<T> column, T... values) { | |||||
| for (Assignment<?> a : getAssignments()) { | |||||
| if (column.equals(a.column())) { | |||||
| return a.value().map(v -> Arrays.asList(values).contains(column.cast(v))).orElse(false); | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| /** | |||||
| * Returns the value of the column in this change (or empty if there was not value or the value | |||||
| * was empty. This because it conflates "no value" and "explicitly empty value", this method | |||||
| * might not be suitable for Changes that unassign values. | |||||
| */ | |||||
| public final <T extends Comparable<T>> Optional<T> getAssignment(Column<T> column) { | |||||
| for (Assignment<?> a : getAssignments()) { | |||||
| if (column.equals(a.column())) { | |||||
| return a.value().map(column::cast); | |||||
| } | |||||
| } | |||||
| return Optional.empty(); | |||||
| } | |||||
| // Visible for AutoValue. | |||||
| Change() {} | |||||
| } | |||||
| @ -0,0 +1,217 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.CharMatcher.inRange; | |||||
| import static com.google.common.base.CharMatcher.whitespace; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static java.lang.Boolean.FALSE; | |||||
| import static java.lang.Boolean.TRUE; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.base.CaseFormat; | |||||
| import com.google.common.base.CharMatcher; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import java.util.function.Function; | |||||
| import javax.annotation.Nullable; | |||||
| /** | |||||
| * A column specifier which holds a set of values that are allowed with a column. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class Column<T extends Comparable<T>> { | |||||
| private static final ImmutableMap<String, Boolean> BOOLEAN_MAP = | |||||
| ImmutableMap.of("true", TRUE, "TRUE", TRUE, "false", FALSE, "FALSE", false); | |||||
| private static final CharMatcher ASCII_LETTER_OR_DIGIT = | |||||
| inRange('a', 'z').or(inRange('A', 'Z')).or(inRange('0', '9')); | |||||
| private static final CharMatcher LOWER_ASCII_LETTER_OR_DIGIT = | |||||
| inRange('a', 'z').or(inRange('0', '9')); | |||||
| private static final CharMatcher LOWER_UNDERSCORE = | |||||
| CharMatcher.is('_').or(LOWER_ASCII_LETTER_OR_DIGIT); | |||||
| /** | |||||
| * Returns a column for the specified type with a given parsing function. Use alternate helper | |||||
| * methods for creating columns of common types. | |||||
| */ | |||||
| public static <T extends Comparable<T>> Column<T> create( | |||||
| Class<T> clazz, String name, T defaultValue, Function<String, T> parseFn) { | |||||
| return new AutoValue_Column<>( | |||||
| checkName(name), clazz, parseFn, String::valueOf, defaultValue, null); | |||||
| } | |||||
| /** | |||||
| * Returns a column for the specified enum type. The string representation of a value in this | |||||
| * column is just the {@code toString()} value of the enum. | |||||
| */ | |||||
| public static <T extends Enum<T>> Column<T> of(Class<T> clazz, String name, T defaultValue) { | |||||
| return create(clazz, name, defaultValue, s -> Enum.valueOf(clazz, toEnumName(s))); | |||||
| } | |||||
| /** | |||||
| * Returns a column for strings. In there serialized form, strings do not preserve leading or | |||||
| * trailing whitespace, unless surrounded by double-quotes (e.g. {@code " foo "}). The quotes are | |||||
| * stripped on parsing and added back for any String value with leading/trailing whitespace. The | |||||
| * default value is the empty string. | |||||
| */ | |||||
| public static Column<String> ofString(String name) { | |||||
| return new AutoValue_Column<>( | |||||
| checkName(name), String.class, Column::trimOrUnquote, Column::maybeQuote, "", null); | |||||
| } | |||||
| /** | |||||
| * Returns a column for unsigned integers. The string representation of a value in this column | |||||
| * matches the {@link Integer#toString(int)} value. The default value is {@code 0}. | |||||
| */ | |||||
| public static Column<Integer> ofUnsignedInteger(String name) { | |||||
| return create(Integer.class, name, 0, Integer::parseUnsignedInt); | |||||
| } | |||||
| /** | |||||
| * Returns a column for booleans. The string representation of a value in this column can be any | |||||
| * of "true", "false", "TRUE", "FALSE" (but not things like "True", "T" or "YES"). The default | |||||
| * value is {@code false}. | |||||
| */ | |||||
| public static Column<Boolean> ofBoolean(String name) { | |||||
| return create(Boolean.class, name, false, BOOLEAN_MAP::get); | |||||
| } | |||||
| private static String checkName(String name) { | |||||
| checkArgument(name.indexOf(':') == -1, "invalid column name: %s", name); | |||||
| return name; | |||||
| } | |||||
| // Converts to UPPER_UNDERSCORE naming for enums. | |||||
| private static String toEnumName(String name) { | |||||
| // Allow conversion for lower_underscore and lowerCamel, since UPPER_UNDERSCORE is so "LOUD". | |||||
| // We can be sloppy with respect to errors here since all runtime exceptions are handled. | |||||
| if (LOWER_ASCII_LETTER_OR_DIGIT.matches(name.charAt(0))) { | |||||
| if (LOWER_UNDERSCORE.matchesAllOf(name)) { | |||||
| name = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.UPPER_UNDERSCORE, name); | |||||
| } else if (ASCII_LETTER_OR_DIGIT.matchesAllOf(name)) { | |||||
| name = CaseFormat.LOWER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, name); | |||||
| } else { | |||||
| // Message/type not important here since all exceptions are replaced anyway. | |||||
| throw new IllegalArgumentException(); | |||||
| } | |||||
| } | |||||
| return name; | |||||
| } | |||||
| // Trims whitespace from a serialize string, unless the value is surrounded by double-quotes (in | |||||
| // which case the quotes are removed). This is done to permit the rare use of leading/trailing | |||||
| // whitespace in data in a visually distinct and deliberate way. | |||||
| private static String trimOrUnquote(String s) { | |||||
| if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\"")) { | |||||
| return s.substring(1, s.length() - 1); | |||||
| } | |||||
| return whitespace().trimFrom(s); | |||||
| } | |||||
| // Surrounds any string with whitespace at either end with double quotes. | |||||
| private static String maybeQuote(String s) { | |||||
| if (s.length() > 0 | |||||
| && (whitespace().matches(s.charAt(0)) || whitespace().matches(s.charAt(s.length() - 1)))) { | |||||
| return '"' + s + '"'; | |||||
| } | |||||
| return s; | |||||
| } | |||||
| /** Returns the column name (which can be used as a human readable title if needed). */ | |||||
| public abstract String getName(); | |||||
| abstract Class<T> type(); | |||||
| // The parsing function from a string to a value. | |||||
| abstract Function<String, T> parseFn(); | |||||
| // The serialization function from a value to a String. This must be the inverse of the parseFn. | |||||
| abstract Function<T, String> serializeFn(); | |||||
| /** Default value for this column (inferred for unassigned ranges when a snapshot is built). */ | |||||
| public abstract T defaultValue(); | |||||
| // This is very private and should only be used in this class. | |||||
| @Nullable abstract Column<T> owningGroup(); | |||||
| /** Attempts to cast the given instance to the runtime type of this column. */ | |||||
| @Nullable public final T cast(@Nullable Object value) { | |||||
| return type().cast(value); | |||||
| } | |||||
| /** | |||||
| * Returns the value of this column based on its serialized representation (which is not | |||||
| * necessarily its {@code toString()} representation). | |||||
| */ | |||||
| @Nullable public final T parse(String id) { | |||||
| if (id.isEmpty()) { | |||||
| return null; | |||||
| } | |||||
| try { | |||||
| // TODO: Technically wrong, since for String columns this will unquote strings. | |||||
| // Hopefully this won't be an issue, since quoting is really only likely to be used for | |||||
| // preserving whitespace (which i | |||||
| T value = parseFn().apply(id); | |||||
| if (value != null) { | |||||
| return value; | |||||
| } | |||||
| } catch (RuntimeException e) { | |||||
| // fall through | |||||
| } | |||||
| throw new IllegalArgumentException( | |||||
| String.format("unknown value '%s' in column '%s'", id, getName())); | |||||
| } | |||||
| /** | |||||
| * Returns the serialized representation of a value in this column. This is the stored | |||||
| * representation of the value, not the value itself. | |||||
| */ | |||||
| public final String serialize(@Nullable Object value) { | |||||
| return (value != null) ? serializeFn().apply(cast(value)) : ""; | |||||
| } | |||||
| // Only to be called by ColumnGroup. | |||||
| final Column<T> fromPrototype(String suffix) { | |||||
| String name = getName() + ":" + checkName(suffix); | |||||
| return new AutoValue_Column<T>(name, type(), parseFn(), serializeFn(), defaultValue(), this); | |||||
| } | |||||
| final boolean isIn(ColumnGroup<?, ?> group) { | |||||
| return group.prototype().equals(owningGroup()); | |||||
| } | |||||
| @Override | |||||
| public final String toString() { | |||||
| return "Column{'" + getName() + "'}"; | |||||
| } | |||||
| @Override | |||||
| public final boolean equals(Object obj) { | |||||
| if (!(obj instanceof Column<?>)) { | |||||
| return false; | |||||
| } | |||||
| Column<?> c = (Column<?>) obj; | |||||
| return c.getName().equals(getName()) && c.type().equals(type()); | |||||
| } | |||||
| @Override | |||||
| public final int hashCode() { | |||||
| return getName().hashCode() ^ type().hashCode(); | |||||
| } | |||||
| // Visible only for AutoValue | |||||
| Column() {} | |||||
| } | |||||
| @ -0,0 +1,100 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.collect.ImmutableBiMap.toImmutableBiMap; | |||||
| import static java.util.function.Function.identity; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableBiMap; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag; | |||||
| import java.util.Set; | |||||
| import java.util.function.Function; | |||||
| /** A group of {@link RangeTable} columns. */ | |||||
| @AutoValue | |||||
| public abstract class ColumnGroup<K, T extends Comparable<T>> { | |||||
| /** | |||||
| * Returns a group for columns with the same type as the given "prototype" column and which has a | |||||
| * a prefix that's the name of the prototype. Suffix values are parsed using the given function. | |||||
| */ | |||||
| public static <K, T extends Comparable<T>> ColumnGroup<K, T> of( | |||||
| Column<T> prototype, Function<String, K> parseFn) { | |||||
| return new AutoValue_ColumnGroup<>(prototype, parseFn); | |||||
| } | |||||
| /** Returns a group for the specified prototype column keyed by {@link PhoneRegion}. */ | |||||
| public static <T extends Comparable<T>> ColumnGroup<PhoneRegion, T> byRegion( | |||||
| Column<T> prototype) { | |||||
| return of(prototype, PhoneRegion::of); | |||||
| } | |||||
| /** Returns a group for the specified prototype column keyed by {@link SimpleLanguageTag}. */ | |||||
| public static <T extends Comparable<T>> ColumnGroup<SimpleLanguageTag, T> byLanguage( | |||||
| Column<T> prototype) { | |||||
| return of(prototype, SimpleLanguageTag::of); | |||||
| } | |||||
| // Internal use only. | |||||
| abstract Column<T> prototype(); | |||||
| abstract Function<String, K> parseFn(); | |||||
| /** Returns the column for a specified key. */ | |||||
| public Column<T> getColumn(K key) { | |||||
| // The reason this does not just call "prototype().fromPrototype(...)" is that the key may not | |||||
| // be parsable by the function just because it's the "right" type. This allows people to pass | |||||
| // in a function that limits columns to some subset of the domain (e.g. a subset of region | |||||
| // codes). | |||||
| return getColumnFromId(key.toString()); | |||||
| } | |||||
| /** Returns the column for a specified ID string. */ | |||||
| public Column<T> getColumnFromId(String id) { | |||||
| try { | |||||
| Object unused = parseFn().apply(id); | |||||
| } catch (RuntimeException e) { | |||||
| throw new IllegalArgumentException( | |||||
| String.format("invalid column %s, not in group: %s", id, this), e); | |||||
| } | |||||
| return prototype().fromPrototype(id); | |||||
| } | |||||
| /** Returns the key of a column in this group. */ | |||||
| @SuppressWarnings("unchecked") | |||||
| public K getKey(Column<?> c) { | |||||
| checkArgument(c.isIn(this), "column %s in not group %s", c, this); | |||||
| // Cast is safe since any column in this group is a Column<T>. | |||||
| return extractKey((Column<T>) c); | |||||
| } | |||||
| /** Returns a bidirectional mapping from group key to column, for columns in this group. */ | |||||
| @SuppressWarnings("unchecked") | |||||
| public ImmutableBiMap<K, Column<T>> extractGroupColumns(Set<Column<?>> columns) { | |||||
| return columns.stream() | |||||
| .filter(c -> c.isIn(this)) | |||||
| // Cast is safe since any column in this group is a Column<T>. | |||||
| .map(c -> (Column<T>) c) | |||||
| .collect(toImmutableBiMap(this::extractKey, identity())); | |||||
| } | |||||
| // Assumes we've already verified that the column is in this group. | |||||
| private K extractKey(Column<T> column) { | |||||
| String name = column.getName(); | |||||
| return parseFn().apply(name.substring(name.lastIndexOf(':') + 1)); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,74 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import static java.util.Comparator.naturalOrder; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import java.util.Comparator; | |||||
| import java.util.List; | |||||
| import java.util.Optional; | |||||
| import java.util.function.Function; | |||||
| import java.util.stream.Stream; | |||||
| /** Marshaller to handle key serialization and ordering in {@code CsvTable}. */ | |||||
| public final class CsvKeyMarshaller<K> { | |||||
| private final Function<K, Stream<String>> serialize; | |||||
| private final Function<List<String>, K> deserialize; | |||||
| private final Optional<Comparator<K>> ordering; | |||||
| private final ImmutableList<String> columns; | |||||
| public static CsvKeyMarshaller<String> ofSortedString(String columnName) { | |||||
| return new CsvKeyMarshaller<String>( | |||||
| Stream::of, p -> p.get(0), Optional.of(naturalOrder()), columnName); | |||||
| } | |||||
| public CsvKeyMarshaller( | |||||
| Function<K, Stream<String>> serialize, | |||||
| Function<List<String>, K> deserialize, | |||||
| Optional<Comparator<K>> ordering, | |||||
| String... columns) { | |||||
| this(serialize, deserialize, ordering, ImmutableList.copyOf(columns)); | |||||
| } | |||||
| public CsvKeyMarshaller( | |||||
| Function<K, Stream<String>> serialize, | |||||
| Function<List<String>, K> deserialize, | |||||
| Optional<Comparator<K>> ordering, | |||||
| List<String> columns) { | |||||
| this.serialize = checkNotNull(serialize); | |||||
| this.deserialize = checkNotNull(deserialize); | |||||
| this.ordering = checkNotNull(ordering); | |||||
| this.columns = ImmutableList.copyOf(columns); | |||||
| } | |||||
| public ImmutableList<String> getColumns() { | |||||
| return columns; | |||||
| } | |||||
| Stream<String> serialize(K key) { | |||||
| return serialize.apply(key); | |||||
| } | |||||
| K deserialize(List<String> keyParts) { | |||||
| return deserialize.apply(keyParts); | |||||
| } | |||||
| Optional<Comparator<K>> ordering() { | |||||
| return ordering; | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,241 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.CharMatcher.isNot; | |||||
| import static com.google.common.base.CharMatcher.javaIsoControl; | |||||
| import static com.google.common.base.CharMatcher.whitespace; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import com.google.common.base.CharMatcher; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.Streams; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Iterator; | |||||
| import java.util.List; | |||||
| import java.util.function.Consumer; | |||||
| import java.util.stream.Stream; | |||||
| import javax.annotation.Nullable; | |||||
| /** | |||||
| * An efficient, fluent CSV parser which operates on a {@link Stream} of lines. It handles quoting | |||||
| * of values, whitespace trimming and mapping values via a "schema" row. | |||||
| * | |||||
| * <p>This class is sadly necessary since the one in {@code com.google.common.text} doesn't support | |||||
| * ignoring whitespace (and making it do so would take longer than writing this). | |||||
| * | |||||
| * <p>This class is immutable and thread-safe. | |||||
| */ | |||||
| // TODO: Investigate other "standard" CSV parsers such as org.apache.commons.csv. | |||||
| public final class CsvParser { | |||||
| /** | |||||
| * A consumer for CSV rows which can automatically map values according to a header row. | |||||
| * | |||||
| * <p>This class is immutable and thread-safe. | |||||
| */ | |||||
| public static final class RowMapper { | |||||
| @Nullable private final Consumer<ImmutableList<String>> headerHandler; | |||||
| private RowMapper(Consumer<ImmutableList<String>> headerHandler) { | |||||
| this.headerHandler = headerHandler; | |||||
| } | |||||
| public Consumer<Stream<String>> mapTo(Consumer<ImmutableMap<String, String>> handler) { | |||||
| return new Consumer<Stream<String>>() { | |||||
| private ImmutableList<String> header = null; | |||||
| @Override | |||||
| public void accept(Stream<String> row) { | |||||
| if (header == null) { | |||||
| // Can contain duplicates (but that's bad for mapping). | |||||
| header = row.collect(toImmutableList()); | |||||
| checkArgument( | |||||
| header.size() == header.stream().distinct().count(), | |||||
| "duplicate values in CSV header: %s", | |||||
| header); | |||||
| if (headerHandler != null) { | |||||
| headerHandler.accept(header); | |||||
| } | |||||
| } else { | |||||
| ImmutableMap.Builder<String, String> map = ImmutableMap.builder(); | |||||
| int i = 0; | |||||
| for (String v : Streams.iterating(row)) { | |||||
| checkArgument(i < header.size(), | |||||
| "too many columns (expected %s): %s", header.size(), map); | |||||
| if (!v.isEmpty()) { | |||||
| map.put(header.get(i++), v); | |||||
| } | |||||
| } | |||||
| handler.accept(map.build()); | |||||
| } | |||||
| } | |||||
| }; | |||||
| } | |||||
| } | |||||
| private static final CharMatcher NON_WHITESPACE = CharMatcher.whitespace().negate(); | |||||
| private static final char QUOTE = '"'; | |||||
| private static final CharMatcher VALID_DELIMITER_CHAR = | |||||
| NON_WHITESPACE.and(javaIsoControl().negate()).and(isNot(QUOTE)).or(CharMatcher.anyOf(" \t")); | |||||
| public static CsvParser withSeparator(char delimiter) { | |||||
| return new CsvParser(delimiter, false, false); | |||||
| } | |||||
| public static CsvParser commaSeparated() { | |||||
| return withSeparator(','); | |||||
| } | |||||
| public static CsvParser tabSeparated() { | |||||
| return withSeparator('\t'); | |||||
| } | |||||
| public static RowMapper rowMapper() { | |||||
| return new RowMapper(null); | |||||
| } | |||||
| public static RowMapper rowMapper(Consumer<ImmutableList<String>> headerHandler) { | |||||
| return new RowMapper(headerHandler); | |||||
| } | |||||
| private final char delimiter; | |||||
| private final boolean trimWhitespace; | |||||
| private final boolean allowMultiline; | |||||
| private CsvParser(char delimiter, boolean trimWhitespace, boolean allowMultiline) { | |||||
| checkArgument(VALID_DELIMITER_CHAR.matches(delimiter), | |||||
| "invalid delimiter: %s", delimiter); | |||||
| this.delimiter = delimiter; | |||||
| this.trimWhitespace = trimWhitespace; | |||||
| this.allowMultiline = allowMultiline; | |||||
| } | |||||
| public CsvParser trimWhitespace() { | |||||
| checkArgument(NON_WHITESPACE.matches(delimiter), | |||||
| "cannot trim whitespace if delimiter is whitespace"); | |||||
| return new CsvParser(delimiter, true, allowMultiline); | |||||
| } | |||||
| public CsvParser allowMultiline() { | |||||
| return new CsvParser(delimiter, trimWhitespace, true); | |||||
| } | |||||
| public void parse(Stream<String> lines, Consumer<Stream<String>> rowCallback) { | |||||
| // Allow whitespace delimiter if we aren't also trimming whitespace. | |||||
| List<String> row = new ArrayList<>(); | |||||
| StringBuilder buffer = new StringBuilder(); | |||||
| Iterator<String> it = lines.iterator(); | |||||
| while (parseRow(it, row, buffer)) { | |||||
| rowCallback.accept(row.stream()); | |||||
| row.clear(); | |||||
| } | |||||
| } | |||||
| private boolean parseRow(Iterator<String> lines, List<String> row, StringBuilder buffer) { | |||||
| if (!lines.hasNext()) { | |||||
| return false; | |||||
| } | |||||
| // First line of potentially several which make up this row. | |||||
| String line = lines.next(); | |||||
| int start = maybeTrimWhitespace(line, 0); | |||||
| while (start < line.length()) { | |||||
| // "start" is the start of the next part and must be a valid index into current "line". | |||||
| // Could be high or low surrogate if badly formed string, or just point at the delimiter. | |||||
| char c = line.charAt(start); | |||||
| int pos; | |||||
| if (c == QUOTE) { | |||||
| // Quoted value, maybe parse and unescape multiple lines here. | |||||
| pos = ++start; | |||||
| while (true) { | |||||
| if (pos == line.length()) { | |||||
| buffer.append(line, start, pos); | |||||
| checkArgument(allowMultiline && lines.hasNext(), | |||||
| "unterminated quoted value: %s", buffer); | |||||
| buffer.append('\n'); | |||||
| line = lines.next(); | |||||
| start = 0; | |||||
| pos = 0; | |||||
| } | |||||
| c = line.charAt(pos); | |||||
| if (c == QUOTE) { | |||||
| buffer.append(line, start, pos++); | |||||
| if (pos == line.length()) { | |||||
| break; | |||||
| } | |||||
| if (line.charAt(pos) != QUOTE) { | |||||
| pos = maybeTrimWhitespace(line, pos); | |||||
| checkArgument(pos == line.length() || line.codePointAt(pos) == delimiter, | |||||
| "unexpected character (expected delimiter) in: %s", line); | |||||
| break; | |||||
| } | |||||
| // "Double double quotes, what does it mean?" (oh yeah, a single double quote). | |||||
| buffer.append(QUOTE); | |||||
| start = pos + 1; | |||||
| } | |||||
| pos++; | |||||
| } | |||||
| row.add(buffer.toString()); | |||||
| buffer.setLength(0); | |||||
| } else if (c == delimiter) { | |||||
| // Empty unquoted empty value (e.g. "foo,,bar"). | |||||
| row.add(""); | |||||
| pos = start; | |||||
| } else { | |||||
| // Non-empty unquoted value. | |||||
| pos = line.indexOf(delimiter, start + 1); | |||||
| if (pos == -1) { | |||||
| pos = line.length(); | |||||
| } | |||||
| String value = line.substring(start, maybeTrimTrailingWhitespace(line, pos)); | |||||
| checkArgument(value.indexOf(QUOTE) == -1, | |||||
| "quotes cannot appear in unquoted values: %s", value); | |||||
| row.add(value); | |||||
| } | |||||
| if (pos == line.length()) { | |||||
| // We hit end-of-line at the end of a value, so just return (no trailing empty value). | |||||
| return true; | |||||
| } | |||||
| // If not end-of-line, "pos" points at the last delimiter, so we can find the next start. | |||||
| start = maybeTrimWhitespace(line, pos + 1); | |||||
| } | |||||
| // We hit end-of-line either immediately, or after a delimiter. Either way we always need to | |||||
| // add a trailing empty value for consistency. | |||||
| row.add(""); | |||||
| return true; | |||||
| } | |||||
| private int maybeTrimWhitespace(String s, int i) { | |||||
| if (trimWhitespace) { | |||||
| i = NON_WHITESPACE.indexIn(s, i); | |||||
| if (i == -1) { | |||||
| i = s.length(); | |||||
| } | |||||
| } | |||||
| return i; | |||||
| } | |||||
| private int maybeTrimTrailingWhitespace(String s, int i) { | |||||
| if (trimWhitespace) { | |||||
| // There is no "lastIndexIn(String, int)" sadly. | |||||
| while (i > 0 && whitespace().matches(s.charAt(i - 1))) { | |||||
| i--; | |||||
| } | |||||
| } | |||||
| return i; | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,108 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import java.io.IOException; | |||||
| import java.io.Reader; | |||||
| import java.nio.file.Files; | |||||
| import java.nio.file.Path; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Comparator; | |||||
| import java.util.List; | |||||
| import java.util.Optional; | |||||
| import java.util.function.BiConsumer; | |||||
| /** | |||||
| * A CSV schema is a combination of a key marshaller and table columns. A CSV schema defines a | |||||
| * CSV table with key columns, followed by non-key columns. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class CsvSchema<K> { | |||||
| /** | |||||
| * Returns a schema for a CSV file using the given marshaller to define key columns, and a table | |||||
| * schema to define any additional columns in a row. | |||||
| */ | |||||
| public static <K> CsvSchema<K> of(CsvKeyMarshaller<K> marshaller, Schema columns) { | |||||
| return new AutoValue_CsvSchema<>(marshaller, columns); | |||||
| } | |||||
| /** The marshaller defining table keys and how they are serialized in CSV. */ | |||||
| public abstract CsvKeyMarshaller<K> keyMarshaller(); | |||||
| /** The table schema defining non-key columns in the table. */ | |||||
| public abstract Schema columns(); | |||||
| /** Returns the ordering for keys in the CSV table, as defined by the key marshaller. */ | |||||
| public Optional<Comparator<K>> rowOrdering() { | |||||
| return keyMarshaller().ordering(); | |||||
| } | |||||
| /** | |||||
| * Returns the ordering for additional non-key columns in the CSV table as defined by the table | |||||
| * schema. | |||||
| */ | |||||
| public Comparator<Column<?>> columnOrdering() { | |||||
| return columns().ordering(); | |||||
| } | |||||
| /** | |||||
| * Extracts the non-key columns of a table from the header row. The header row is expected to | |||||
| * contain the names of all columns (including key columns) in the CSV table and this method | |||||
| * verifies that the key columns are present as expected before resolving the non-key columns | |||||
| * in order. | |||||
| */ | |||||
| public ImmutableList<Column<?>> parseHeader(List<String> header) { | |||||
| int hsize = keyMarshaller().getColumns().size(); | |||||
| checkArgument(header.size() >= hsize, "CSV header too short: %s", header); | |||||
| checkArgument(header.subList(0, hsize).equals(keyMarshaller().getColumns()), | |||||
| "Invalid CSV header: %s", header); | |||||
| ImmutableList.Builder<Column<?>> columns = ImmutableList.builder(); | |||||
| header.subList(hsize, header.size()).forEach(s -> columns.add(columns().getColumn(s))); | |||||
| return columns.build(); | |||||
| } | |||||
| /** Parses a row from a CSV table containing unescaped values. */ | |||||
| public void parseRow( | |||||
| ImmutableList<Column<?>> columns, List<String> row, BiConsumer<K, List<Assignment<?>>> fn) { | |||||
| int hsize = keyMarshaller().getColumns().size(); | |||||
| checkArgument(row.size() >= hsize, "CSV row too short: %s", row); | |||||
| K key = keyMarshaller().deserialize(row.subList(0, hsize)); | |||||
| List<Assignment<?>> rowAssignments = new ArrayList<>(); | |||||
| for (int n = 0; n < row.size() - hsize; n++) { | |||||
| Column<?> c = columns.get(n); | |||||
| rowAssignments.add( | |||||
| Assignment.ofOptional(c, Optional.ofNullable(c.parse(row.get(n + hsize))))); | |||||
| } | |||||
| fn.accept(key, rowAssignments); | |||||
| } | |||||
| public CsvTable<K> load(Path file) throws IOException { | |||||
| if (!Files.exists(file)) { | |||||
| return CsvTable.builder(this).build(); | |||||
| } | |||||
| try (Reader csv = Files.newBufferedReader(file)) { | |||||
| return CsvTable.importCsv(this, csv); | |||||
| } | |||||
| } | |||||
| public CsvTable<K> load(Reader reader) throws IOException { | |||||
| return CsvTable.importCsv(this, reader); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,589 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||||
| import static com.google.common.collect.ImmutableSortedSet.toImmutableSortedSet; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.LHS_CHANGED; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.LHS_ONLY; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.RHS_CHANGED; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.RHS_ONLY; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.DiffKey.Status.UNCHANGED; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.base.CharMatcher; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.ImmutableSortedSet; | |||||
| import com.google.common.collect.Maps; | |||||
| import com.google.common.collect.Ordering; | |||||
| import com.google.common.collect.Sets; | |||||
| import com.google.common.collect.Table; | |||||
| import com.google.common.collect.Tables; | |||||
| import com.google.common.collect.TreeBasedTable; | |||||
| import com.google.common.escape.CharEscaperBuilder; | |||||
| import com.google.common.escape.Escaper; | |||||
| import java.io.BufferedReader; | |||||
| import java.io.IOException; | |||||
| import java.io.Reader; | |||||
| import java.io.StringWriter; | |||||
| import java.io.Writer; | |||||
| import java.util.Arrays; | |||||
| import java.util.Collections; | |||||
| import java.util.LinkedHashMap; | |||||
| import java.util.List; | |||||
| import java.util.Map; | |||||
| import java.util.Objects; | |||||
| import java.util.Optional; | |||||
| import java.util.Set; | |||||
| import java.util.TreeMap; | |||||
| import java.util.function.Consumer; | |||||
| import java.util.function.Predicate; | |||||
| import java.util.function.Supplier; | |||||
| import java.util.stream.Stream; | |||||
| import javax.annotation.Nullable; | |||||
| /** | |||||
| * A general tabular representation of {@link Column} based data, which can include range data | |||||
| * (via {@link RangeTable}) or other tabular data using a specified row key implementation. | |||||
| * | |||||
| * @param <K> the row key type. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class CsvTable<K> { | |||||
| // The default parser trims whitespace (since CSV files may be textually aligned) but does not | |||||
| // allow multiline values (JSON style escaping keeps the "one row per line" assumption true). | |||||
| public static final String DEFAULT_DELIMETER = ";"; | |||||
| private static final CsvParser CSV_PARSER = | |||||
| CsvParser.withSeparator(DEFAULT_DELIMETER.charAt(0)).trimWhitespace(); | |||||
| /** | |||||
| * Mode to control how diffs are generated. In a diff table, each row has an additional | |||||
| * {@code Status} applied to describe whether it is unchanged, modified or exclusive (i.e. | |||||
| * exists only in one of the source tables). | |||||
| */ | |||||
| public enum DiffMode { | |||||
| /** Include all rows in the "diff table" (unchanged, modified or exclusive). */ | |||||
| ALL, | |||||
| /** Include only changed rows in the "diff table" (modified or exclusive). */ | |||||
| CHANGES, | |||||
| /** Include only left-hand-side rows in the "diff table" (unchanged, modified or exclusive). */ | |||||
| LHS, | |||||
| /** Include only right-hand-side rows in the "diff table" (unchanged, modified or exclusive). */ | |||||
| RHS, | |||||
| } | |||||
| /** A simple builder for programmatic generation of CSV tables. */ | |||||
| public static final class Builder<T> { | |||||
| private final CsvSchema<T> schema; | |||||
| private final Table<T, Column<?>, Object> table; | |||||
| private Builder(CsvSchema<T> schema) { | |||||
| this.schema = checkNotNull(schema); | |||||
| // Either use insertion order or sorted order for rows (depends on schema). | |||||
| if (schema.rowOrdering().isPresent()) { | |||||
| this.table = TreeBasedTable.create(schema.rowOrdering().get(), schema.columnOrdering()); | |||||
| } else { | |||||
| this.table = Tables.newCustomTable( | |||||
| new LinkedHashMap<>(), | |||||
| () -> new TreeMap<>(schema.columnOrdering())); | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Puts a row into the table using the specific mappings (potentially overwriting any existing | |||||
| * row). | |||||
| */ | |||||
| public Builder<T> putRow(T key, Map<Column<?>, ?> row) { | |||||
| table.rowMap().remove(key); | |||||
| return addRow(key, row); | |||||
| } | |||||
| /** | |||||
| * Adds a new row to the table using the specific mappings (the row must not already be | |||||
| * present). | |||||
| */ | |||||
| public Builder<T> addRow(T key, Map<Column<?>, ?> row) { | |||||
| checkArgument(!table.containsRow(key), "row '%s' already added\n%s", key, this); | |||||
| row.forEach((c, v) -> table.put(key, c, v)); | |||||
| return this; | |||||
| } | |||||
| /** | |||||
| * Adds a new row to the table using the specific mappings (the row must not already be | |||||
| * present). | |||||
| */ | |||||
| public Builder<T> addRow(T key, List<Assignment<?>> row) { | |||||
| checkArgument(!table.containsRow(key), "row '%s' already added\n%s", key, this); | |||||
| put(key, row); | |||||
| return this; | |||||
| } | |||||
| /** Puts (overwrites) a single value in the table. */ | |||||
| public <V extends Comparable<V>> Builder<T> put(T key, Column<V> c, @Nullable V v) { | |||||
| schema.columns().checkColumn(c); | |||||
| if (v != null) { | |||||
| table.put(key, c, c.cast(v)); | |||||
| } else { | |||||
| table.remove(key, c); | |||||
| } | |||||
| return this; | |||||
| } | |||||
| /** Puts (overwrites) a sequence of values in the table. */ | |||||
| public Builder<T> put(T key, Iterable<Assignment<?>> assign) { | |||||
| for (Assignment<?> a : assign) { | |||||
| if (a.value().isPresent()) { | |||||
| table.put(key, a.column(), a.value().get()); | |||||
| } else { | |||||
| table.remove(key, a.column()); | |||||
| } | |||||
| } | |||||
| return this; | |||||
| } | |||||
| /** Puts (overwrites) a sequence of values in the table. */ | |||||
| public Builder<T> put(T key, Assignment<?>... assign) { | |||||
| return put(key, Arrays.asList(assign)); | |||||
| } | |||||
| /** Returns an unmodifiable view of the keys for the table. */ | |||||
| public Set<T> getKeys() { | |||||
| return Collections.unmodifiableSet(table.rowKeySet()); | |||||
| } | |||||
| /** Gets a single value in the table (or null). */ | |||||
| public <V extends Comparable<V>> V get(T key, Column<V> c) { | |||||
| return c.cast(table.get(key, c)); | |||||
| } | |||||
| /** Removes an entire row from the table (does nothing if the row did no exist). */ | |||||
| public Builder<T> removeRow(T key) { | |||||
| table.rowKeySet().remove(key); | |||||
| return this; | |||||
| } | |||||
| /** Filters the rows of a table, keeping those which match the given predicate. */ | |||||
| public Builder<T> filterRows(Predicate<T> predicate) { | |||||
| Set<T> rows = table.rowKeySet(); | |||||
| // Copy to avoid concurrent modification exception. | |||||
| for (T key : ImmutableSet.copyOf(table.rowKeySet())) { | |||||
| if (!predicate.test(key)) { | |||||
| rows.remove(key); | |||||
| } | |||||
| } | |||||
| return this; | |||||
| } | |||||
| /** Filters the columns of a table, keeping only those which match the given predicate. */ | |||||
| public Builder<T> filterColumns(Predicate<Column<?>> predicate) { | |||||
| Set<Column<?>> toRemove = | |||||
| table.columnKeySet().stream().filter(predicate.negate()).collect(toImmutableSet()); | |||||
| table.columnKeySet().removeAll(toRemove); | |||||
| return this; | |||||
| } | |||||
| /** Builds the immutable CSV table. */ | |||||
| public CsvTable<T> build() { | |||||
| return from(schema, table); | |||||
| } | |||||
| @Override | |||||
| public String toString() { | |||||
| return build().toString(); | |||||
| } | |||||
| } | |||||
| /** Returns a new, empty builder for a CSV table whose keys and columns follow the given schema. */ | |||||
| public static <K> Builder<K> builder(CsvSchema<K> schema) { | |||||
| return new Builder<>(schema); | |||||
| } | |||||
| /** Returns a CSV table based on the given table with the expected key and column semantics. */ | |||||
| public static <K> CsvTable<K> from(CsvSchema<K> schema, Table<K, Column<?>, Object> table) { | |||||
| ImmutableSet<Column<?>> columns = table.columnKeySet().stream() | |||||
| .sorted(schema.columnOrdering()) | |||||
| .collect(toImmutableSet()); | |||||
| columns.forEach(schema.columns()::checkColumn); | |||||
| return new AutoValue_CsvTable<>( | |||||
| schema, | |||||
| ImmutableMap.copyOf(Maps.transformValues(table.rowMap(), ImmutableMap::copyOf)), | |||||
| columns); | |||||
| } | |||||
| /** | |||||
| * Imports a semicolon separated CSV file using the default parser (which trims whitespace | |||||
| * around values). The CSV file needs to have the following layout: | |||||
| * <pre> | |||||
| * Key1 ; Key2 ; Column1 ; Column2 ; Column3 | |||||
| * k1 ; k2 ; OTHER ; "Text" ; true | |||||
| * ... | |||||
| * </pre> | |||||
| * Where the first {@code N} columns represent the row key (as encapsulated by the key | |||||
| * {@link CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema} | |||||
| * via the column names. | |||||
| * <p> | |||||
| * Column values are represented in a semi-typed fashion according to the associated column (some | |||||
| * columns require values to be escaped, others do not). Note that it's the column that defines | |||||
| * whether the value needs escaping, not the content of the value itself (all values in a String | |||||
| * column are required to be quoted). | |||||
| * | |||||
| * @param schema the schema defining the key and non-key columns of the table. | |||||
| * @param csv the reader from which the CSV text is read. | |||||
| * @return the imported table. | |||||
| */ | |||||
| public static <K> CsvTable<K> importCsv(CsvSchema<K> schema, Reader csv) throws IOException { | |||||
| return importCsv(schema, csv, CSV_PARSER); | |||||
| } | |||||
| /** Imports a CSV file using a specified parser. */ | |||||
| public static <K> CsvTable<K> importCsv(CsvSchema<K> schema, Reader csv, CsvParser csvParser) | |||||
| throws IOException { | |||||
| TableParser<K> parser = new TableParser<>(schema); | |||||
| try (BufferedReader r = new BufferedReader(csv)) { | |||||
| csvParser.parse( | |||||
| r.lines(), | |||||
| row -> parser.accept( | |||||
| row.map(CsvTable::unescapeSingleLineCsvText).collect(toImmutableList()))); | |||||
| } | |||||
| return parser.done(); | |||||
| } | |||||
| /** | |||||
| * Imports a sequence of rows to create a CSV table. The values in the rows are unescaped and | |||||
| * require no explicit parsing. | |||||
| */ | |||||
| public static <K> CsvTable<K> importRows(CsvSchema<K> schema, Supplier<List<String>> rows) { | |||||
| TableParser<K> parser = new TableParser<>(schema); | |||||
| List<String> row; | |||||
| while ((row = rows.get()) != null) { | |||||
| parser.accept(row); | |||||
| } | |||||
| return parser.done(); | |||||
| } | |||||
| /** | |||||
| * Creates a "diff table" based on the given left and right table inputs. The resulting table | |||||
| * has a new key column which indicates (via the {@code Status} enum) how rows difference between | |||||
| * the left and right tables. | |||||
| */ | |||||
| public static <K> CsvTable<DiffKey<K>> diff(CsvTable<K> lhs, CsvTable<K> rhs, DiffMode mode) { | |||||
| checkArgument(lhs.getSchema().equals(rhs.getSchema()), "Cannot diff with different schemas"); | |||||
| checkNotNull(mode, "Must specify a diff mode"); | |||||
| CsvKeyMarshaller<DiffKey<K>> marshaller = DiffKey.wrap(lhs.getSchema().keyMarshaller()); | |||||
| CsvSchema<DiffKey<K>> diffSchema = CsvSchema.of(marshaller, lhs.getSchema().columns()); | |||||
| Builder<DiffKey<K>> diff = CsvTable.builder(diffSchema); | |||||
| if (mode != DiffMode.RHS) { | |||||
| Sets.difference(lhs.getKeys(), rhs.getKeys()) | |||||
| .forEach(k -> diff.addRow(DiffKey.of(LHS_ONLY, k), lhs.getRow(k))); | |||||
| } | |||||
| if (mode != DiffMode.LHS) { | |||||
| Sets.difference(rhs.getKeys(), lhs.getKeys()) | |||||
| .forEach(k -> diff.addRow(DiffKey.of(RHS_ONLY, k), rhs.getRow(k))); | |||||
| } | |||||
| for (K key : Sets.intersection(lhs.getKeys(), rhs.getKeys())) { | |||||
| Map<Column<?>, Object> lhsRow = lhs.getRow(key); | |||||
| Map<Column<?>, Object> rhsRow = rhs.getRow(key); | |||||
| if (lhsRow.equals(rhsRow)) { | |||||
| if (mode != DiffMode.CHANGES) { | |||||
| diff.addRow(DiffKey.of(UNCHANGED, key), lhsRow); | |||||
| } | |||||
| } else { | |||||
| if (mode != DiffMode.RHS) { | |||||
| diff.addRow(DiffKey.of(LHS_CHANGED, key), lhsRow); | |||||
| } | |||||
| if (mode != DiffMode.LHS) { | |||||
| diff.addRow(DiffKey.of(RHS_CHANGED, key), rhsRow); | |||||
| } | |||||
| } | |||||
| } | |||||
| return diff.build(); | |||||
| } | |||||
| /** Returns the schema for this table. */ | |||||
| public abstract CsvSchema<K> getSchema(); | |||||
| /** Returns the rows of the table (not public to avoid access to untyped access). */ | |||||
| // Note that this cannot easily be replaced by ImmutableTable (as of Jan 2019) because | |||||
| // ImmutableTable has severe limitations on how row/column ordering is handled that make the | |||||
| // row/column ordering required in CsvTable currently impossible. | |||||
| abstract ImmutableMap<K, ImmutableMap<Column<?>, Object>> getRows(); | |||||
| /** | |||||
| * Returns the set of columns for the table (excluding the synthetic key columns, which are | |||||
| * handled by the marshaller). | |||||
| */ | |||||
| public abstract ImmutableSet<Column<?>> getColumns(); | |||||
| /** Returns whether a row is in the table. */ | |||||
| public boolean isEmpty() { | |||||
| return getRows().isEmpty(); | |||||
| } | |||||
| /** Returns the set of keys for the table. */ | |||||
| public ImmutableSet<K> getKeys() { | |||||
| return getRows().keySet(); | |||||
| } | |||||
| /** Returns a single row as a map of column assignments. */ | |||||
| public ImmutableMap<Column<?>, Object> getRow(K rowKey) { | |||||
| ImmutableMap<Column<?>, Object> row = getRows().get(rowKey); | |||||
| return row != null ? row : ImmutableMap.of(); | |||||
| } | |||||
| /** Returns whether a row is in the table. */ | |||||
| public boolean containsRow(K rowKey) { | |||||
| return getKeys().contains(rowKey); | |||||
| } | |||||
| public Builder<K> toBuilder() { | |||||
| Builder<K> builder = builder(getSchema()); | |||||
| getRows().forEach(builder::putRow); | |||||
| return builder; | |||||
| } | |||||
| /** Returns the table column names, including the key columns, in schema order. */ | |||||
| public Stream<String> getCsvHeader() { | |||||
| return Stream.concat( | |||||
| getSchema().keyMarshaller().getColumns().stream(), | |||||
| getColumns().stream().map(Column::getName)); | |||||
| } | |||||
| /** Returns the unescaped CSV values for the specified row, in order. */ | |||||
| public Stream<String> getCsvRow(K key) { | |||||
| checkArgument(getKeys().contains(key), "no such row: %s", key); | |||||
| // Note that we pass the raw value (possibly null) to serialize so that we don't conflate | |||||
| // missing and default values. | |||||
| return Stream.concat( | |||||
| getSchema().keyMarshaller().serialize(key), | |||||
| getColumns().stream().map(c -> c.serialize(getOrNull(key, c)))); | |||||
| } | |||||
| /** | |||||
| * Exports the given table by writing its values as semicolon separated "CSV", with or without | |||||
| * alignment. For example (with alignment): | |||||
| * | |||||
| * <pre> | |||||
| * Key1 ; Key2 ; Column1 ; Column2 ; Column3 | |||||
| * k1 ; k2 ; OTHER ; "Text" ; true | |||||
| * ... | |||||
| * </pre> | |||||
| * | |||||
| * Where the first {@code N} columns represent the row key (as encapsulated by the key {@link | |||||
| * CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema} via the | |||||
| * column names. | |||||
| */ | |||||
| public boolean exportCsv(Writer writer, boolean align) { | |||||
| return exportCsvHelper(writer, align, getColumns()); | |||||
| } | |||||
| /** | |||||
| * Exports the given table by writing its values as semicolon separated "CSV", with or without | |||||
| * alignment. For example (with alignment): | |||||
| * | |||||
| * <pre> | |||||
| * Key1 ; Key2 ; Column1 ; Column2 ; Column3 | |||||
| * k1 ; k2 ; OTHER ; "Text" ; true | |||||
| * ... | |||||
| * </pre> | |||||
| * | |||||
| * Where the first {@code N} columns represent the row key (as encapsulated by the key {@link | |||||
| * CsvKeyMarshaller}) and the remaining columns correspond to the given {@link Schema} via the | |||||
| * column names. This will add columns that are part of the schema for the given table but have no | |||||
| * assigned values. | |||||
| */ | |||||
| public boolean exportCsvWithEmptyColumnsPresent(Writer writer, boolean align) { | |||||
| return exportCsvHelper( | |||||
| writer, | |||||
| align, | |||||
| Stream.concat(getSchema().columns().getColumns().stream(), getColumns().stream()) | |||||
| .collect(ImmutableSet.toImmutableSet())); | |||||
| } | |||||
| private boolean exportCsvHelper( | |||||
| Writer writer, boolean align, ImmutableSet<Column<?>> columnsToExport) { | |||||
| if (isEmpty()) { | |||||
| // Exit for empty tables (CSV file is truncated). The caller may then delete the empty file. | |||||
| return false; | |||||
| } | |||||
| CsvTableCollector collector = new CsvTableCollector(align); | |||||
| collector.accept( | |||||
| Stream.concat( | |||||
| getSchema().keyMarshaller().getColumns().stream(), | |||||
| columnsToExport.stream().map(Column::getName)) | |||||
| .distinct()); | |||||
| for (K k : getKeys()) { | |||||
| // Format raw values (possibly null) to avoid default values everywhere. | |||||
| collector.accept( | |||||
| Stream.concat( | |||||
| getSchema().keyMarshaller().serialize(k), | |||||
| columnsToExport.stream().map(c -> formatValue(c, getOrNull(k, c))))); | |||||
| } | |||||
| collector.writeCsv(writer); | |||||
| return true; | |||||
| } | |||||
| @Nullable private <T extends Comparable<T>> T getOrNull(K rowKey, Column<T> column) { | |||||
| return column.cast(getRow(rowKey).get(column)); | |||||
| } | |||||
| /** | |||||
| * Returns the value from the underlying table for the given row and column if present. | |||||
| */ | |||||
| public <T extends Comparable<T>> Optional<T> get(K rowKey, Column<T> column) { | |||||
| return Optional.ofNullable(getOrNull(rowKey, column)); | |||||
| } | |||||
| /** | |||||
| * Returns the value from the underlying table for the given row and column, or the (non-null) | |||||
| * default value. | |||||
| */ | |||||
| public <T extends Comparable<T>> T getOrDefault(K rowKey, Column<T> column) { | |||||
| T value = getOrNull(rowKey, column); | |||||
| return value != null ? value : column.defaultValue(); | |||||
| } | |||||
| /** | |||||
| * Returns the set of unique values in the given column. Note that if some rows do not have a | |||||
| * value, then this will NOT result in the column default value being in the returned set. An | |||||
| * empty column will result in an empty set being returned here. | |||||
| */ | |||||
| public <T extends Comparable<T>> ImmutableSortedSet<T> getValues(Column<T> column) { | |||||
| return getKeys().stream() | |||||
| .map(k -> getOrNull(k, column)) | |||||
| .filter(Objects::nonNull) | |||||
| .collect(toImmutableSortedSet(Ordering.natural())); | |||||
| } | |||||
| @Override | |||||
| public final String toString() { | |||||
| StringWriter w = new StringWriter(); | |||||
| exportCsv(w, true); | |||||
| return w.toString(); | |||||
| } | |||||
| /** Parses CSV data on per-row basis, deserializing keys and adding values to a table. */ | |||||
| static class TableParser<K> implements Consumer<List<String>> { | |||||
| private final Builder<K> table; | |||||
| // Set when the header row is processed. | |||||
| private ImmutableList<Column<?>> columns = null; | |||||
| TableParser(CsvSchema<K> schema) { | |||||
| this.table = builder(schema); | |||||
| } | |||||
| @Override | |||||
| public void accept(List<String> row) { | |||||
| if (columns == null) { | |||||
| columns = table.schema.parseHeader(row); | |||||
| } else { | |||||
| table.schema.parseRow(columns, row, table::addRow); | |||||
| } | |||||
| } | |||||
| public CsvTable<K> done() { | |||||
| return table.build(); | |||||
| } | |||||
| } | |||||
| // Newlines can, in theory, be emitted "raw" in the CSV output inside a quoted string, but | |||||
| // this breaks all sorts of nice properties of CSV files, since there's no longer one row per | |||||
| // line. This export process escapes literal newlines and other control characters into Json | |||||
| // like escape sequences ('\n', '\t', '\\' etc...). Unlike Json however, any double-quotes are | |||||
| // _not_ escaped via '\' since the CSV way to escape those is via doubling. We leave other | |||||
| // non-ASCII characters as-is, since this is meant to be as human readable as possible. | |||||
| private static final Escaper ESCAPER = new CharEscaperBuilder() | |||||
| .addEscape('\n', "\\n") | |||||
| .addEscape('\r', "\\r") | |||||
| .addEscape('\t', "\\t") | |||||
| .addEscape('\\', "\\\\") | |||||
| // This is a special case only required when writing CSV file (since the parser handles | |||||
| // unescaping quotes when they are read back in). In theory it should be part of a separate | |||||
| // step during CSV writing, but it's not worth splitting it out. This is not considered an | |||||
| // unsafe char (since it definitely does appear). | |||||
| .addEscape('"', "\"\"") | |||||
| .toEscaper(); | |||||
| private static final CharMatcher ESCAPED_CHARS = CharMatcher.anyOf("\n\r\t\\"); | |||||
| private static final CharMatcher UNSAFE_CHARS = | |||||
| CharMatcher.javaIsoControl().and(ESCAPED_CHARS.negate()); | |||||
| private static String formatValue(Column<?> column, @Nullable Object value) { | |||||
| String unescaped = column.serialize(value); | |||||
| if (unescaped.isEmpty()) { | |||||
| return unescaped; | |||||
| } | |||||
| // Slightly risky with enums, since an enum could have ';' in its toString() representation. | |||||
| // However since columns and their semantics are tightly controlled, this should never happen. | |||||
| if (Number.class.isAssignableFrom(column.type()) | |||||
| || column.type() == Boolean.class | |||||
| || column.type().isEnum()) { | |||||
| checkArgument(ESCAPED_CHARS.matchesNoneOf(unescaped), "Bad 'safe' value: %s", unescaped); | |||||
| return unescaped; | |||||
| } | |||||
| return escapeForSingleLineCsv(unescaped); | |||||
| } | |||||
| /** | |||||
| * Escapes and quotes an arbitrary text string, ensuring it is safe for use as a single-line CSV | |||||
| * value. Newlines, carriage returns and tabs are backslash escaped (as is backslash itself) and | |||||
| * other ISO control characters are not permitted. | |||||
| * | |||||
| * <p>The purpose of this method is to make arbitrary Unicode text readable in a single line of | |||||
| * a CSV file so that we can rely on per-line processing tools, such as "grep" or "sed" if needed | |||||
| * without requiring expensive conversion to/from a spreadsheet. | |||||
| */ | |||||
| public static String escapeForSingleLineCsv(String unescaped) { | |||||
| checkArgument(UNSAFE_CHARS.matchesNoneOf(unescaped), "Bad string value: %s", unescaped); | |||||
| return '"' + ESCAPER.escape(unescaped) + '"'; | |||||
| } | |||||
| /** | |||||
| * Unescapes a line of text escaped by {@link #escapeForSingleLineCsv(String)} to restore literal | |||||
| * newlines and other backslash-escaped characters. Note that if the given string already has | |||||
| * newlines present, they are preserved but will then be escaped if the text is re-escaped later. | |||||
| */ | |||||
| public static String unescapeSingleLineCsvText(String s) { | |||||
| int i = s.indexOf('\\'); | |||||
| if (i == -1) { | |||||
| return s; | |||||
| } | |||||
| StringBuilder out = new StringBuilder(); | |||||
| int start = 0; | |||||
| do { | |||||
| out.append(s, start, i); | |||||
| char c = s.charAt(++i); | |||||
| out.append(checkNotNull(UNESCAPE.get(c), "invalid escape sequence: \\%s", c)); | |||||
| start = i + 1; | |||||
| i = s.indexOf('\\', start); | |||||
| } while (i != -1); | |||||
| return out.append(s, start, s.length()).toString(); | |||||
| } | |||||
| private static final ImmutableMap<Character, Character> UNESCAPE = | |||||
| ImmutableMap.<Character, Character>builder() | |||||
| .put('n', '\n') | |||||
| .put('r', '\r') | |||||
| .put('t', '\t') | |||||
| .put('\\', '\\') | |||||
| .build(); | |||||
// Not private only so that the AutoValue generated subclass can call it.
CsvTable() {}
| } | |||||
| @ -0,0 +1,99 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static java.util.stream.Collectors.joining; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import java.io.PrintWriter; | |||||
| import java.io.Writer; | |||||
| import java.util.ArrayList; | |||||
| import java.util.List; | |||||
| import java.util.NavigableMap; | |||||
| import java.util.TreeMap; | |||||
| import java.util.function.Consumer; | |||||
| import java.util.stream.IntStream; | |||||
| import java.util.stream.Stream; | |||||
| /** Collects cell values and tracks maximum cell width to make it easy to output aligned CSV. */ | |||||
| public final class CsvTableCollector implements Consumer<Stream<String>> { | |||||
| private final NavigableMap<Integer, Integer> maxLengths = new TreeMap<>(); | |||||
| private final List<List<String>> cells = new ArrayList<>(); | |||||
| private final boolean align; | |||||
| public CsvTableCollector(boolean align) { | |||||
| this.align = align; | |||||
| } | |||||
| /** | |||||
| * Writes the contents of this table, with optional alignment, as a CSV table. Returns whether | |||||
| * anything was written. | |||||
| */ | |||||
| public void writeCsv(Writer writer) { | |||||
| try (PrintWriter out = new PrintWriter(writer)) { | |||||
| // Pad elements with whitespace when aligning (since we've gone to all the effort of padding | |||||
| // everything else). | |||||
| String joiner = align ? " ; " : ";"; | |||||
| for (int rowIndex = 0; rowIndex < cells.size(); rowIndex++) { | |||||
| // No need to use CharMatcher to trim "properly" since only ASCII space is possible. | |||||
| out.println(getRow(rowIndex).collect(joining(joiner)).trim()); | |||||
| } | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Accepts the next row in the CSV table. Note that the first consumer returned is expected to | |||||
| * have the title row written to it. | |||||
| * | |||||
| * <p>Values passed into the accept method of the returned consumer are expected to have already | |||||
| * been escaped if necessary. The caller must call the {@link Consumer#accept(Object)} method for | |||||
| * every column of the table, even if only to pass an empty string to indicate an empty cell. | |||||
| */ | |||||
| @Override | |||||
| public void accept(Stream<String> row) { | |||||
| ImmutableList<String> rowValues = row.collect(toImmutableList()); | |||||
| for (int i = 0; i < rowValues.size(); i++) { | |||||
| updateMaxLength(rowValues.get(i), i); | |||||
| } | |||||
| cells.add(rowValues); | |||||
| } | |||||
| private Stream<String> getRow(int index) { | |||||
| List<String> row = cells.get(index); | |||||
| int length = row.size(); | |||||
| while (length > 0 && row.get(length - 1).isEmpty()) { | |||||
| length--; | |||||
| } | |||||
| if (align) { | |||||
| return IntStream.range(0, length).mapToObj(n -> pad(row.get(n), maxLength(n))); | |||||
| } | |||||
| return row.stream().limit(length); | |||||
| } | |||||
| private static String pad(String s, int len) { | |||||
| return len > 0 ? String.format("%-" + len + "s", s) : ""; | |||||
| } | |||||
| private int maxLength(int index) { | |||||
| return maxLengths.getOrDefault(index, 0); | |||||
| } | |||||
| private void updateMaxLength(String s, int index) { | |||||
| // Note: This isn't Unicode aware, but in reality it's not that important. | |||||
| maxLengths.put(index, Math.max(s.length(), maxLength(index))); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,100 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.Maps; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Comparator; | |||||
| import java.util.EnumSet; | |||||
| import java.util.List; | |||||
| import java.util.Optional; | |||||
| import java.util.function.Function; | |||||
| import java.util.stream.Stream; | |||||
| /** Key for use in "diff" tables, allowing rows to be marked with a diff status. */ | |||||
| @AutoValue | |||||
| public abstract class DiffKey<K> { | |||||
| /** | |||||
| * Status for rows in a "diff table". Every row in a diff table has a {@code DiffKey}, with a | |||||
| * status. Modified rows appear twice in the diff table, once for the left-side row, and once for | |||||
| * the right-side row. | |||||
| */ | |||||
| public enum Status { | |||||
| /** A row which appears exclusively in the left-hand-side of the diff. */ | |||||
| LHS_ONLY("----"), | |||||
| /** A row which appears exclusively in the right-hand-side of the diff. */ | |||||
| RHS_ONLY("++++"), | |||||
| /** The left-hand-side row which was modified by the diff. */ | |||||
| LHS_CHANGED("<<<<"), | |||||
| /** The right-hand-side row which was modified by the diff. */ | |||||
| RHS_CHANGED(">>>>"), | |||||
| /** A row unchanged by the diff. */ | |||||
| UNCHANGED("===="); | |||||
| private static final ImmutableMap<String, Status> MAP = | |||||
| Maps.uniqueIndex(EnumSet.allOf(Status.class), Status::getLabel); | |||||
| private final String label; | |||||
| Status(String label) { | |||||
| this.label = label; | |||||
| } | |||||
| String getLabel() { | |||||
| return label; | |||||
| } | |||||
| static Status parse(String s) { | |||||
| return MAP.get(s); | |||||
| } | |||||
| } | |||||
| static <K> CsvKeyMarshaller<DiffKey<K>> wrap(CsvKeyMarshaller<K> keyMarshaller) { | |||||
| List<String> keyColumns = new ArrayList<>(); | |||||
| keyColumns.add("Diff"); | |||||
| keyColumns.addAll(keyMarshaller.getColumns()); | |||||
| return new CsvKeyMarshaller<>( | |||||
| serialize(keyMarshaller), deserialize(keyMarshaller), ordering(keyMarshaller), keyColumns); | |||||
| } | |||||
| static <K> DiffKey<K> of(Status status, K key) { | |||||
| return new AutoValue_DiffKey<>(status, key); | |||||
| } | |||||
| public abstract Status getStatus(); | |||||
| public abstract K getOriginalKey(); | |||||
| private static <T> Function<DiffKey<T>, Stream<String>> serialize(CsvKeyMarshaller<T> m) { | |||||
| return k -> Stream.concat(Stream.of(k.getStatus().getLabel()), m.serialize(k.getOriginalKey())); | |||||
| } | |||||
| private static <T> Function<List<String>, DiffKey<T>> deserialize(CsvKeyMarshaller<T> m) { | |||||
| return r -> | |||||
| new AutoValue_DiffKey<>(Status.parse(r.get(0)), m.deserialize(r.subList(1, r.size()))); | |||||
| } | |||||
| private static <T> Optional<Comparator<DiffKey<T>>> ordering(CsvKeyMarshaller<T> m) { | |||||
| return m.ordering().map(o -> { | |||||
| // Weird bug (possibly IntelliJ) means it really doesn't do well inferring types over lambdas | |||||
| // for this sort of chained API call. Pulling into separate variables works fine. | |||||
| Comparator<DiffKey<T>> keyFn = Comparator.comparing(DiffKey::getOriginalKey, o); | |||||
| return keyFn.thenComparing(DiffKey::getStatus); | |||||
| }); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,186 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import static com.google.common.collect.Maps.filterValues; | |||||
| import static com.google.common.collect.Maps.transformValues; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.ImmutableSortedMap; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||||
| import java.util.Map.Entry; | |||||
| import java.util.SortedMap; | |||||
| import java.util.TreeMap; | |||||
| import javax.annotation.Nullable; | |||||
| /** | |||||
| * A mapping from category values to a set of disjoint ranges. This is used only by the RangeTable | |||||
| * class to represent a column of values. | |||||
| */ | |||||
| final class DisjointRangeMap<T extends Comparable<T>> { | |||||
| static final class Builder<T extends Comparable<T>> { | |||||
| private final Column<T> column; | |||||
| private final SortedMap<T, RangeTree> map = new TreeMap<>(); | |||||
| // Cache of all assigned ranges, used repeatedly by RangeTable . This could be recalculated | |||||
| // every time it's needed, but it's just as easy to keep it cached here. | |||||
| private RangeTree assignedRanges = RangeTree.empty(); | |||||
| Builder(Column<T> column) { | |||||
| this.column = checkNotNull(column); | |||||
| } | |||||
| /** | |||||
| * Returns the ranges assigned to the given value (returns the empty range if the given value | |||||
| * is unassigned in this column). Note that unlike table operations, it makes no sense to allow | |||||
| * {@code null} to be used to determine the unassigned ranges, since calculating that requires | |||||
| * knowledge of the table in which this column exists. | |||||
| */ | |||||
| RangeTree getRanges(Object value) { | |||||
| T checkedValue = column.cast(checkNotNull(value)); | |||||
| return map.getOrDefault(checkedValue, RangeTree.empty()); | |||||
| } | |||||
| /** Returns the currently assigned ranges for this column. */ | |||||
| RangeTree getAssignedRanges() { | |||||
| return assignedRanges; | |||||
| } | |||||
| /** | |||||
| * Checks whether the "proposed" assignment would succeed with the specified overwrite mode | |||||
| * (assignments always succeed if the mode is {@link OverwriteMode#ALWAYS} ALWAYS). If the | |||||
| * given value is {@code null} and the mode is not {@code ALWAYS}, this method ensures that | |||||
| * none of the given ranges are assigned to any value in this column. | |||||
| * <p> | |||||
| * This is useful as a separate method when multiple changes are to be made which cannot be | |||||
| * allowed to fail halfway through. | |||||
| * | |||||
| * @throws IllegalArgumentException if the value cannot be added to the column. | |||||
| * @throws RangeException if the write is not possible with the given mode. | |||||
| */ | |||||
| T checkAssign(@Nullable Object value, RangeTree ranges, OverwriteMode mode) { | |||||
| // Always check the proposed value (for consistency). | |||||
| T checkedValue = column.cast(value); | |||||
| if (mode != OverwriteMode.ALWAYS) { | |||||
| checkArgument(checkedValue != null, | |||||
| "Assigning a null value (unassignment) with mode other than ALWAYS makes no sense: %s", | |||||
| mode); | |||||
| if (mode == OverwriteMode.SAME) { | |||||
| // Don't care about ranges that are already in the map. | |||||
| ranges = ranges.subtract(map.getOrDefault(checkedValue, RangeTree.empty())); | |||||
| } | |||||
| RangeException.checkDisjoint(column, checkedValue, assignedRanges, ranges, mode); | |||||
| } | |||||
| return checkedValue; | |||||
| } | |||||
| /** | |||||
| * Assigns the given ranges to the specified value in this column. After a call to | |||||
| * {@code assign()} with a non-null value it is true that: | |||||
| * <ul> | |||||
| * <li>The result of {@code getRanges(value)} will contain at least the given ranges. | |||||
| * <li>No ranges assigned to any other category value will intersect with the given ranges. | |||||
| * </ul> | |||||
| * If ranges are "assigned" to {@code null}, it has the effect of unassigning them. | |||||
| * | |||||
| * @param value the category value to assign ranges to, or {@code null} to unassign. | |||||
| * @param ranges the ranges to assign to the category value with ID {@code id}. | |||||
| * @param mode the overwrite mode describing how to handle existing assignments. | |||||
| * @throws IllegalArgumentException if the assignment violates the given {@link OverwriteMode}. | |||||
| */ | |||||
| void assign(@Nullable Object value, RangeTree ranges, OverwriteMode mode) { | |||||
| T checkedValue = checkAssign(value, ranges, mode); | |||||
| // Now unassign the ranges for all other values (only necessary if mode is "ALWAYS" since in | |||||
| // other modes we've already ensured there's no intersection). | |||||
| if (mode == OverwriteMode.ALWAYS) { | |||||
| RangeTree overlap = assignedRanges.intersect(ranges); | |||||
| if (!overlap.isEmpty()) { | |||||
| for (Entry<T, RangeTree> e : map.entrySet()) { | |||||
| // Skip needless extra work for the value we are about to assign. | |||||
| if (!e.getKey().equals(checkedValue)) { | |||||
| e.setValue(e.getValue().subtract(overlap)); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| if (checkedValue != null) { | |||||
| map.put(checkedValue, ranges.union(map.getOrDefault(checkedValue, RangeTree.empty()))); | |||||
| assignedRanges = assignedRanges.union(ranges); | |||||
| } else { | |||||
| assignedRanges = assignedRanges.subtract(ranges); | |||||
| } | |||||
| } | |||||
| /** Builds the range map. */ | |||||
| DisjointRangeMap<T> build() { | |||||
| return new DisjointRangeMap<T>(column, map, assignedRanges); | |||||
| } | |||||
| } | |||||
| private final Column<T> column; | |||||
| private final ImmutableSortedMap<T, RangeTree> map; | |||||
| private final RangeTree assignedRanges; | |||||
| private DisjointRangeMap( | |||||
| Column<T> column, SortedMap<T, RangeTree> map, RangeTree assignedRanges) { | |||||
| this.column = checkNotNull(column); | |||||
| this.map = ImmutableSortedMap.copyOfSorted(filterValues(map, r -> !r.isEmpty())); | |||||
| this.assignedRanges = assignedRanges; | |||||
| } | |||||
| /** | |||||
| * Returns the ranges assigned to the given value. | |||||
| * | |||||
| * @throws IllegalArgumentException if {@code value} is not a value in this category. | |||||
| */ | |||||
| RangeTree getRanges(Object value) { | |||||
| return map.get(column.cast(value)); | |||||
| } | |||||
| /** Returns all values assigned to non-empty ranges in this column. */ | |||||
| ImmutableSet<T> getAssignedValues() { | |||||
| return map.keySet(); | |||||
| } | |||||
| /** Returns the union of all assigned ranges in this column. */ | |||||
| RangeTree getAssignedRanges() { | |||||
| return assignedRanges; | |||||
| } | |||||
| /** Intersects this column with the given bounds. */ | |||||
| DisjointRangeMap<T> intersect(RangeTree bounds) { | |||||
| return new DisjointRangeMap<T>( | |||||
| column, transformValues(map, r -> r.intersect(bounds)), assignedRanges.intersect(bounds)); | |||||
| } | |||||
| @Override | |||||
| public boolean equals(Object obj) { | |||||
| if (!(obj instanceof DisjointRangeMap<?>)) { | |||||
| return false; | |||||
| } | |||||
| // No need to check "assignedRanges" since it's just a cache of other values anyway. | |||||
| DisjointRangeMap<?> other = (DisjointRangeMap<?>) obj; | |||||
| return this == other || (column.equals(other.column) && map.equals(other.map)); | |||||
| } | |||||
| @Override | |||||
| public int hashCode() { | |||||
| return column.hashCode() ^ map.hashCode(); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,116 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.CharMatcher.whitespace; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import com.google.common.base.Joiner; | |||||
| import com.google.common.base.Splitter; | |||||
| import com.google.common.collect.Comparators; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.ImmutableSortedSet; | |||||
| import java.util.Comparator; | |||||
| import java.util.function.Function; | |||||
| /** | |||||
| * A wrapper to permit sets of values to be specified as a single "cell" in a CsvTable or | |||||
| * RangeTable. Currently only sets of values are permitted (not lists) so duplicate elements are | |||||
| * not allowed. This is easy to change in future, but the real data suggests no use case for that. | |||||
| * | |||||
| * <p>The expectation of this class is that specific, non-generic subclasses will be made to | |||||
| * "solidify" the choice of value type, separator and value ordering. This is why those specific | |||||
| * attributes are not tested in the equals()/hashCode() methods, since they are expected to be | |||||
| * constant for a given implementation. Subclasses should be final, and look something like: | |||||
| * <pre> {@code | |||||
| * public static final class Foos extends MultiValue<Foo, Foos> { | |||||
| * private static final Foos EMPTY = new Foos(ImmutableSet.of()); | |||||
| * | |||||
| * public static Column<Foos> column(String name) { | |||||
| * return Column.create(Foos.class, name, EMPTY, Foos::new); | |||||
| * } | |||||
| * | |||||
| * public static Foos of(Iterable<Foo> foos) { | |||||
| * return new Foos(foos); | |||||
| * } | |||||
| * | |||||
| * private Foos(Iterable<Foo> foos) { super(foos, <separator>, <ordering>, <sorted>); } | |||||
| * private Foos(String s) { super(s, <parseFn>, <separator>, <ordering>, <sorted>); } | |||||
| * } | |||||
| * }</pre> | |||||
| * where {@code <separator>}, {@code <ordering>} and {@code <sorted>} are the same constants in | |||||
| * both places. | |||||
| */ | |||||
| public abstract class MultiValue<T, M extends MultiValue<T, M>> | |||||
| implements Comparable<M> { | |||||
| private final ImmutableSet<T> values; | |||||
| private final char separator; | |||||
| private final Comparator<Iterable<T>> comparator; | |||||
| protected MultiValue( | |||||
| String s, Function<String, T> fn, char separator, Comparator<T> comparator, boolean sorted) { | |||||
| this(parse(s, fn, separator), separator, comparator, sorted); | |||||
| } | |||||
| protected MultiValue( | |||||
| Iterable<T> values, char separator, Comparator<T> comparator, boolean sorted) { | |||||
| this.separator = separator; | |||||
| this.values = | |||||
| sorted ? ImmutableSortedSet.copyOf(comparator, values) : ImmutableSet.copyOf(values); | |||||
| this.comparator = Comparators.lexicographical(comparator); | |||||
| } | |||||
| private static <T> ImmutableList<T> parse(String s, Function<String, T> fn, char separator) { | |||||
| Splitter splitter = Splitter.on(separator).omitEmptyStrings().trimResults(whitespace()); | |||||
| return splitter.splitToList(s).stream().map(fn).collect(toImmutableList()); | |||||
| } | |||||
| public final ImmutableSet<T> getValues() { | |||||
| return values; | |||||
| } | |||||
| public final char separator() { | |||||
| return separator; | |||||
| } | |||||
| @Override | |||||
| public final int compareTo(M that) { | |||||
| // The separator doesn't factor in here since it's always the same. | |||||
| return comparator.compare(this.getValues(), that.getValues()); | |||||
| } | |||||
| @Override | |||||
| @SuppressWarnings({"unchecked", "EqualsGetClass"}) | |||||
| public final boolean equals(Object obj) { | |||||
| // Check exact subclass, since we expect separators and ordering to always be the same. | |||||
| if (obj == null || obj.getClass() != getClass()) { | |||||
| return false; | |||||
| } | |||||
| return getValues().equals(((MultiValue<T, M>) obj).getValues()); | |||||
| } | |||||
| @Override | |||||
| public final int hashCode() { | |||||
| return getValues().hashCode(); | |||||
| } | |||||
| @Override | |||||
| public final String toString() { | |||||
| return Joiner.on(separator()).join(getValues()); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,74 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import static java.util.stream.Collectors.joining; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||||
| import javax.annotation.Nullable; | |||||
| /** A structured exception which should be used whenever structural errors occur in table data. */ | |||||
| public final class RangeException extends IllegalArgumentException { | |||||
| // Called when assigning ranges, depending on the overwrite mode. As more cases are added, | |||||
| // consider refactoring and subclassing for clean semantics. | |||||
| static <T extends Comparable<T>> void checkDisjoint( | |||||
| Column<T> column, T value, RangeTree existing, RangeTree ranges, OverwriteMode mode) { | |||||
| RangeTree intersection = existing.intersect(ranges); | |||||
| if (!intersection.isEmpty()) { | |||||
| // A non-empty intersection implies both inputs are also non-empty. | |||||
| throw new RangeException(column, value, existing, ranges, intersection, mode); | |||||
| } | |||||
| } | |||||
| RangeException(Column<?> column, | |||||
| @Nullable Object value, | |||||
| RangeTree existing, | |||||
| RangeTree ranges, | |||||
| RangeTree intersection, | |||||
| OverwriteMode mode) { | |||||
| super(explain(checkNotNull(column), value, existing, ranges, intersection, checkNotNull(mode))); | |||||
| } | |||||
| private static String explain( | |||||
| Column<?> column, | |||||
| @Nullable Object value, | |||||
| RangeTree existing, | |||||
| RangeTree ranges, | |||||
| RangeTree intersection, | |||||
| OverwriteMode mode) { | |||||
| return String.format( | |||||
| "cannot assign non-disjoint ranges for value '%s' in column '%s' using overwrite mode: %s\n" | |||||
| + "overlapping ranges:\n%s" | |||||
| + "existing ranges:\n%s" | |||||
| + "new ranges:\n%s", | |||||
| value, column, mode, toLines(intersection), toLines(existing), toLines(ranges)); | |||||
| } | |||||
| private static String toLines(RangeTree ranges) { | |||||
| checkArgument(!ranges.isEmpty()); | |||||
| return ranges.asRangeSpecifications().stream().map(s -> " " + s + "\n").collect(joining()); | |||||
| } | |||||
| // We suppress stack traces for "semantic" exceptions, since these aren't intended to indicate | |||||
| // bugs, but rather user error (for which a stack trace is not very useful). | |||||
| @Override | |||||
| public synchronized Throwable fillInStackTrace() { | |||||
| return this; | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,215 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import static com.google.common.base.Preconditions.checkState; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK; | |||||
| import static java.lang.Integer.numberOfTrailingZeros; | |||||
| import static java.util.Comparator.comparing; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableSortedSet; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Comparator; | |||||
| import java.util.List; | |||||
| import java.util.NavigableSet; | |||||
| import java.util.Set; | |||||
| /** | |||||
| * A range key is somewhat similar to a {@link RangeSpecification}, except that it can encode | |||||
| * multiple possible lengths for a single range prefix. Range keys are particularly useful as | |||||
| * unique "row keys" when representing range trees as tabular data. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class RangeKey { | |||||
| /** | |||||
| * Order by prefix first and then minimum length. For row keys representing disjoint ranges, this | |||||
| * will be a total ordering (since the comparison is really with the "shortest" digit sequence in | |||||
| * the ranges, which must be distinct for disjoint ranges). | |||||
| */ | |||||
| public static final Comparator<RangeKey> ORDERING = | |||||
| comparing(RangeKey::getPrefix, comparing(s -> s.min().toString())) | |||||
| .thenComparing(RangeKey::getLengths, comparing(NavigableSet::first)); | |||||
| /** | |||||
| * Creates a range key representing ranges with a prefix of some set of lengths. The prefix must | |||||
| * not be longer than the possible lengths and cannot end with an "any" edge (i.e. "x"). | |||||
| */ | |||||
| public static RangeKey create(RangeSpecification prefix, Set<Integer> lengths) { | |||||
| checkArgument(prefix.length() == 0 || prefix.getBitmask(prefix.length() - 1) != ALL_DIGITS_MASK, | |||||
| "prefix cannot end with an 'any' edge: %s", prefix); | |||||
| ImmutableSortedSet<Integer> sorted = ImmutableSortedSet.copyOf(lengths); | |||||
| checkArgument(sorted.first() >= prefix.length(), | |||||
| "lengths cannot be shorter than the prefix: %s - %s", prefix, lengths); | |||||
| return new AutoValue_RangeKey(prefix, sorted); | |||||
| } | |||||
| /** | |||||
| * Decomposes the given range tree into a sorted sequence of keys, representing the same digit | |||||
| * sequences. The resulting keys form a disjoint covering of the original range set, and no | |||||
| * two keys will contain the same prefix (but prefixes of keys may overlap, even if the ranges | |||||
| * they ultimately represent do not). The resulting sequence is ordered by {@link #ORDERING}. | |||||
| */ | |||||
| public static ImmutableList<RangeKey> decompose(RangeTree tree) { | |||||
| List<RangeKey> keys = new ArrayList<>(); | |||||
| // The ALLOW_EDGE_SPLITTING strategy works best for the case of generating row keys because it | |||||
| // helps avoid having the same sequence appear in multiple rows. Note however than even this | |||||
| // strategy isn't perfect, and partially overlapping ranges with different lengths can still | |||||
| // cause issues. For example, 851 appears as a prefix for 2 rows in the following (real world) | |||||
| // example. | |||||
| // prefix=85[1-9], length=10 | |||||
| // prefix=8[57]1, length=11 | |||||
| // However a given digit sequence will still only appear in (at most) one range key based on | |||||
| // its length. | |||||
| for (RangeTree f : RangeTreeFactorizer.factor(tree, MergeStrategy.ALLOW_EDGE_SPLITTING)) { | |||||
| KeyVisitor.visit(f, keys); | |||||
| } | |||||
| return ImmutableList.sortedCopyOf(ORDERING, keys); | |||||
| } | |||||
| // A recursive descent visitor that splits range keys from the visited tree on the upward phase | |||||
| // of visitation. After finding the terminal node, the visitor tries to strip as much of the | |||||
| // trailing "any" path as possible, to leave the prefix. Note that the visitor can never start | |||||
| // another downward visitation while its processing the "any" paths, because if it walks up | |||||
| // through an "any" path, the node it reaches cannot have any other edges coming from it (the | |||||
| // "any" path is all the possible edges). | |||||
| private static class KeyVisitor implements DfaVisitor { | |||||
| private static void visit(RangeTree tree, List<RangeKey> keys) { | |||||
| KeyVisitor v = new KeyVisitor(keys); | |||||
| tree.accept(v); | |||||
| // We may still need to emit a key for ranges with "any" paths that reach the root node. | |||||
| int lengthMask = v.lengthMask; | |||||
| // Shouldn't happen for phone numbers, since it implies the existence of "zero length" digit | |||||
| // sequences. | |||||
| if (tree.getInitial().canTerminate()) { | |||||
| lengthMask |= 1; | |||||
| } | |||||
| if (lengthMask != 0) { | |||||
| // Use the empty specification as a prefix since the ranges are defined purely by length. | |||||
| keys.add(new AutoValue_RangeKey(RangeSpecification.empty(), buildLengths(lengthMask))); | |||||
| } | |||||
| } | |||||
| // Collection of extracted keys. | |||||
| private final List<RangeKey> keys; | |||||
| // Current path from the root of the tree being visited. | |||||
| private RangeSpecification path = RangeSpecification.empty(); | |||||
| // Non-zero when we are in the "upward" phase of visitation, processing trailing "any" paths. | |||||
| // When zero we are either in a "downward" phase or traversing up without stripping paths. | |||||
| private int lengthMask = 0; | |||||
| private KeyVisitor(List<RangeKey> keys) { | |||||
| this.keys = checkNotNull(keys); | |||||
| } | |||||
| @Override | |||||
| public void visit(DfaNode source, DfaEdge edge, DfaNode target) { | |||||
| checkState(lengthMask == 0, | |||||
| "during downward tree traversal, length mask should be zero (was %s)", lengthMask); | |||||
| RangeSpecification oldPath = path; | |||||
| path = path.extendByMask(edge.getDigitMask()); | |||||
| if (target.equals(RangeTree.getTerminal())) { | |||||
| lengthMask = (1 << path.length()); | |||||
| // We might emit the key immediately for ranges without trailing paths (e.g. "1234"). | |||||
| maybeEmitKey(); | |||||
| } else { | |||||
| target.accept(this); | |||||
| // If we see a terminating node, we are either adding a new possible length to an existing | |||||
| // key or starting to process a new key (we don't know and it doesn't matter providing we | |||||
| // capture the current length in the mask). | |||||
| if (target.canTerminate()) { | |||||
| lengthMask |= (1 << path.length()); | |||||
| } | |||||
| maybeEmitKey(); | |||||
| } | |||||
| path = oldPath; | |||||
| } | |||||
| // Conditionally emits a key for the current path prefix and possible lengths if we've found | |||||
| // the "end" of an "any" path (e.g. we have possible lengths and the edge above us is not an | |||||
| // "any" path). | |||||
| private void maybeEmitKey() { | |||||
| if (lengthMask != 0 && path.getBitmask(path.length() - 1) != ALL_DIGITS_MASK) { | |||||
| keys.add(new AutoValue_RangeKey(path, buildLengths(lengthMask))); | |||||
| lengthMask = 0; | |||||
| } | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Returns the prefix for this range key. All digit sequences matches by this key are of the | |||||
| * form {@code "<prefix>xxxx"} for some number of "any" edges. This prefix can be "empty" for | |||||
| * ranges such as {@code "xxxx"}. | |||||
| */ | |||||
| public abstract RangeSpecification getPrefix(); | |||||
| /** | |||||
| * Returns the possible lengths for digit sequences matched by this key. The returned set is | |||||
| * never empty. | |||||
| */ | |||||
| public abstract ImmutableSortedSet<Integer> getLengths(); | |||||
| /** | |||||
| * Converts the range key into a sequence of range specifications, ordered by length. The | |||||
| * returned set is never empty. | |||||
| */ | |||||
| public final ImmutableList<RangeSpecification> asRangeSpecifications() { | |||||
| RangeSpecification s = getPrefix(); | |||||
| return getLengths().stream() | |||||
| .map(n -> s.extendByLength(n - s.length())) | |||||
| .collect(toImmutableList()); | |||||
| } | |||||
| public final RangeTree asRangeTree() { | |||||
| RangeSpecification s = getPrefix(); | |||||
| return RangeTree.from(getLengths().stream().map(n -> s.extendByLength(n - s.length()))); | |||||
| } | |||||
| /* | |||||
| * Checks if the RangeKey contains a range represented by the given prefix and length. | |||||
| */ | |||||
| public boolean contains(DigitSequence prefix, Integer length) { | |||||
| return asRangeSpecifications().stream() | |||||
| .anyMatch( | |||||
| specification -> | |||||
| specification.matches( | |||||
| prefix.extendBy(DigitSequence.zeros(length - prefix.length())))); | |||||
| } | |||||
| private static ImmutableSortedSet<Integer> buildLengths(int lengthMask) { | |||||
| checkArgument(lengthMask != 0); | |||||
| ImmutableSortedSet.Builder<Integer> lengths = ImmutableSortedSet.naturalOrder(); | |||||
| do { | |||||
| int length = numberOfTrailingZeros(lengthMask); | |||||
| lengths.add(length); | |||||
| // Clear each bit as we go. | |||||
| lengthMask &= ~(1 << length); | |||||
| } while (lengthMask != 0); | |||||
| return lengths.build(); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,951 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | |||||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||||
| import static com.google.common.collect.Iterables.transform; | |||||
| import static com.google.common.collect.Maps.immutableEntry; | |||||
| import static java.util.Comparator.comparing; | |||||
| import static java.util.Map.Entry.comparingByKey; | |||||
| import static java.util.stream.Collectors.joining; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.HashBasedTable; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.ImmutableTable; | |||||
| import com.google.common.collect.Iterables; | |||||
| import com.google.common.collect.Sets; | |||||
| import com.google.common.collect.Table; | |||||
| import com.google.common.collect.TreeBasedTable; | |||||
| import com.google.common.collect.UnmodifiableIterator; | |||||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Collection; | |||||
| import java.util.Comparator; | |||||
| import java.util.Iterator; | |||||
| import java.util.List; | |||||
| import java.util.Map; | |||||
| import java.util.Map.Entry; | |||||
| import java.util.NoSuchElementException; | |||||
| import java.util.Optional; | |||||
| import java.util.Set; | |||||
| import java.util.SortedMap; | |||||
| import java.util.TreeMap; | |||||
| import java.util.function.Function; | |||||
| import javax.annotation.Nullable; | |||||
| /** | |||||
| * A tabular representation of attributes, assigned to number ranges. | |||||
| * <p> | |||||
| * A {@code RangeTable} is equivalent to {@code Table<RangeSpecification, Column, Value>}, | |||||
| * but is expressed as a mapping of {@code (Column, Value) -> RangeTree} (since {@code RangeTree} | |||||
| * is not a good key). To keep the data structurally equivalent to its tabular form, it's important | |||||
| * that within a column, all assigned ranges are mutually disjoint (and thus a digit sequence can | |||||
| * have at most one value assigned in any column). | |||||
| * | |||||
| * <h3>Table Schemas</h3> | |||||
| * A table requires a {@link Schema}, which defines the columns which can be present and their | |||||
| * order. Column ordering is important since it relates to how rules are applied (see below). | |||||
| * | |||||
| * <h3>Columns and Column Groups</h3> | |||||
| * A {@link Column} defines a category of values of a particular type (e.g. String, Boolean, | |||||
| * Integer or user specified enums) and a default value. New columns can be implemented easily and | |||||
| * can choose to limit their values to some known set. | |||||
| * <p> | |||||
| * A {@link ColumnGroup} defines a related set of columns of the same type. The exact set of | |||||
| * columns available in a group is not necessarily known in advance. A good example of a column | |||||
| * group is having columns for names in different languages. A column group of "Name" could define | |||||
| * columns such as "Name:en", "Name:fr", "Name:ja" etc. which contain the various translations of | |||||
| * the value. The first time a value is added for a column inferred by a column group, that column | |||||
| * is created. | |||||
| * <p> | |||||
| * An {@link Assignment} is a useful way to encapsulate "a value in a column" and can be used to | |||||
| * assign or unassign values to ranges, or query for the ranges which have that assignment. | |||||
| * | |||||
| * <h3>Builders and Unassigned Values</h3> | |||||
| * To allow a {@code RangeTable} to fully represent data in a tabular way, it must be possible to | |||||
| * have rows in a table for which no value is assigned in any column. Unassigned ranges can be | |||||
| * added to a builder via the {@link Builder#add(RangeTree)} method, and these "empty rows" are | |||||
| * preserved in the final table. | |||||
| * <p> | |||||
| * This is useful since it allows a {@link Change} to affect no columns, but still have an effect | |||||
| * on the final table. It's also useful when applying rules to infer values and fill-in column | |||||
| * defaults. | |||||
| */ | |||||
| public final class RangeTable { | |||||
/** Rules governing whether existing assignments may be overwritten when assigning ranges. */
public enum OverwriteMode {
  /** Ranges may only be assigned if they were previously unassigned. */
  NEVER,
  /** Ranges may only be assigned if they were unassigned or already had the same value. */
  SAME,
  /** Ranges are always assigned (and unassigned from any other values in the same category). */
  ALWAYS
}
| /** A builder for an immutable range table to which changes and rules can be applied. */ | |||||
| public static final class Builder { | |||||
| // The schema for the table to be built. | |||||
| private final Schema schema; | |||||
| // The map of per-column ranges. | |||||
| private final SortedMap<Column<?>, DisjointRangeMap.Builder<?>> columnRanges; | |||||
| // The union of all ranges added to the builder (either by assignment or range addition). | |||||
| // This is not just a cache of all the assigned ranges, since assigning and unassigning a range | |||||
| // will not cause it to be removed from the table altogether (even if it is no longer assigned | |||||
| // in any column). | |||||
| private RangeTree allRanges = RangeTree.empty(); | |||||
// Creates an empty builder whose per-column map is sorted by the ordering of the given schema.
private Builder(Schema schema) {
  checkNotNull(schema);
  this.schema = schema;
  this.columnRanges = new TreeMap<>(schema.ordering());
}
| // Helper to return an on-demand builder for a column. | |||||
| private <T extends Comparable<T>> DisjointRangeMap.Builder<T> getOrAddRangeMap(Column<T> c) { | |||||
| // The generic type of the builder is defined by the column it's building for, and the map | |||||
| // just uses that column as its key. Thus, if the given column is recognized by the schema, | |||||
| // the returned builder must be of the same type. | |||||
| @SuppressWarnings("unchecked") | |||||
| DisjointRangeMap.Builder<T> ranges = (DisjointRangeMap.Builder<T>) | |||||
| columnRanges.computeIfAbsent(schema.checkColumn(c), DisjointRangeMap.Builder::new); | |||||
| return ranges; | |||||
| } | |||||
| // ---- Read-only API ---- | |||||
| /** Returns the schema for this builder. */ | |||||
| public Schema getSchema() { | |||||
| return schema; | |||||
| } | |||||
| /** | |||||
| * Returns ranges for the given assignment. If the value is {@code empty}, then the unassigned | |||||
| * ranges in the column are returned. | |||||
| */ | |||||
| public RangeTree getRanges(Assignment<?> assignment) { | |||||
| return getRanges(assignment.column(), assignment.value().orElse(null)); | |||||
| } | |||||
| /** | |||||
| * Returns ranges for the given value in the specified column. If the value is {@code null}, | |||||
| * then the unassigned ranges in the column are returned. If the column has no values assigned, | |||||
| * then the empty range is returned (or, if {@code value == null}, all ranges in the table). | |||||
| */ | |||||
| public RangeTree getRanges(Column<?> column, @Nullable Object value) { | |||||
| getSchema().checkColumn(column); | |||||
| DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column); | |||||
| if (value != null) { | |||||
| return rangeMap != null ? rangeMap.getRanges(value) : RangeTree.empty(); | |||||
| } else { | |||||
| RangeTree all = getAllRanges(); | |||||
| return rangeMap != null ? all.subtract(rangeMap.getAssignedRanges()) : all; | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Returns all assigned ranges in the specified column. If the column doesn't exist in the | |||||
| * table, the empty range is returned). | |||||
| */ | |||||
| public RangeTree getAssignedRanges(Column<?> column) { | |||||
| getSchema().checkColumn(column); | |||||
| DisjointRangeMap.Builder<?> rangeMap = columnRanges.get(column); | |||||
| return rangeMap != null ? rangeMap.getAssignedRanges() : RangeTree.empty(); | |||||
| } | |||||
| /** | |||||
| * Returns ranges which were added to this builder, either directly via {@link #add(RangeTree)} | |||||
| * or indirectly via assignment. | |||||
| */ | |||||
| public RangeTree getAllRanges() { | |||||
| return allRanges; | |||||
| } | |||||
| /** Returns all ranges present in this table which are not assigned in any column. */ | |||||
| public RangeTree getUnassignedRanges() { | |||||
| RangeTree allAssigned = columnRanges.values().stream() | |||||
| .map(DisjointRangeMap.Builder::getAssignedRanges) | |||||
| .reduce(RangeTree.empty(), RangeTree::union); | |||||
| return allRanges.subtract(allAssigned); | |||||
| } | |||||
| /** | |||||
| * Returns a snapshot of the columns in schema order (including empty columns which may have | |||||
| * been added explicitly or exist due to values being unassigned). | |||||
| */ | |||||
| public ImmutableSet<Column<?>> getColumns() { | |||||
| return columnRanges.entrySet().stream() | |||||
| .map(Entry::getKey) | |||||
| .collect(toImmutableSet()); | |||||
| } | |||||
| // ---- Range assignment/addition/removal ---- | |||||
| /** | |||||
| * Assigns the specified ranges to the given assignment. If the value is {@code empty}, then | |||||
| * this has the effect of unassigning the given ranges, but does not remove them from the | |||||
| * table. If {@code ranges} is empty, this method has no effect. | |||||
| * | |||||
| * @throws RangeException if assignment cannot be performed according to the overwrite mode | |||||
| * (no change will have occurred in the table if this occurs). | |||||
| */ | |||||
| public Builder assign(Assignment<?> assignment, RangeTree ranges, OverwriteMode mode) { | |||||
| assign(assignment.column(), assignment.value().orElse(null), ranges, mode); | |||||
| return this; | |||||
| } | |||||
| /** | |||||
| * Assigns the specified ranges to a value within a column (other columns unaffected). If the | |||||
| * value is {@code null}, then this has the effect of unassigning the given ranges, but does | |||||
| * not remove them from the table. If {@code ranges} is empty, this method has no effect. | |||||
| * | |||||
| * @throws RangeException if assignment cannot be performed according to the overwrite mode | |||||
| * (no change will have occurred in the table if this occurs). | |||||
| */ | |||||
| public Builder assign( | |||||
| Column<?> column, @Nullable Object value, RangeTree ranges, OverwriteMode mode) { | |||||
| if (!ranges.isEmpty()) { | |||||
| getOrAddRangeMap(column).assign(value, ranges, mode); | |||||
| allRanges = allRanges.union(ranges); | |||||
| } | |||||
| return this; | |||||
| } | |||||
| /** | |||||
| * Unconditionally assigns all values, ranges and columns in the given table. This does not | |||||
| * clear any already assigned ranges. | |||||
| */ | |||||
| public Builder add(RangeTable table) { | |||||
| add(table.getAllRanges()); | |||||
| add(table.getColumns()); | |||||
| for (Column<?> column : table.getColumns()) { | |||||
| for (Object value : table.getAssignedValues(column)) { | |||||
| assign(column, value, table.getRanges(column, value), OverwriteMode.ALWAYS); | |||||
| } | |||||
| } | |||||
| return this; | |||||
| } | |||||
| /** | |||||
| * Ensures that the given ranges exist in the table, even if no assignments are ever made in | |||||
| * any columns. | |||||
| */ | |||||
| public Builder add(RangeTree ranges) { | |||||
| allRanges = allRanges.union(ranges); | |||||
| return this; | |||||
| } | |||||
| /** Ensures that the given column exists in the table (even if there are no assignments). */ | |||||
| public Builder add(Column<?> column) { | |||||
| getOrAddRangeMap(checkNotNull(column)); | |||||
| return this; | |||||
| } | |||||
| /** Ensures that the given columns exist in the table (even if there are no assignments). */ | |||||
| public Builder add(Collection<Column<?>> columns) { | |||||
| columns.forEach(this::add); | |||||
| return this; | |||||
| } | |||||
| /** Removes the given ranges from the table, including all assignments in all columns. */ | |||||
| public Builder remove(RangeTree ranges) { | |||||
| for (DisjointRangeMap.Builder<?> rangeMap : columnRanges.values()) { | |||||
| rangeMap.assign(null, ranges, OverwriteMode.ALWAYS); | |||||
| } | |||||
| allRanges = allRanges.subtract(ranges); | |||||
| return this; | |||||
| } | |||||
| /** Removes the given column from the table (has no effect if the column is not present). */ | |||||
| public Builder remove(Column<?> column) { | |||||
| columnRanges.remove(checkNotNull(column)); | |||||
| return this; | |||||
| } | |||||
| /** Removes the given columns from the table (has no effect if columns are not present). */ | |||||
| public Builder remove(Collection<Column<?>> columns) { | |||||
| columns.forEach(this::remove); | |||||
| return this; | |||||
| } | |||||
| /** Copies the assigned, non-default, values of the specified column. */ | |||||
| public <T extends Comparable<T>> Builder copyNonDefaultValues( | |||||
| Column<T> column, RangeTable src, OverwriteMode mode) { | |||||
| for (T v : src.getAssignedValues(column)) { | |||||
| if (!column.defaultValue().equals(v)) { | |||||
| assign(column, v, src.getRanges(column, v), mode); | |||||
| } | |||||
| } | |||||
| return this; | |||||
| } | |||||
| // ---- Applying changes ---- | |||||
| /** | |||||
| * Unconditionally applies the given change to this range table. Unlike | |||||
| * {@link #apply(Change, OverwriteMode)}, this method cannot fail, since changes are applied | |||||
| * unconditionally. | |||||
| */ | |||||
| public Builder apply(Change change) { | |||||
| return apply(change, OverwriteMode.ALWAYS); | |||||
| } | |||||
| /** | |||||
| * Applies the given change to this range table. A change adds ranges to the table, optionally | |||||
| * assigning them specific category values within columns. | |||||
| * | |||||
| * @throws RangeException if the overwrite mode prohibits the modification in this change (the | |||||
| * builder remains unchanged). | |||||
| */ | |||||
| public Builder apply(Change change, OverwriteMode mode) { | |||||
| RangeTree ranges = change.getRanges(); | |||||
| if (!ranges.isEmpty()) { | |||||
| // Check first that the assignments will succeed before attempting them (so as not to | |||||
| // leave the builder in an inconsistent state if it fails). | |||||
| if (mode != OverwriteMode.ALWAYS) { | |||||
| for (Assignment<?> a : change.getAssignments()) { | |||||
| getOrAddRangeMap(a.column()).checkAssign(a.value().orElse(null), ranges, mode); | |||||
| } | |||||
| } | |||||
| for (Assignment<?> a : change.getAssignments()) { | |||||
| getOrAddRangeMap(a.column()).assign(a.value().orElse(null), ranges, mode); | |||||
| } | |||||
| allRanges = allRanges.union(ranges); | |||||
| } | |||||
| return this; | |||||
| } | |||||
| // ---- Builder related methods ---- | |||||
| /** Builds the range table from the current state of the builder. */ | |||||
| public RangeTable build() { | |||||
| ImmutableMap<Column<?>, DisjointRangeMap<?>> columnMap = columnRanges.entrySet().stream() | |||||
| .map(e -> immutableEntry(e.getKey(), e.getValue().build())) | |||||
| .sorted(comparingByKey(schema.ordering())) | |||||
| .collect(toImmutableMap(Entry::getKey, Entry::getValue)); | |||||
| return new RangeTable(schema, columnMap, allRanges, getUnassignedRanges()); | |||||
| } | |||||
| /** | |||||
| * Returns a new builder with the same state as the current builder. This is useful when state | |||||
| * is being built up incrementally. | |||||
| */ | |||||
| public Builder copy() { | |||||
| // Can be made more efficient if necessary... | |||||
| return build().toBuilder(); | |||||
| } | |||||
| /** Builds a minimal version of this table in which empty columns are no longer present. */ | |||||
| public RangeTable buildMinimal() { | |||||
| ImmutableSet<Column<?>> empty = columnRanges.entrySet().stream() | |||||
| .filter(e -> e.getValue().getAssignedRanges().isEmpty()) | |||||
| .map(Entry::getKey) | |||||
| .collect(toImmutableSet()); | |||||
| remove(empty); | |||||
| return build(); | |||||
| } | |||||
| @Override | |||||
| public final String toString() { | |||||
| return build().toString(); | |||||
| } | |||||
| } | |||||
| /** Returns a builder for a range table with the specified column mapping. */ | |||||
| public static Builder builder(Schema schema) { | |||||
| return new Builder(schema); | |||||
| } | |||||
| public static RangeTable from( | |||||
| Schema schema, Table<RangeSpecification, Column<?>, Optional<?>> t) { | |||||
| Builder table = builder(schema); | |||||
| for (Entry<RangeSpecification, Map<Column<?>, Optional<?>>> row : t.rowMap().entrySet()) { | |||||
| List<Assignment<?>> assignments = row.getValue().entrySet().stream() | |||||
| .map(e -> Assignment.ofOptional(e.getKey(), e.getValue())) | |||||
| .collect(toImmutableList()); | |||||
| table.apply(Change.of(RangeTree.from(row.getKey()), assignments)); | |||||
| } | |||||
| return table.build(); | |||||
| } | |||||
| // Schema defining this table's columns (used to validate columns and to order them). | |||||
| private final Schema schema; | |||||
| // Per-column mapping from each column to the disjoint range map of its assigned values. | |||||
| private final ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges; | |||||
| // All ranges in this table (possibly larger than the union of all assigned ranges in all columns). | |||||
| private final RangeTree allRanges; | |||||
| // Ranges not assigned in any column (a subset of, or equal to, allRanges); passed in precomputed. | |||||
| private final RangeTree unassigned; | |||||
/**
 * Creates a table directly from its component parts, all of which must be non-null. No
 * consistency checks are made between the arguments; callers supply already consistent state.
 */
private RangeTable(
    Schema schema,
    ImmutableMap<Column<?>, DisjointRangeMap<?>> columnRanges,
    RangeTree allRanges,
    RangeTree unassigned) {
  // Null checks are performed in declaration order.
  this.schema = checkNotNull(schema);
  this.columnRanges = checkNotNull(columnRanges);
  this.allRanges = checkNotNull(allRanges);
  this.unassigned = checkNotNull(unassigned);
}
| /** Returns a builder initialized to the ranges and assignements in this table. */ | |||||
| public Builder toBuilder() { | |||||
| // Any mode would work here (the builder is empty) but the "always overwrite" mode is fastest. | |||||
| return new Builder(schema).add(this); | |||||
| } | |||||
| private Optional<DisjointRangeMap<?>> getRangeMap(Column<?> column) { | |||||
| return Optional.ofNullable(columnRanges.get(schema.checkColumn(column))); | |||||
| } | |||||
| public Schema getSchema() { | |||||
| return schema; | |||||
| } | |||||
| public ImmutableSet<Column<?>> getColumns() { | |||||
| return columnRanges.keySet(); | |||||
| } | |||||
| /** | |||||
| * Returns the set of values with assigned ranges in the given column. | |||||
| * | |||||
| * @throws IllegalArgumentException if the specified column does not exist in this table. | |||||
| */ | |||||
| public <T extends Comparable<T>> ImmutableSet<T> getAssignedValues(Column<T> column) { | |||||
| getSchema().checkColumn(column); | |||||
| // Safe since if the column is in the schema the values must have been checked when added. | |||||
| @SuppressWarnings("unchecked") | |||||
| DisjointRangeMap<T> rangeMap = | |||||
| (DisjointRangeMap<T>) columnRanges.get(schema.checkColumn(column)); | |||||
| return rangeMap != null ? rangeMap.getAssignedValues() : ImmutableSet.of(); | |||||
| } | |||||
| /** Returns all assigned ranges in the specified column. */ | |||||
| public RangeTree getAssignedRanges(Column<?> column) { | |||||
| return getRangeMap(column).map(DisjointRangeMap::getAssignedRanges).orElse(RangeTree.empty()); | |||||
| } | |||||
| /** | |||||
| * Returns ranges for the given assignment. If the value is {@code empty}, then the unassigned | |||||
| * ranges in the column are returned. | |||||
| */ | |||||
| public RangeTree getRanges(Assignment<?> assignment) { | |||||
| return getRanges(assignment.column(), assignment.value().orElse(null)); | |||||
| } | |||||
| /** | |||||
| * Returns ranges for the given value in the specified column. If the value is {@code null}, then | |||||
| * the unassigned ranges in the column are returned. | |||||
| */ | |||||
| public RangeTree getRanges(Column<?> column, @Nullable Object value) { | |||||
| getSchema().checkColumn(column); | |||||
| if (value == null) { | |||||
| return getAllRanges().subtract(getAssignedRanges(column)); | |||||
| } else { | |||||
| return getRangeMap(column).map(m -> m.getRanges(value)).orElse(RangeTree.empty()); | |||||
| } | |||||
| } | |||||
| /** Returns all ranges present in this table. */ | |||||
| public RangeTree getAllRanges() { | |||||
| return allRanges; | |||||
| } | |||||
| /** Returns all ranges present in this table which are not assigned in any column. */ | |||||
| public RangeTree getUnassignedRanges() { | |||||
| return unassigned; | |||||
| } | |||||
| /** | |||||
| * Returns whether this table contains no ranges (assigned or unassigned). Note that not all | |||||
| * empty tables are equal, since they may still differ by the columns they have. | |||||
| */ | |||||
| public boolean isEmpty() { | |||||
| return allRanges.isEmpty(); | |||||
| } | |||||
| /** | |||||
| * Returns a sub-table with rows and columns limited by the specified bounds. The schema of the | |||||
| * returned table is the same as this table. | |||||
| */ | |||||
| public RangeTable subTable(RangeTree bounds, Set<Column<?>> columns) { | |||||
| // Columns must be a subset of what's allowed in this schema. | |||||
| columns.forEach(getSchema()::checkColumn); | |||||
| return subTable(bounds, getSchema(), columns); | |||||
| } | |||||
| /** | |||||
| * Returns a sub-table with rows and columns limited by the specified bounds. The schema of the | |||||
| * returned table is the same as this table. | |||||
| */ | |||||
| public RangeTable subTable(RangeTree bounds, Column<?> first, Column<?>... rest) { | |||||
| return subTable(bounds, ImmutableSet.<Column<?>>builder().add(first).add(rest).build()); | |||||
| } | |||||
| /** | |||||
| * Returns a table with rows and columns limited by the specified bounds. The schema of the | |||||
| * returned table is the given sub-schema. | |||||
| */ | |||||
| public RangeTable subTable(RangeTree bounds, Schema subSchema) { | |||||
| checkArgument(subSchema.isSubSchemaOf(getSchema()), | |||||
| "expected sub-schema of %s, got %s", getSchema(), subSchema); | |||||
| return subTable(bounds, subSchema, Sets.filter(getColumns(), subSchema::isValidColumn)); | |||||
| } | |||||
| // Callers MUST validate that the given set of columns are all valid in the subSchema. | |||||
| private RangeTable subTable(RangeTree bounds, Schema subSchema, Set<Column<?>> columns) { | |||||
| ImmutableMap<Column<?>, DisjointRangeMap<?>> columnMap = columns.stream() | |||||
| // Bound the given columns which exist in this table. | |||||
| .map(c -> immutableEntry(c, getRangeMap(c).map(r -> r.intersect(bounds)))) | |||||
| // Reject columns we didn't already have (but allow empty columns if they exist). | |||||
| .filter(e -> e.getValue().isPresent()) | |||||
| // Sort to our schema (since the given set of columns is not required to be sorted). | |||||
| .sorted(comparingByKey(schema.ordering())) | |||||
| .collect(toImmutableMap(Entry::getKey, e -> e.getValue().get())); | |||||
| return new RangeTable( | |||||
| subSchema, columnMap, allRanges.intersect(bounds), unassigned.intersect(bounds)); | |||||
| } | |||||
| /** | |||||
| * Returns the assigned rows of a RangeTable as a minimal list of disjoint changes, which can | |||||
| * be applied to an empty table to recreate this table. No two changes affect the same columns | |||||
| * in the same way and changes are ordered by the minimal values of their ranges. This is | |||||
| * essentially the same information as returned in {@link #toImmutableTable()} but does not | |||||
| * decompose ranges into range specifications, and it thus more amenable to compact | |||||
| * serialization. | |||||
| */ | |||||
| // Note that the minimal nature of the returned changes is essential for some algorithms that | |||||
| // operate on tables and this must not be changed. | |||||
| public ImmutableList<Change> toChanges() { | |||||
| Table<Column<?>, Optional<?>, RangeTree> table = HashBasedTable.create(); | |||||
| for (Column<?> c : getColumns()) { | |||||
| for (Object v : getAssignedValues(c)) { | |||||
| table.put(c, Optional.of(v), getRanges(c, v)); | |||||
| } | |||||
| } | |||||
| return toChanges(schema, table, getAllRanges()); | |||||
| } | |||||
| /** | |||||
| * Returns a minimum set of changes based on a table of assignments (column plus value). This is | |||||
| * not expected to be used often (since RangeTable is usually a better representation of the data | |||||
| * but can be useful in representing things like updates and patches in which only some rows or | |||||
| * columns are represented. | |||||
| * @param schema a schema for the columns in the given Table (used to determine column order). | |||||
| * @param table the table of assignments to assigned ranges. | |||||
| * @param allRanges the set of all ranges affected by the changes (this might include ranges not | |||||
| * present anywhere in the table, which correspond to empty rows). | |||||
| */ | |||||
| public static ImmutableList<Change> toChanges( | |||||
| Schema schema, Table<Column<?>, Optional<?>, RangeTree> table, RangeTree allRanges) { | |||||
| return ImmutableList.copyOf( | |||||
| transform(toRows(table, allRanges, schema.ordering()), Row::toChange)); | |||||
| } | |||||
| /** | |||||
| * Returns the data in this table represented as a {@link ImmutableTable}. Row keys are disjoint | |||||
| * range specifications (in order). The returned table has the smallest number of rows necessary | |||||
| * to represent the data in this range table. This is useful as a human readable serialized form | |||||
| * since any digit sequence in the table is contained in a unique row. | |||||
| */ | |||||
| public ImmutableTable<RangeSpecification, Column<?>, Optional<?>> toImmutableTable() { | |||||
| Table<Column<?>, Optional<?>, RangeTree> table = HashBasedTable.create(); | |||||
| for (Column<?> c : getColumns()) { | |||||
| for (Object v : getAssignedValues(c)) { | |||||
| table.put(c, Optional.of(v), getRanges(c, v)); | |||||
| } | |||||
| RangeTree unassigned = getAllRanges().subtract(getAssignedRanges(c)); | |||||
| if (!unassigned.isEmpty()) { | |||||
| table.put(c, Optional.empty(), unassigned); | |||||
| } | |||||
| } | |||||
| // Unique changes contain disjoint ranges, each associated with a unique combination of | |||||
| // assignments. | |||||
| TreeBasedTable<RangeSpecification, Column<?>, Optional<?>> out = | |||||
| TreeBasedTable.create(comparing(RangeSpecification::min), schema.ordering()); | |||||
| for (Change c : toChanges(schema, table, getAllRanges())) { | |||||
| List<RangeSpecification> keys = c.getRanges().asRangeSpecifications(); | |||||
| for (Assignment<?> a : c.getAssignments()) { | |||||
| for (RangeSpecification k : keys) { | |||||
| out.put(k, a.column(), a.value()); | |||||
| } | |||||
| } | |||||
| } | |||||
| return ImmutableTable.copyOf(out); | |||||
| } | |||||
| /** | |||||
| * Extracts a map for a single column in this table containing the minimal prefix tree for each | |||||
| * of the assigned values. The returned prefixes are the shortest prefixes possible for | |||||
| * distinguishing each value in the column. This method is especially useful if you want to | |||||
| * categorize partial digit sequences efficiently (i.e. prefix matching). | |||||
| * | |||||
| * <p>A minimal length can be specified to avoid creating prefixes that are "too short" for some | |||||
| * circumstances. Note that returned prefixes are never zero length, so {@code 1} is the lowest | |||||
| * meaningful value (although zero is still accepted to imply "no length restriction"). | |||||
| * | |||||
| * <p>Note that for some table data, it is technically impossible to obtain perfect prefix | |||||
| * information and in cases where overlap occurs, this method returns the shortest prefixes. This | |||||
| * means that for some valid inputs it might be true that more than one prefix is matched. It | |||||
| * is therefore up to the caller to determine a "best order" for testing the prefixes if this | |||||
| * matters. See {@link PrefixTree#minimal(RangeTree, RangeTree, int)} for more information. | |||||
| * | |||||
| * <p>An example of an "impossible" prefix would be if "123" has value A, "1234" has value B and | |||||
| * "12345" has value A again. In this case there is no prefix which can distinguish A and B | |||||
| * (the calculated map would be { "123" => A, "1234" => B }). In this situation, testing for the | |||||
| * longer prefix would help preserve as much of the original mapping as possible, but it would | |||||
| * never be possible to correctly distinguish all inputs. | |||||
| */ | |||||
| public <T extends Comparable<T>> ImmutableMap<T, PrefixTree> getPrefixMap( | |||||
| Column<T> column, int minPrefixLength) { | |||||
| ImmutableMap.Builder<T, PrefixTree> map = ImmutableMap.builder(); | |||||
| // Important: Don't just use the assigned ranges in the column, use the assigned ranges of the | |||||
| // entire table. This ensures unassigned ranges in the column are not accidentally captured by | |||||
| // any of the generated prefixes. | |||||
| RangeTree allRanges = getAllRanges(); | |||||
| for (T value : getAssignedValues(column)) { | |||||
| RangeTree include = getRanges(column, value); | |||||
| map.put(value, PrefixTree.minimal(include, allRanges.subtract(include), minPrefixLength)); | |||||
| } | |||||
| return map.build(); | |||||
| } | |||||
| // Constants for the simplification routine below: a single integer column (and its schema) whose values index into a list of changes. | |||||
| // The default of -1 is used for unassigned rows (these are the "overlap" ranges and they don't have an index). | |||||
| private static final Column<Integer> INDEX = | |||||
| Column.create(Integer.class, "Change Index", -1, Integer::parseInt); | |||||
| private static final Schema INDEX_SCHEMA = Schema.builder().add(INDEX).build(); | |||||
| /** | |||||
| * Applies a simplification function to the rows defined by the given columns of this table. The | |||||
| * returned table will only have (at most) the specified columns present. | |||||
| * | |||||
| * <p>The simplification function is used to produce ranges which satisfy some business logic | |||||
| * criteria (such as having at most N significant digits, or merging lengths). Range | |||||
| * simplification enables easier comparison between data sources of differing precision, and | |||||
| * helps to reduce unnecessary complexity in generated regular expressions. | |||||
| * | |||||
| * <p>The simplification function should return a range that's at least as large as the input | |||||
| * range. This is to ensure that simplification cannot unassign ranges, even accidentally. The | |||||
| * returned range is automatically restricted to preserve disjoint ranges in the final table. | |||||
| * | |||||
| * <p>By passing a {@link Change} rather than just a {@link RangeTree}, the simplification | |||||
| * function has access to the row assignments for the range it is simplifying. This allows it to | |||||
| * select different strategies according to the values in specific columns (e.g. area code | |||||
| * length). | |||||
| * | |||||
| * <p>Note that unassigned ranges in the original table will be preserved and simplified ranges | |||||
| * will not overwrite them. This can be useful for defining "no go" ranges which should be left | |||||
| * alone. | |||||
| */ | |||||
| public RangeTable simplify( | |||||
| Function<Change, RangeTree> simplifyFn, | |||||
| int minPrefixLength, | |||||
| Column<?> first, | |||||
| Column<?>... rest) { | |||||
| // Build the single column "index" table (one index for each change) and simplify its ranges. | |||||
| // This only works because "toChanges()" produces the minimal set of changes such that each | |||||
| // unique combination of assignments appears only once. | |||||
| ImmutableList<Change> rows = subTable(getAllRanges(), first, rest).toChanges(); | |||||
| RangeTable simplifiedIndexTable = simplifyIndexTable(rows, simplifyFn, minPrefixLength); | |||||
| // Reconstruct the output table by assigning values from the original change set according to | |||||
| // the indices in the simplified index table. | |||||
| Builder simplified = RangeTable.builder(getSchema()).add(simplifiedIndexTable.getAllRanges()); | |||||
| for (int i : simplifiedIndexTable.getAssignedValues(INDEX)) { | |||||
| RangeTree simplifiedRange = simplifiedIndexTable.getRanges(INDEX, i); | |||||
| for (Assignment<?> a : rows.get(i).getAssignments()) { | |||||
| simplified.assign(a, simplifiedRange, OverwriteMode.NEVER); | |||||
| } | |||||
| } | |||||
| return simplified.build(); | |||||
| } | |||||
| /** | |||||
| * Helper function to simplify an index table based on the given rows. The resulting table will | |||||
| * have a single "index" column with simplified ranges, where the index value {@code N} | |||||
| * references the Nth row in the given list of disjoint changes. This is a 3 stage process: | |||||
| * <ol> | |||||
| * <li>Step 1: Determine which ranges can overlap with respect to set of range prefixes. | |||||
| * <li>Step 2: Do simplification on the non-overlapping "prefix disjoint" ranges in the table, | |||||
| * which are then be re-partitioned by the disjoint prefixes. | |||||
| * <li>Step 3: Copy over any overlapping ranges from the original table (these don't get | |||||
| * simplified since it's not possible to easily re-pertition them). | |||||
| * </ol> | |||||
| */ | |||||
| private static <T extends Comparable<T>> RangeTable simplifyIndexTable( | |||||
| ImmutableList<Change> rows, Function<Change, RangeTree> simplifyFn, int minPrefixLength) { | |||||
| RangeTable indexTable = makeIndexTable(rows); | |||||
| // Step 1: Determine overlapping ranges from the index table, retaining minimum prefix length. | |||||
| ImmutableMap<Integer, PrefixTree> nonDisjointPrefixes = | |||||
| indexTable.getPrefixMap(INDEX, minPrefixLength); | |||||
| // Don't just use the assigned ranges (we need to account for valid but unassigned ranges when | |||||
| // determining overlaps). | |||||
| RangeTree allRanges = indexTable.getAllRanges(); | |||||
| RangeTree overlaps = RangeTree.empty(); | |||||
| for (int n : indexTable.getAssignedValues(INDEX)) { | |||||
| RangeTree otherRanges = allRanges.subtract(indexTable.getRanges(INDEX, n)); | |||||
| overlaps = overlaps.union(nonDisjointPrefixes.get(n).retainFrom(otherRanges)); | |||||
| } | |||||
| // Step 2: Determine the "prefix disjoint" ranges in a new table and simplify it. | |||||
| // | |||||
| // Before getting the new set of prefixes, add the overlapping ranges back to the table, but | |||||
| // without assigning them to anything. This keeps the generated prefixes as long as necessary | |||||
| // to avoid creating conflicting assignments for different values. Essentially we're trying to | |||||
| // keep ranges "away from" any overlaps. Note however that it is still possible for simplified | |||||
| // ranges encroach on the overlapping areas, so we must still forcibly overwrite the original | |||||
| // overlapping values after siplification. Consider: | |||||
| // A = { "12x", "12xxx" }, B = { "123x" } | |||||
| // where the simplification function just creates any "any" range for all lengths between the | |||||
| // minimum and maximum range lengths (e.g. { "123", "45678" } ==> { "xxx", "xxxx", "xxxxx" }. | |||||
| // | |||||
| // The (non disjoint) prefix table is Pre(A) => { "12" }, Pre(B) => { "123" } and this | |||||
| // captures the overlaps: | |||||
| // Pre(A).retainFrom(B) = { "123x" } = B | |||||
| // Pre(B).retainFrom(A) = { "123xx" } | |||||
| // | |||||
| // Since is of "B" is entirely contained by the overlap, it is not simplified, but A is | |||||
| // simplified to: | |||||
| // { "xxx", "xxxx", "xxxxx" } | |||||
| // and the re-captured by the "disjoint" prefix (which is still just "12") to: | |||||
| // { "12x", "12xx", "12xxx" } | |||||
| // | |||||
| // However now, when the original overlaps are added back at the end (in step 3) we find that | |||||
| // both "123xx" already exists (with the same index) and "123x" exists with a different index. | |||||
| // The resolution is to just overwrite all overlaps back into the table, since these represent | |||||
| // the original (unsimplified) values. | |||||
| // | |||||
| // Thus in this case, the simplified table is: | |||||
| // Sim(A) = { "12x", "12[0-24-9]x", "12xxx" }, Sim(B) = { "123x" } | |||||
| // | |||||
| // And it is still true that: Sim(A).containsAll(A) and Sim(B).containsAll(B) | |||||
| RangeTable prefixDisjointTable = indexTable | |||||
| .subTable(allRanges.subtract(overlaps), INDEX) | |||||
| .toBuilder() | |||||
| .add(overlaps) | |||||
| .build(); | |||||
| // NOTE: Another way to do this would be to implement an "exclusive prefix" method which could | |||||
| // be used to immediately return a set of truly "disjoint" prefixes (although this would change | |||||
| // the algorithm's behaviour since more ranges would be considered "overlapping" than now). | |||||
| // TODO: Experiment with an alternate "exclusive" prefix function. | |||||
| ImmutableMap<Integer, PrefixTree> disjointPrefixes = prefixDisjointTable.getPrefixMap(INDEX, 1); | |||||
| // Not all values from the original table need be present in the derived table (since some | |||||
| // overlaps account for all the ranges of a value). | |||||
| Builder simplified = RangeTable.builder(INDEX_SCHEMA); | |||||
| for (int n : prefixDisjointTable.getAssignedValues(INDEX)) { | |||||
| RangeTree disjointRange = prefixDisjointTable.getRanges(INDEX, n); | |||||
| // Pass just the assignments, not the whole row (Change) because that also contains a range, | |||||
| // which might not be the same as the disjoint range (so it could be rather confusing). | |||||
| PrefixTree disjointPrefix = disjointPrefixes.get(n); | |||||
| RangeTree simplifiedRange = | |||||
| simplifyFn.apply(Change.of(disjointRange, rows.get(n).getAssignments())); | |||||
| // Technically this check is not strictly required, but there's probably no good use-case in | |||||
| // which you'd want to remove assignments via the simplification process. | |||||
| checkArgument(simplifiedRange.containsAll(disjointRange), | |||||
| "simplification should return a superset of the given range\n" | |||||
| + "input: %s\n" | |||||
| + "output: %s\n" | |||||
| + "missing: %s", | |||||
| disjointRange, simplifiedRange, disjointRange.subtract(simplifiedRange)); | |||||
| // Repartition the simplified ranges by the "disjoint" prefixes to restore most of the | |||||
| // simplified ranges. These ranges should never overlap with each other. | |||||
| RangeTree repartitionedRange = disjointPrefix.retainFrom(simplifiedRange); | |||||
| simplified.assign(INDEX, n, repartitionedRange, OverwriteMode.NEVER); | |||||
| } | |||||
| // Step 3: Copy remaining overlapping ranges from the original table back into the result. | |||||
| // Note that we may end up overwriting values here, but that's correct since it restores | |||||
| // original "unsimplifiable" ranges. | |||||
| for (int n : indexTable.getAssignedValues(INDEX)) { | |||||
| simplified.assign( | |||||
| INDEX, n, indexTable.getRanges(INDEX, n).intersect(overlaps), OverwriteMode.ALWAYS); | |||||
| } | |||||
| return simplified.build(); | |||||
| } | |||||
| // Helper to make a table with a single column than references a list of disjoint changes by | |||||
| // index (against the range of that change). | |||||
| private static RangeTable makeIndexTable(ImmutableList<Change> rows) { | |||||
| Builder indexTable = RangeTable.builder(INDEX_SCHEMA); | |||||
| for (int i = 0; i < rows.size(); i++) { | |||||
| // Empty rows are added to the table, but not assigned an index. Their existence in the index | |||||
| // table prevents over simplification from affecting unassigned rows of the original table. | |||||
| if (rows.get(i).getAssignments().isEmpty()) { | |||||
| indexTable.add(rows.get(i).getRanges()); | |||||
| } else { | |||||
| indexTable.assign(INDEX, i, rows.get(i).getRanges(), OverwriteMode.NEVER); | |||||
| } | |||||
| } | |||||
| return indexTable.build(); | |||||
| } | |||||
| @Override | |||||
| public boolean equals(Object obj) { | |||||
| if (!(obj instanceof RangeTable)) { | |||||
| return false; | |||||
| } | |||||
| RangeTable other = (RangeTable) obj; | |||||
| return this == other | |||||
| || (schema.equals(other.schema) | |||||
| && allRanges.equals(other.allRanges) | |||||
| && columnRanges.values().asList().equals(other.columnRanges.values().asList())); | |||||
| } | |||||
| @Override | |||||
| public int hashCode() { | |||||
| // This could be memoized if it turns out to be slow. | |||||
| return schema.hashCode() ^ columnRanges.hashCode() ^ allRanges.hashCode(); | |||||
| } | |||||
| // TODO: Prettier format for toString(). | |||||
| @Override | |||||
| public final String toString() { | |||||
| ImmutableTable<RangeSpecification, Column<?>, Optional<?>> table = toImmutableTable(); | |||||
| return table.rowMap().entrySet().stream() | |||||
| .map(e -> String.format("%s, %s", e.getKey(), rowToString(e.getValue()))) | |||||
| .collect(joining("\n")); | |||||
| } | |||||
| private static String rowToString(Map<Column<?>, Optional<?>> r) { | |||||
| return r.values().stream() | |||||
| .map(v -> v.map(Object::toString).orElse("UNSET")) | |||||
| .collect(joining(", ")); | |||||
| } | |||||
| // Helper method to convert a table of values into a minimal set of changes. This is used to | |||||
| // turn a single RangeTable into an ImmutableTable, but also to convert a Patch into a minimal | |||||
| // sequence of Changes. Each returned "row" defines a range and a unique sequence of assignments | |||||
| // over that range (i.e. no two rows have the same assignments). The assignments are ordered in | |||||
| // column order within each row, the rows are ordered by the minimum digit sequence in each | |||||
| // range, and the ranges form a disjoint covering of the ranges in the original table. | |||||
| // | |||||
| // See go/phonenumber-v2-data-structure for more details. | |||||
| private static ImmutableList<Row> toRows( | |||||
| Table<Column<?>, Optional<?>, RangeTree> src, | |||||
| RangeTree allRanges, | |||||
| Comparator<Column<?>> columnOrdering) { | |||||
| // Get the non-empty columns in _reverse_ iteration order. We build up rows as a linked list | |||||
| // structure, starting from the "right hand side". This avoids a lot of copying as new columns | |||||
| // are processed. | |||||
| ImmutableList<Column<?>> reversedColumns = src.rowMap().entrySet().stream() | |||||
| .filter(e -> !e.getValue().isEmpty()) | |||||
| .map(Entry::getKey) | |||||
| .sorted(columnOrdering.reversed()) | |||||
| .collect(toImmutableList()); | |||||
| // Start with a single row which covers all the ranges and carries no assignments. | |||||
| List<Row> uniqueRows = new ArrayList<>(); | |||||
| uniqueRows.add(Row.empty(allRanges)); | |||||
| for (Column<?> col : reversedColumns) { | |||||
| // Loop backward here so that rows can be (a) removed in place and (b) added at the end. | |||||
| for (int i = uniqueRows.size() - 1; i >= 0; i--) { | |||||
| Row row = uniqueRows.get(i); | |||||
| // Track the unprocessed range for each row as we extend it. | |||||
| RangeTree remainder = row.getRanges(); | |||||
| // Split the row by each value of the current column which overlaps its ranges. | |||||
| for (Entry<Optional<?>, RangeTree> e : src.row(col).entrySet()) { | |||||
| RangeTree overlap = e.getValue().intersect(remainder); | |||||
| if (overlap.isEmpty()) { | |||||
| continue; | |||||
| } | |||||
| // Extend the existing row by the current column value and reduce the remaining ranges. | |||||
| uniqueRows.add(Row.of(overlap, col, e.getKey(), row)); | |||||
| remainder = remainder.subtract(overlap); | |||||
| if (remainder.isEmpty()) { | |||||
| // We've accounted for all of the existing row in the new column, so remove it. | |||||
| uniqueRows.remove(i); | |||||
| break; | |||||
| } | |||||
| } | |||||
| if (!remainder.isEmpty()) { | |||||
| // The existing row is not completely covered by the new column, so retain what's left. | |||||
| uniqueRows.set(i, row.bound(remainder)); | |||||
| } | |||||
| } | |||||
| } | |||||
| // Return the rows ordered by the first() element of each row's ranges. | |||||
| return ImmutableList.sortedCopyOf(comparing(r -> r.getRanges().first()), uniqueRows); | |||||
| } | |||||
| /** | |||||
| * A notional "row" with some set of assignments in a range table or table like structure. Note | |||||
| * that a Row can represent unassignment as well as assignment, and not all rows need to contain | |||||
| * all columns. Rows are used for representing value in a table, but also changes between tables. | |||||
| */ | |||||
| @AutoValue | |||||
| abstract static class Row implements Iterable<Assignment<?>> { | |||||
| private static Row empty(RangeTree row) { | |||||
| return new AutoValue_RangeTable_Row(row, null); | |||||
| } | |||||
| private static Row of(RangeTree row, Column<?> col, Optional<?> val, Row next) { | |||||
| checkArgument(!row.isEmpty(), "empty ranges not permitted (col=%s, val=%s)", col, val); | |||||
| return new AutoValue_RangeTable_Row( | |||||
| row, new AutoValue_RangeTable_Cell(Assignment.ofOptional(col, val), next.head())); | |||||
| } | |||||
| public abstract RangeTree getRanges(); | |||||
| @Nullable abstract Cell head(); | |||||
| Change toChange() { | |||||
| return Change.of(getRanges(), this); | |||||
| } | |||||
| private Row bound(RangeTree ranges) { | |||||
| return new AutoValue_RangeTable_Row(getRanges().intersect(ranges), head()); | |||||
| } | |||||
| @Override | |||||
| public Iterator<Assignment<?>> iterator() { | |||||
| return new UnmodifiableIterator<Assignment<?>>() { | |||||
| @Nullable private Cell cur = Row.this.head(); | |||||
| @Override | |||||
| public boolean hasNext() { | |||||
| return cur != null; | |||||
| } | |||||
| @Override | |||||
| public Assignment<?> next() { | |||||
| Cell c = cur; | |||||
| if (c == null) { | |||||
| throw new NoSuchElementException(); | |||||
| } | |||||
| cur = cur.next(); | |||||
| return c.assignment(); | |||||
| } | |||||
| }; | |||||
| } | |||||
| @Override | |||||
| public final String toString() { | |||||
| return "Row{" + getRanges() + " >> " + Iterables.toString(this) + "}"; | |||||
| } | |||||
| } | |||||
| /** | |||||
| * A single link in a row's chain of assignments: one assignment plus a reference to the next | |||||
| * cell in the chain (null for the last cell). | |||||
| */ | |||||
| @AutoValue | |||||
| abstract static class Cell { | |||||
| // NOTE: Cells are constructed with (assignment, next) arguments, matching this property order. | |||||
| abstract Assignment<?> assignment(); | |||||
| @Nullable abstract Cell next(); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,169 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.collect.ImmutableCollection; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.Ordering; | |||||
| import java.util.Comparator; | |||||
| /** | |||||
| * Representation of ordered {@link Column}s in a table. Schemas define columns in both | |||||
| * {@code RangeTable} and {@code CsvTable}. | |||||
| */ | |||||
| @AutoValue | |||||
| public abstract class Schema { | |||||
| /** | |||||
| * Builder for a table schema. Columns are ordered in the order in which they, or their owning | |||||
| * group is added to the schema. | |||||
| */ | |||||
| public static final class Builder { | |||||
| private final ImmutableSet.Builder<String> names = ImmutableSet.builder(); | |||||
| private final ImmutableMap.Builder<String, Column<?>> columns = ImmutableMap.builder(); | |||||
| private final ImmutableMap.Builder<String, ColumnGroup<?, ?>> groups = ImmutableMap.builder(); | |||||
| /** Adds the given column to the schema. */ | |||||
| public Builder add(Column<?> column) { | |||||
| names.add(column.getName()); | |||||
| columns.put(column.getName(), column); | |||||
| return this; | |||||
| } | |||||
| /** Adds the given column group to the schema. */ | |||||
| public Builder add(ColumnGroup<?, ?> group) { | |||||
| names.add(group.prototype().getName()); | |||||
| groups.put(group.prototype().getName(), group); | |||||
| return this; | |||||
| } | |||||
| public Schema build() { | |||||
| return new AutoValue_Schema(names.build(), columns.build(), groups.build()); | |||||
| } | |||||
| } | |||||
| private static final Schema EMPTY = builder().build(); | |||||
| /** Returns an empty schema with no assigned columns. */ | |||||
| public static Schema empty() { | |||||
| return EMPTY; | |||||
| } | |||||
| /** Returns a new schema builder. */ | |||||
| public static Builder builder() { | |||||
| return new Builder(); | |||||
| } | |||||
| // Visible for AutoValue only. | |||||
| Schema() {} | |||||
| // List of column/group names used to determine column order: | |||||
| // E.g. if "names" is: ["col1", "grp1", "col2", "col3"] | |||||
| // You can have the table <<"col1", "grp1:xx", "grp1:yy", "col3">> | |||||
| // Not all columns need to be present and groups are ordered contiguously as the group prefix | |||||
| // appears in the names list. | |||||
| abstract ImmutableSet<String> names(); | |||||
| abstract ImmutableMap<String, Column<?>> columns(); | |||||
| abstract ImmutableMap<String, ColumnGroup<?, ?>> groups(); | |||||
| /** | |||||
| * Returns the column for the specified key string. For "plain" columns (not in groups) the key | |||||
| * is just the column name. For group columns, the key takes the form "prefix:suffix", where the | |||||
| * prefix is the name of the "prototype" column, and the "suffix" is an ID of a value within the | |||||
| * group. For example: | |||||
| * <p> {@oode | |||||
| * // Schema has a plain column called "Type" in it. | |||||
| * typeCol = table.getColumn("Type"); | |||||
| * | |||||
| * // Schema has a group called "Region" in it which can parse RegionCodes. | |||||
| * usRegionCol = table.getColumn("Region:US"); | |||||
| * }</p> | |||||
| */ | |||||
| public Column<?> getColumn(String key) { | |||||
| int split = key.indexOf(':'); | |||||
| Column<?> column; | |||||
| if (split == -1) { | |||||
| column = columns().get(key); | |||||
| } else { | |||||
| ColumnGroup<?, ?> group = groups().get(key.substring(0, split)); | |||||
| checkArgument(group != null, "invalid column %s, not in schema: %s", key, this); | |||||
| column = group.getColumnFromId(key.substring(split + 1)); | |||||
| } | |||||
| checkArgument(column != null, "invalid column %s, not in schema: %s", key, this); | |||||
| return column; | |||||
| } | |||||
| /** Returns whether the given column is valid within this schema. */ | |||||
| public <T extends Comparable<T>> boolean isValidColumn(Column<T> column) { | |||||
| int split = column.getName().indexOf(':'); | |||||
| if (split == -1) { | |||||
| return columns().containsValue(column); | |||||
| } else { | |||||
| ColumnGroup<?, ?> group = groups().get(column.getName().substring(0, split)); | |||||
| return group != null && column.isIn(group); | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Checks whether the given column is valid within this schema, otherwise throws | |||||
| * IllegalArgumentException. This is expected to be internal use only, since table users are | |||||
| * meant to always know which columns are valid. | |||||
| */ | |||||
| <T extends Comparable<T>> Column<T> checkColumn(Column<T> column) { | |||||
| checkArgument(isValidColumn(column), "invalid column %s, not in schema: %s", column, this); | |||||
| return column; | |||||
| } | |||||
| /** | |||||
| * Returns whether the this schema has a subset of columns/groups, in the same order as the | |||||
| * given schema. | |||||
| */ | |||||
| public boolean isSubSchemaOf(Schema schema) { | |||||
| return schema.columns().values().containsAll(columns().values()) | |||||
| && schema.groups().entrySet().containsAll(groups().entrySet()) | |||||
| && names().asList().equals( | |||||
| schema.names().stream().filter(names()::contains).collect(toImmutableList())); | |||||
| } | |||||
| /** Returns an ordering for all columns in this schema. */ | |||||
| public Comparator<Column<?>> ordering() { | |||||
| return Comparator | |||||
| .comparing(Schema::getPrefix, Ordering.explicit(names().asList())) | |||||
| .thenComparing(Schema::getSuffix); | |||||
| } | |||||
| public ImmutableSet<String> getNames() { | |||||
| return names(); | |||||
| } | |||||
| public ImmutableCollection<Column<?>> getColumns() { | |||||
| return columns().values(); | |||||
| } | |||||
| private static String getPrefix(Column<?> column) { | |||||
| int split = column.getName().indexOf(':'); | |||||
| return split != -1 ? column.getName().substring(0, split) : column.getName(); | |||||
| } | |||||
| private static String getSuffix(Column<?> column) { | |||||
| int split = column.getName().indexOf(':'); | |||||
| return split == -1 ? "" : column.getName().substring(split + 1); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,69 @@ | |||||
| // Copyright (C) 2017 The Libphonenumber Authors. | |||||
| // | |||||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| // you may not use this file except in compliance with the License. | |||||
| // You may obtain a copy of the License at | |||||
| // | |||||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||||
| // | |||||
| // Unless required by applicable law or agreed to in writing, software | |||||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| // See the License for the specific language governing permissions and | |||||
| // limitations under the License. | |||||
| syntax = "proto3"; | |||||
| package i18n.phonenumbers.metadata; | |||||
| option java_package = "com.google.i18n.phonenumbers.metadata.proto"; | |||||
| // The possible provenance which can be assigned to a range. | |||||
| // This enum is NOT stable and must only be stored in text based protocol | |||||
| // buffers. | |||||
| enum Provenance { | |||||
| // Having a distinct default/unknown enum with a zero value is a proto3 thing. | |||||
| // No data should actually ever have this value. | |||||
| UNKNOWN = 0; | |||||
| // Indicates that the ranges were defined in an official ITU document. The | |||||
| // comment associated with this range should contain a link to the document. | |||||
| // This is the most trusted form of evidence and will usually replace any | |||||
| // previous "lower" provenance (though this is not always true for some | |||||
| // countries). | |||||
| ITU = 10; | |||||
| // Indicates that the ranges were defined in an official IR21 document. The | |||||
| // comment associated with this range should contain a link to the document. | |||||
| // This is the most trusted form of evidence and will usually replace any | |||||
| // previous "lower" provenance (though this is not always true for some | |||||
| // countries). | |||||
| IR21 = 20; | |||||
| // Indicates that evidence for a range was found in a website belonging to | |||||
| // an official, government endorsed entity (e.g. national telecoms operator), | |||||
| // but not part of either an official ITU or IR21 document. | |||||
| // The comment associated with this range should contain a URL to the | |||||
| // appropriate page where the evidence was found. | |||||
| GOVERNMENT = 30; | |||||
| // Indicates that evidence for a range was found in a website belonging to a | |||||
| // telecoms operator (mobile carrier, MVNO etc...). The comment associated | |||||
| // with this range should contain a URL to the appropriate page where the | |||||
| // evidence was found. | |||||
| TELECOMS = 40; | |||||
| // Indicates that evidence for a range was found in an unofficial website | |||||
| // (e.g. Facebook or a general company home page). The comment associated | |||||
| // with this range should contain a URL to the appropriate page where the | |||||
| // evidence was found. | |||||
| WEB = 50; | |||||
| // Used to indicate special situations in which a number is accepted as | |||||
| // valid, despite no citeable evidence. When this provenance is used the | |||||
| // comment text should indicate some bug report or internal reasoning as to | |||||
| // why this range should be accepted. This provenance should be used only in | |||||
| // exceptional circumstances and the comment may be scrubbed from externally | |||||
| // published versions of the range data. | |||||
| INTERNAL = 100; | |||||
| } | |||||
| @ -0,0 +1,82 @@ | |||||
| // Copyright (C) 2017 The Libphonenumber Authors. | |||||
| // | |||||
| // Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| // you may not use this file except in compliance with the License. | |||||
| // You may obtain a copy of the License at | |||||
| // | |||||
| // http://www.apache.org/licenses/LICENSE-2.0 | |||||
| // | |||||
| // Unless required by applicable law or agreed to in writing, software | |||||
| // distributed under the License is distributed on an "AS IS" BASIS, | |||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| // See the License for the specific language governing permissions and | |||||
| // limitations under the License. | |||||
| syntax = "proto3"; | |||||
| package i18n.phonenumbers.metadata; | |||||
| option java_package = "com.google.i18n.phonenumbers.metadata.proto"; | |||||
| // Enum names must match the element names in the XML metadata modulo casing. | |||||
| enum XmlNumberType { | |||||
| // proto3 uses the zero value as the default, so a distinct default/unknown | |||||
| // entry is declared for it. No data should actually ever have this value. | |||||
| XML_UNKNOWN = 0; | |||||
| XML_NO_INTERNATIONAL_DIALLING = 1; | |||||
| XML_FIXED_LINE = 2; | |||||
| XML_MOBILE = 3; | |||||
| XML_PAGER = 4; | |||||
| XML_TOLL_FREE = 5; | |||||
| XML_PREMIUM_RATE = 6; | |||||
| XML_SHARED_COST = 7; | |||||
| XML_PERSONAL_NUMBER = 8; | |||||
| XML_VOIP = 9; | |||||
| XML_UAN = 10; | |||||
| XML_VOICEMAIL = 11; | |||||
| } | |||||
| // Validation types for phone number ranges. Each valid range is categorized as | |||||
| // exactly one of these types. This does not include NO_INTERNATIONAL_DIALLING | |||||
| // since it is an attribute of ranges rather than their fundamental type. | |||||
| enum ValidNumberType { | |||||
| // proto3 uses the zero value as the default, so a distinct default/unknown | |||||
| // entry is declared for it. No data should actually ever have this value. | |||||
| UNKNOWN = 0; | |||||
| FIXED_LINE = 1; | |||||
| MOBILE = 2; | |||||
| FIXED_LINE_OR_MOBILE = 3; | |||||
| PAGER = 4; | |||||
| TOLL_FREE = 5; | |||||
| PREMIUM_RATE = 6; | |||||
| SHARED_COST = 7; | |||||
| PERSONAL_NUMBER = 8; | |||||
| VOIP = 9; | |||||
| UAN = 10; | |||||
| VOICEMAIL = 11; | |||||
| } | |||||
| // Enum names must match the element names in the XML metadata modulo casing. | |||||
| // Unlike main metadata, these types are not required to be exclusive for a | |||||
| // number. | |||||
| enum XmlShortcodeType { | |||||
| // proto3 uses the zero value as the default, so a distinct default/unknown | |||||
| // entry is declared for it. No data should actually ever have this value. | |||||
| SC_UNKNOWN = 0; | |||||
| // General short codes without a more specific representation (unlike | |||||
| // generalDesc, which can just be the leading digits, this must be precise). | |||||
| SC_SHORT_CODE = 1; | |||||
| // Mutually exclusive sub-set of types for tariff. | |||||
| SC_TOLL_FREE = 2; | |||||
| SC_STANDARD_RATE = 3; | |||||
| SC_PREMIUM_RATE = 4; | |||||
| // Use-case types. | |||||
| SC_CARRIER_SPECIFIC = 5; | |||||
| SC_EMERGENCY = 6; | |||||
| SC_EXPANDED_EMERGENCY = 7; | |||||
| SC_SMS_SERVICES = 8; | |||||
| } | |||||
| @ -0,0 +1,134 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class DigitSequenceTest { | |||||
| @Test | |||||
| public void testEmpty() { | |||||
| Object e = DigitSequence.of(""); | |||||
| assertThat(e).isSameInstanceAs(DigitSequence.empty()); | |||||
| assertThat(DigitSequence.empty().length()).isEqualTo(0); | |||||
| assertThrows(IndexOutOfBoundsException.class, () -> DigitSequence.empty().getDigit(0)); | |||||
| assertThat(DigitSequence.empty().toString()).isEqualTo(""); | |||||
| } | |||||
| @Test | |||||
| public void testCreate() { | |||||
| DigitSequence s = DigitSequence.of("0123456789"); | |||||
| assertThat(s).isEqualTo(DigitSequence.of("0123456789")); | |||||
| assertThat(s).isNotEqualTo(DigitSequence.of("1111111111")); | |||||
| } | |||||
| @Test | |||||
| public void testGetDigit() { | |||||
| DigitSequence s = DigitSequence.of("0123456789"); | |||||
| assertThat(s.length()).isEqualTo(10); | |||||
| for (int n = 0; n < s.length(); n++) { | |||||
| assertThat(s.getDigit(n)).isEqualTo(n); | |||||
| } | |||||
| assertThat(s.toString()).isEqualTo("0123456789"); | |||||
| } | |||||
| @Test | |||||
| public void testBadArguments() { | |||||
| assertThrows(NullPointerException.class, () -> DigitSequence.of(null)); | |||||
| assertThrows(IllegalArgumentException.class, () -> DigitSequence.of("123X")); | |||||
| // Too long (19 digits). | |||||
| assertThrows(IllegalArgumentException.class, () -> DigitSequence.of("1234567890123456789")); | |||||
| } | |||||
| @Test | |||||
| public void testMin() { | |||||
| assertThat(domain().minValue()).isEqualTo(DigitSequence.empty()); | |||||
| assertThat(domain().next(DigitSequence.empty())).isNotNull(); | |||||
| assertThat(domain().previous(DigitSequence.empty())).isNull(); | |||||
| } | |||||
| @Test | |||||
| public void testMax() { | |||||
| DigitSequence max = DigitSequence.of("999999999999999999"); | |||||
| assertThat(domain().maxValue()).isEqualTo(max); | |||||
| assertThat(domain().previous(max)).isNotNull(); | |||||
| assertThat(domain().next(max)).isNull(); | |||||
| } | |||||
| @Test | |||||
| public void testDistance() { | |||||
| assertThat(domain().distance(DigitSequence.empty(), DigitSequence.of("0"))) | |||||
| .isEqualTo(1); | |||||
| assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("1"))) | |||||
| .isEqualTo(1); | |||||
| assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("00"))) | |||||
| .isEqualTo(10); | |||||
| assertThat(domain().distance(DigitSequence.of("0"), DigitSequence.of("10"))) | |||||
| .isEqualTo(20); | |||||
| assertThat(domain().distance(DigitSequence.of("10"), DigitSequence.of("0"))) | |||||
| .isEqualTo(-20); | |||||
| assertThat(domain().distance(DigitSequence.empty(), DigitSequence.of("000000"))) | |||||
| .isEqualTo(111111); | |||||
| assertThat(domain().distance(DigitSequence.of("000"), DigitSequence.of("000000"))) | |||||
| .isEqualTo(111000); | |||||
| // Max distance is one less than the total number of digit sequences. | |||||
| assertThat(domain().distance(domain().minValue(), domain().maxValue())) | |||||
| .isEqualTo(1111111111111111110L); | |||||
| } | |||||
| @Test | |||||
| public void testLexicographicalOrdering() { | |||||
| testComparator( | |||||
| DigitSequence.empty(), | |||||
| DigitSequence.of("0"), | |||||
| DigitSequence.of("1"), | |||||
| DigitSequence.of("9"), | |||||
| DigitSequence.of("00"), | |||||
| DigitSequence.of("01"), | |||||
| DigitSequence.of("10"), | |||||
| DigitSequence.of("99"), | |||||
| DigitSequence.of("000"), | |||||
| DigitSequence.of("123"), | |||||
| DigitSequence.of("124"), | |||||
| DigitSequence.of("999")); | |||||
| } | |||||
| @Test | |||||
| public void testExtend() { | |||||
| assertThat(DigitSequence.empty().extendBy(0)).isEqualTo(DigitSequence.of("0")); | |||||
| assertThat(DigitSequence.of("1234").extendBy(DigitSequence.of("5678"))) | |||||
| .isEqualTo(DigitSequence.of("12345678")); | |||||
| } | |||||
| private static <T extends Comparable<T>> void testComparator(T... items) { | |||||
| for (int i = 0; i < items.length; i++) { | |||||
| assertThat(items[i]).isEqualTo(items[i]); | |||||
| assertThat(items[i]).isEquivalentAccordingToCompareTo(items[i]); | |||||
| for (int j = i + 1; j < items.length; j++) { | |||||
| assertThat(items[i]).isNotEqualTo(items[j]); | |||||
| assertThat(items[i]).isLessThan(items[j]); | |||||
| assertThat(items[j]).isGreaterThan(items[i]); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,213 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeTree.empty; | |||||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat; | |||||
| import java.util.Arrays; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class PrefixTreeTest { | |||||
| @Test | |||||
| public void testNewInstancesNormalized() { | |||||
| assertThat(prefixes("123", "1234")).containsExactly("123"); | |||||
| assertThat(prefixes("70x", "7[1-9]")).containsExactly("7"); | |||||
| // Regression test for b/68707522 | |||||
| assertThat(prefixes("123xxx", "123x_xxx", "567xxx", "567x_xxx")).containsExactly("123", "567"); | |||||
| } | |||||
| @Test | |||||
| public void testRetainFrom() { | |||||
| PrefixTree prefix = prefixes("123", "124", "126", "555"); | |||||
| RangeTree ranges = ranges("1xxxxxx", "5xxxxxx", "6xxxxxx"); | |||||
| assertThat(prefix.retainFrom(ranges)).containsExactly("12[346]xxxx", "555xxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testPrefixes() { | |||||
| PrefixTree prefix = prefixes("123", "124", "126", "555"); | |||||
| assertThat(prefix.prefixes(seq("1230000"))).isTrue(); | |||||
| assertThat(prefix.prefixes(seq("555000"))).isTrue(); | |||||
| assertThat(prefix.prefixes(seq("12"))).isFalse(); | |||||
| assertThat(prefix.prefixes(seq("120000"))).isFalse(); | |||||
| } | |||||
| @Test | |||||
| public void testEmptyVsZeroLength() { | |||||
| PrefixTree empty = PrefixTree.from(empty()); | |||||
| PrefixTree zeroLength = prefixes("xxx"); | |||||
| assertThat(empty).isEmpty(); | |||||
| assertThat(zeroLength).isNotEmpty(); | |||||
| assertThat(zeroLength).hasSize(1); | |||||
| assertThat(zeroLength).containsExactly(RangeSpecification.empty()); | |||||
| // While the empty prefix tree filters out everything, the zero length tree allows everything | |||||
| // to pass. This is because the zero length prefix tree represents a single prefix of length | |||||
| // zero and all digit sequences start with a zero length sub-sequence. | |||||
| RangeTree ranges = ranges("12x", "3xx", "456"); | |||||
| assertThat(empty.retainFrom(ranges)).isEqualTo(empty()); | |||||
| assertThat(zeroLength.retainFrom(ranges)).isEqualTo(ranges); | |||||
| } | |||||
| @Test | |||||
| public void testNoTrailingAnyPath() { | |||||
| assertThat(prefixes("123xxx", "456xx", "789x")).containsExactly("123", "456", "789"); | |||||
| } | |||||
| @Test | |||||
| public void testRangeAndPrefixSameLength() { | |||||
| PrefixTree prefix = prefixes("1234"); | |||||
| RangeTree ranges = ranges("xxxx"); | |||||
| assertThat(prefix.retainFrom(ranges)).containsExactly("1234"); | |||||
| } | |||||
| @Test | |||||
| public void testRangeShorterThanPrefix() { | |||||
| PrefixTree prefix = prefixes("1234"); | |||||
| RangeTree ranges = ranges("xxx"); | |||||
| assertThat(prefix.retainFrom(ranges)).isEmpty(); | |||||
| } | |||||
| @Test | |||||
| public void testComplex() { | |||||
| PrefixTree prefix = prefixes("[12]", "3x4x5", "67890", "987xx9"); | |||||
| RangeTree ranges = ranges("x", "xx", "xxx", "1234xx", "234xxx", "3xx8xx", "67890"); | |||||
| assertThat(prefix.retainFrom(ranges)) | |||||
| .containsExactly("[12]", "[12]x", "[12]xx", "67890", "1234xx", "234xxx", "3x485x"); | |||||
| } | |||||
| @Test | |||||
| public void testEmptyPrefixTree() { | |||||
| // The empty filter filters everything out, since a filter operation is defined to return | |||||
| // only ranges which are prefixed by an element in the filter (of which there are none). | |||||
| assertThat(PrefixTree.from(empty()).retainFrom(ranges("12xxx"))).isEmpty(); | |||||
| } | |||||
| @Test | |||||
| public void testZeroLengthPrefix() { | |||||
| // The non-empty prefix tree which contains a single prefix of zero length. This has no effect | |||||
| // as a filter, since all ranges "have a zero length prefix". | |||||
| PrefixTree prefix = PrefixTree.from(RangeTree.from(RangeSpecification.empty())); | |||||
| RangeTree input = ranges("12xxx"); | |||||
| assertThat(prefix.retainFrom(input)).isEqualTo(input); | |||||
| } | |||||
| @Test | |||||
| public void testUnion() { | |||||
| // Overlapping prefixes retain the more general (shorter) one. | |||||
| assertThat(prefixes("1234").union(prefixes("12"))).containsExactly("12"); | |||||
| // Indentical prefixes treated like normal union. | |||||
| assertThat(prefixes("12").union(prefixes("12"))).containsExactly("12"); | |||||
| // Non-overlapping prefixes treated like normal union. | |||||
| assertThat(prefixes("123").union(prefixes("124"))).containsExactly("12[34]"); | |||||
| // Complex case where prefixes are split into 2 lengths due to a partial overlap. | |||||
| assertThat(prefixes("1234", "45", "800").union(prefixes("12", "4x67"))) | |||||
| .containsExactly("12", "45", "4[0-46-9]67", "800"); | |||||
| } | |||||
| @Test | |||||
| public void testIntersection() { | |||||
| // Overlapping prefixes retain the more specific (longer) one. | |||||
| assertThat(prefixes("1234").intersect(prefixes("12"))).containsExactly("1234"); | |||||
| // Indentical prefixes treated like normal intersection. | |||||
| assertThat(prefixes("12").intersect(prefixes("12"))).containsExactly("12"); | |||||
| // Non-overlapping prefixes treated like normal intersection. | |||||
| assertThat(prefixes("123").intersect(prefixes("124"))).isEmpty(); | |||||
| // Unlike the union case, with intersection, only the longest prefix remains. | |||||
| assertThat(prefixes("1234", "45x", "800").intersect(prefixes("12x", "4x67"))) | |||||
| .containsExactly("1234", "4567"); | |||||
| } | |||||
| @Test | |||||
| public void testTrim() { | |||||
| assertThat(prefixes("1234").trim(3)).containsExactly("123"); | |||||
| assertThat(prefixes("12").trim(3)).containsExactly("12"); | |||||
| assertThat(prefixes("1234").trim(0)).containsExactly(RangeSpecification.empty()); | |||||
| // Trimming can result in prefixes shorter than the stated length if by collapsing the original | |||||
| // prefix tree you end up with trailing any digit sequences. | |||||
| assertThat(prefixes("12[0-4]5", "12[5-9]").trim(3)).containsExactly("12"); | |||||
| assertThat(prefixes("7001", "70[1-9]", "7[1-9]").trim(3)).containsExactly("7"); | |||||
| } | |||||
| @Test | |||||
| public void testMinimal() { | |||||
| // If there are no ranges to include, the minimal prefix is empty (matching nothing). | |||||
| assertThat(PrefixTree.minimal(RangeTree.empty(), ranges("123x"), 0)).isEmpty(); | |||||
| // If the prefix for the included ranges is the identity, then the result is the identity | |||||
| // (after converting to a prefix, ranges like "xxx.." become the identity prefix). | |||||
| assertThat(PrefixTree.minimal(ranges("xxxx"), ranges("123"), 0).isIdentity()).isTrue(); | |||||
| // Without an exclude set, the prefix returned (at zero length) can just accept everything. | |||||
| assertThat(PrefixTree.minimal(ranges("123x"), RangeTree.empty(), 0).isIdentity()).isTrue(); | |||||
| assertThat(PrefixTree.minimal(ranges("123x", "456x"), ranges("13xx", "459x"), 0)) | |||||
| .containsExactly("12", "456"); | |||||
| assertThat(PrefixTree.minimal(ranges("123x", "456x"), empty(), 1)).containsExactly("[14]"); | |||||
| assertThat(PrefixTree.minimal(ranges("123x", "456x"), empty(), 2)).containsExactly("12", "45"); | |||||
| // Pick the shortest prefix when several suffice. | |||||
| assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("1xx", "5xxx"), 0)) | |||||
| .containsExactly("12", "56"); | |||||
| assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("1xx", "5xxx"), 3)) | |||||
| .containsExactly("12", "56"); | |||||
| // When ranges are contested, split the prefix (only "12" is contested out of "1[2-4]"). | |||||
| assertThat(PrefixTree.minimal(ranges("1[2-4]5xx", "189xx"), ranges("128xx"), 0)) | |||||
| .containsExactly("125", "1[348]"); | |||||
| // If the include range already prefixes an entire path of the exclude set, ignore that path. | |||||
| // Here '12' (the shorter path) already captures '123', so '123' is ignored. | |||||
| assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("123", "5xxx"), 0)) | |||||
| .containsExactly("1", "56"); | |||||
| // Now all exclude paths are ignored, so you get the "identity" prefix that catches everything. | |||||
| assertThat(PrefixTree.minimal(ranges("12", "1234", "56"), ranges("123", "5678"), 0)) | |||||
| .containsExactly(""); | |||||
| } | |||||
| @Test | |||||
| public void testMinimal_regression() { | |||||
| // This is extracted from a real case in which the old algorithm would fail for this case. The | |||||
| // "281xxxxxxx" path was necessary for failing since while visiting this, the old algorithm | |||||
| // became "confused" and added an additional "250" path to the minimal prefix, meaning that | |||||
| // the resulting range tree was "250", "250395". When this was turned into a prefix tree, the | |||||
| // shorter, early terminating, path took precedence and the result was (incorrectly) "250". | |||||
| assertThat( | |||||
| PrefixTree.minimal( | |||||
| ranges("250395xxxx"), | |||||
| ranges("250[24-9]xxxxxx", "2503[0-8]xxxxx", "25039[0-46-9]xxxx", "281xxxxxxx"), | |||||
| 3)) | |||||
| .containsExactly("250395"); | |||||
| } | |||||
| private static DigitSequence seq(String s) { | |||||
| return DigitSequence.of(s); | |||||
| } | |||||
| private static PrefixTree prefixes(String... specs) { | |||||
| return PrefixTree.from(ranges(specs)); | |||||
| } | |||||
| private static RangeTree ranges(String... specs) { | |||||
| return RangeTree.from(Arrays.stream(specs).map(RangeSpecification::parse)); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,308 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeSpecification.parse; | |||||
| import static java.util.Arrays.asList; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.common.collect.ImmutableRangeSet; | |||||
| import com.google.common.collect.Range; | |||||
| import com.google.common.collect.RangeSet; | |||||
| import com.google.common.truth.Truth; | |||||
| import java.util.Arrays; | |||||
| import java.util.List; | |||||
| import java.util.stream.Stream; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class RangeSpecificationTest { | |||||
| @Test | |||||
| public void testParse() { | |||||
| assertThat(parse("")).isSameInstanceAs(RangeSpecification.empty()); | |||||
| assertThat(parse("0").toString()).isEqualTo("0"); | |||||
| assertThat(parse("0").length()).isEqualTo(1); | |||||
| assertThat(parse("01234").toString()).isEqualTo("01234"); | |||||
| assertThat(parse("01234").length()).isEqualTo(5); | |||||
| assertThat(parse("012[0-9]").toString()).isEqualTo("012x"); | |||||
| assertThat(parse("012[0234789]xxx").toString()).isEqualTo("012[02-47-9]xxx"); | |||||
| assertThat(parse("0_1_2").toString()).isEqualTo("012"); | |||||
| assertThat(parse("0_12[3-8]_xxx_xxx").toString()).isEqualTo("012[3-8]xxxxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testParseBad() { | |||||
| assertThrows(NullPointerException.class, () -> parse(null)); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("#")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[]")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[0-")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[0-]")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[0--9]")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[0..9]")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[33]")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[32]")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[3-3]")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("[3-2]")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("123[9-0]456")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("1234_")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("_1234")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("12__34")); | |||||
| assertThrows(IllegalArgumentException.class, () -> parse("1[2_4]5")); | |||||
| } | |||||
| @Test | |||||
| public void testSingleton() { | |||||
| assertThat(RangeSpecification.singleton(asList(0, 1, 2, 4, 5, 7, 8, 9))) | |||||
| .isEqualTo(parse("[0-2457-9]")); | |||||
| } | |||||
| @Test | |||||
| public void testMatches() { | |||||
| assertThat(RangeSpecification.empty().matches(DigitSequence.empty())).isTrue(); | |||||
| assertAllMatch(parse("0"), "0"); | |||||
| assertNoneMatch(parse("0"), "00", "1"); | |||||
| assertAllMatch(parse("01234"), "01234"); | |||||
| assertNoneMatch(parse("01234"), "01233", "01235"); | |||||
| assertAllMatch(parse("012x"), "0120", "0125", "0129"); | |||||
| assertNoneMatch(parse("012x"), "012", "0119", "0130", "01200"); | |||||
| assertAllMatch(parse("012[3-689]xxx"), "0124000", "0128999"); | |||||
| assertNoneMatch(parse("012[3-689]xxx"), "0122000", "0127999"); | |||||
| } | |||||
| @Test | |||||
| public void testMinMax() { | |||||
| assertThat(parse("123xxx").min()).isEqualTo(DigitSequence.of("123000")); | |||||
| assertThat(parse("123xxx").max()).isEqualTo(DigitSequence.of("123999")); | |||||
| assertThat(parse("1x[2-3]x4").min()).isEqualTo(DigitSequence.of("10204")); | |||||
| assertThat(parse("1x[2-3]x4").max()).isEqualTo(DigitSequence.of("19394")); | |||||
| } | |||||
| @Test | |||||
| public void testSequenceCount() { | |||||
| assertThat(RangeSpecification.empty().getSequenceCount()).isEqualTo(1); | |||||
| assertThat(parse("1xx").getSequenceCount()).isEqualTo(100); | |||||
| assertThat(parse("1[2-46-8]x").getSequenceCount()).isEqualTo(60); | |||||
| assertThat(parse("1xx[0-27-9]").getSequenceCount()).isEqualTo(600); | |||||
| } | |||||
| @Test | |||||
| public void testFrom() { | |||||
| assertThat(RangeSpecification.from(DigitSequence.empty())) | |||||
| .isEqualTo(RangeSpecification.empty()); | |||||
| assertThat(RangeSpecification.from(DigitSequence.of("1"))).isEqualTo(parse("1")); | |||||
| assertThat(RangeSpecification.from(DigitSequence.of("1234"))).isEqualTo(parse("1234")); | |||||
| } | |||||
| @Test | |||||
| public void testAny() { | |||||
| assertThat(RangeSpecification.any(0)).isEqualTo(RangeSpecification.empty()); | |||||
| assertThat(RangeSpecification.any(2)).isEqualTo(parse("xx")); | |||||
| assertThat(RangeSpecification.any(10)).isEqualTo(parse("xxxxxxxxxx")); | |||||
| assertThrows(IllegalArgumentException.class, () -> RangeSpecification.any(-1)); | |||||
| assertThrows(IllegalArgumentException.class, () -> RangeSpecification.any(19)); | |||||
| } | |||||
| @Test | |||||
| public void testFirst() { | |||||
| RangeSpecification spec = parse("123[4-7]xxxx"); | |||||
| assertThat(spec.first(3)).isEqualTo(parse("123")); | |||||
| assertThat(spec.first(6)).isEqualTo(parse("123[4-7]xx")); | |||||
| assertThat(spec.first(spec.length())).isSameInstanceAs(spec); | |||||
| assertThat(spec.first(100)).isSameInstanceAs(spec); | |||||
| assertThat(spec.first(0)).isEqualTo(RangeSpecification.empty()); | |||||
| assertThrows(IllegalArgumentException.class, () -> spec.first(-1)); | |||||
| } | |||||
| @Test | |||||
| public void testLast() { | |||||
| RangeSpecification spec = parse("123[4-7]xxxx"); | |||||
| assertThat(spec.last(3)).isEqualTo(parse("xxx")); | |||||
| assertThat(spec.last(6)).isEqualTo(parse("3[4-7]xxxx")); | |||||
| assertThat(spec.last(spec.length())).isSameInstanceAs(spec); | |||||
| assertThat(spec.last(100)).isSameInstanceAs(spec); | |||||
| assertThat(spec.last(0)).isEqualTo(RangeSpecification.empty()); | |||||
| assertThrows(IllegalArgumentException.class, () -> spec.last(-1)); | |||||
| } | |||||
| @Test | |||||
| public void testGetPrefix() { | |||||
| assertThat(RangeSpecification.empty().getPrefix()).isEqualTo(RangeSpecification.empty()); | |||||
| assertThat(parse("xxxx").getPrefix()).isEqualTo(RangeSpecification.empty()); | |||||
| assertThat(parse("xx1x").getPrefix()).isEqualTo(parse("xx1")); | |||||
| assertThat(parse("123[4-7]xxxx").getPrefix()).isEqualTo(parse("123[4-7]")); | |||||
| } | |||||
| @Test | |||||
| public void testOrdering_simple() { | |||||
| // For specifications representing a single DigitSequence, the ordering should be the same. | |||||
| testComparator( | |||||
| RangeSpecification.empty(), | |||||
| parse("0"), | |||||
| parse("00"), | |||||
| parse("000"), | |||||
| parse("01"), | |||||
| parse("1"), | |||||
| parse("10"), | |||||
| parse("123"), | |||||
| parse("124"), | |||||
| parse("4111"), | |||||
| parse("4200"), | |||||
| parse("4555"), | |||||
| parse("9"), | |||||
| parse("99"), | |||||
| parse("999")); | |||||
| } | |||||
| @Test | |||||
| public void testOrdering_disjoint() { | |||||
| // NOT the same as using the min() sequence for ordering (since "4555" > "4200" > "4111"). | |||||
| testComparator( | |||||
| parse("12xx"), | |||||
| parse("13xx"), | |||||
| parse("14xx"), | |||||
| parse("1[5-8]00"), | |||||
| parse("[2-3]xxx"), | |||||
| parse("[4-6]555"), | |||||
| parse("[45]111"), | |||||
| parse("[45]2xx"), | |||||
| parse("4999")); | |||||
| } | |||||
| @Test | |||||
| public void testOrdering_overlapping() { | |||||
| // Ordering for overlapping ranges is well defined but not particularly intuitive. | |||||
| testComparator( | |||||
| parse("01xxx"), | |||||
| parse("01xx[0-5]"), | |||||
| parse("01x0[0-5]"), | |||||
| parse("01x00"), | |||||
| parse("01[0-6]00"), | |||||
| parse("01[2-7]xx"), | |||||
| parse("01[2-7]00"), | |||||
| parse("01[2-7]67"), | |||||
| parse("01[4-9]00")); | |||||
| } | |||||
| @Test | |||||
| public void testToString() { | |||||
| assertThat(parse("0").toString()).isEqualTo("0"); | |||||
| assertThat(parse("01234").toString()).isEqualTo("01234"); | |||||
| assertThat(parse("012[3-4]").toString()).isEqualTo("012[34]"); | |||||
| assertThat(parse("012[0-9]").toString()).isEqualTo("012x"); | |||||
| assertThat(parse("012[3-689]xxx").toString()).isEqualTo("012[3-689]xxx"); | |||||
| } | |||||
| @Test | |||||
| public void testBitmaskToString() { | |||||
| assertThat(RangeSpecification.toString(1 << 0)).isEqualTo("0"); | |||||
| assertThat(RangeSpecification.toString(1 << 9)).isEqualTo("9"); | |||||
| assertThat(RangeSpecification.toString(0xF)).isEqualTo("[0-3]"); | |||||
| assertThat(RangeSpecification.toString(0xF1)).isEqualTo("[04-7]"); | |||||
| assertThat(RangeSpecification.toString(ALL_DIGITS_MASK)).isEqualTo("x"); | |||||
| assertThrows(IllegalArgumentException.class, () -> RangeSpecification.toString(0)); | |||||
| assertThrows(IllegalArgumentException.class, () -> RangeSpecification.toString(0x400)); | |||||
| } | |||||
| @Test | |||||
| public void testRangeProcessing_singleBlock() { | |||||
| Truth.assertThat(RangeSpecification.from(setOf(range("1200", "1299")))) | |||||
| .isEqualTo(specs("12xx")); | |||||
| } | |||||
| @Test | |||||
| public void testRangeProcessing_fullRange() { | |||||
| Truth.assertThat(RangeSpecification.from(setOf(range("0000", "9999")))) | |||||
| .isEqualTo(specs("xxxx")); | |||||
| } | |||||
| @Test | |||||
| public void testRangeProcessing_edgeCases() { | |||||
| Truth.assertThat(RangeSpecification.from(setOf(range("1199", "1300")))).isEqualTo(specs( | |||||
| "1199", | |||||
| "12xx", | |||||
| "1300")); | |||||
| } | |||||
| @Test | |||||
| public void testRangeProcessing_complex() { | |||||
| Truth.assertThat(RangeSpecification.from(setOf(range("123", "45678")))).isEqualTo(specs( | |||||
| "12[3-9]", | |||||
| "1[3-9]x", | |||||
| "[2-9]xx", | |||||
| "xxxx", | |||||
| "[0-3]xxxx", | |||||
| "4[0-4]xxx", | |||||
| "45[0-5]xx", | |||||
| "456[0-6]x", | |||||
| "4567[0-8]")); | |||||
| } | |||||
| @Test | |||||
| public void testAsRanges_edgeCase() { | |||||
| // The middle 2 ranges abut. | |||||
| assertThat(RangeSpecification.parse("12[34][0189]x").asRanges()) | |||||
| .containsExactly(range("12300", "12319"), range("12380", "12419"), range("12480", "12499")) | |||||
| .inOrder(); | |||||
| } | |||||
| private static void assertAllMatch(RangeSpecification r, String... sequences) { | |||||
| for (String digits : sequences) { | |||||
| assertThat(r.matches(DigitSequence.of(digits))).isTrue(); | |||||
| } | |||||
| } | |||||
| private static void assertNoneMatch(RangeSpecification r, String... sequences) { | |||||
| for (String digits : sequences) { | |||||
| assertThat(r.matches(DigitSequence.of(digits))).isFalse(); | |||||
| } | |||||
| } | |||||
| List<RangeSpecification> specs(String... s) { | |||||
| return Stream.of(s).map(RangeSpecification::parse).collect(toImmutableList()); | |||||
| } | |||||
| private static Range<DigitSequence> range(String lo, String hi) { | |||||
| return Range.closed(DigitSequence.of(lo), DigitSequence.of(hi)).canonical(domain()); | |||||
| } | |||||
| private static RangeSet<DigitSequence> setOf(Range<DigitSequence>... r) { | |||||
| return ImmutableRangeSet.copyOf(Arrays.asList(r)); | |||||
| } | |||||
| private static <T extends Comparable<T>> void testComparator(T... items) { | |||||
| for (int i = 0; i < items.length; i++) { | |||||
| assertThat(items[i]).isEqualTo(items[i]); | |||||
| assertThat(items[i]).isEquivalentAccordingToCompareTo(items[i]); | |||||
| for (int j = i + 1; j < items.length; j++) { | |||||
| assertThat(items[i]).isNotEqualTo(items[j]); | |||||
| assertThat(items[i]).isLessThan(items[j]); | |||||
| assertThat(items[j]).isGreaterThan(items[i]); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,101 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeTree.empty; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.ALLOW_EDGE_SPLITTING; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES; | |||||
| import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.factor; | |||||
| import java.util.List; | |||||
| import java.util.stream.Stream; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class RangeTreeFactorizerTest { | |||||
| @Test | |||||
| public void testEmpty() { | |||||
| assertThat(factor(empty(), REQUIRE_EQUAL_EDGES)).isEmpty(); | |||||
| assertThat(factor(empty(), ALLOW_EDGE_SPLITTING)).isEmpty(); | |||||
| } | |||||
| @Test | |||||
| public void testSimplePrefix() { | |||||
| RangeTree t = ranges("123x", "123xx", "123xxx"); | |||||
| assertThat(factor(t, REQUIRE_EQUAL_EDGES)).containsExactly(t); | |||||
| assertThat(factor(t, ALLOW_EDGE_SPLITTING)).containsExactly(t); | |||||
| } | |||||
| @Test | |||||
| public void testDisjointBranchesNotFactored() { | |||||
| RangeTree t = ranges("123xxx", "124xx", "125x"); | |||||
| assertThat(factor(t, REQUIRE_EQUAL_EDGES)).containsExactly(t); | |||||
| assertThat(factor(t, ALLOW_EDGE_SPLITTING)).containsExactly(t); | |||||
| } | |||||
| @Test | |||||
| public void testOverlappingBranchesAreFactored() { | |||||
| RangeTree t = ranges("123xxx", "1234x", "1234", "123"); | |||||
| assertThat(factor(t, REQUIRE_EQUAL_EDGES)) | |||||
| .containsExactly(ranges("123xxx", "123"), ranges("1234x", "1234")) | |||||
| .inOrder(); | |||||
| assertThat(factor(t, ALLOW_EDGE_SPLITTING)) | |||||
| .containsExactly(ranges("123xxx", "123"), ranges("1234x", "1234")) | |||||
| .inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testStrategyDifference() { | |||||
| // When factoring with REQUIRE_EQUAL_EDGES the [3-9] edge in the shorter path cannot be merged | |||||
| // into the longer path of the first factor, since [3-4] already exists and is not equal to | |||||
| // [3-9]. However since [3-4] is contained by [3-9], when we ALLOW_EDGE_SPLITTING, we can split | |||||
| // the edge we are trying to merge to add paths for both [3-4] and [5-9]. This isn't always a | |||||
| // win for regular expression length, and in fact for the most complex cases, | |||||
| // REQUIRE_EQUAL_EDGES often ends up smaller. | |||||
| RangeTree splittable = ranges("12[3-5]xx", "12[3-9]x"); | |||||
| assertThat(factor(splittable, REQUIRE_EQUAL_EDGES)) | |||||
| .containsExactly(ranges("12[3-5]xx"), ranges("12[3-9]x")) | |||||
| .inOrder(); | |||||
| assertThat(factor(splittable, ALLOW_EDGE_SPLITTING)) | |||||
| .containsExactly(ranges("12[3-5]xx", "12[3-9]x")); | |||||
| // In this case, the [3-5] edge in the first factor in only a partial overlap with the [4-9] | |||||
| // edge we are trying to merge in. Now both strategies will prefer to treat the shorter path | |||||
| // as a separate factor, since there's no clean way to merge into the existing edge. | |||||
| RangeTree unsplittable = ranges("12[3-5]xx", "12[4-9]x"); | |||||
| assertThat(factor(unsplittable, REQUIRE_EQUAL_EDGES)) | |||||
| .containsExactly(ranges("12[3-5]xx"), ranges("12[4-9]x")) | |||||
| .inOrder(); | |||||
| assertThat(factor(unsplittable, ALLOW_EDGE_SPLITTING)) | |||||
| .containsExactly(ranges("12[3-5]xx"), ranges("12[4-9]x")) | |||||
| .inOrder(); | |||||
| // TODO: Find a non-complex example where REQUIRE_EQUAL_EDGES yeilds smaller regex. | |||||
| // Approximately 50 out of the 1000+ regex's in the XML get smaller with REQUIRE_EQUAL_EDGES. | |||||
| } | |||||
| RangeTree ranges(String... s) { | |||||
| return RangeTree.from(specs(s)); | |||||
| } | |||||
| List<RangeSpecification> specs(String... s) { | |||||
| return Stream.of(s).map(RangeSpecification::parse).collect(toImmutableList()); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,555 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain; | |||||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat; | |||||
| import static java.util.Arrays.asList; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.auto.value.AutoValue; | |||||
| import com.google.common.base.Strings; | |||||
| import com.google.common.collect.ImmutableRangeSet; | |||||
| import com.google.common.collect.Range; | |||||
| import com.google.common.collect.RangeSet; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Collections; | |||||
| import java.util.List; | |||||
| import java.util.Random; | |||||
| import java.util.concurrent.ExecutionException; | |||||
| import java.util.concurrent.ForkJoinPool; | |||||
| import java.util.stream.Collectors; | |||||
| import java.util.stream.Stream; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class RangeTreeTest { | |||||
| @Test | |||||
| public void testEmptyTree() { | |||||
| assertThat(RangeTree.empty()).containsExactly(); | |||||
| assertThat(RangeTree.empty()).hasSize(0); | |||||
| } | |||||
| @Test | |||||
| public void testEmptySequenceTree() { | |||||
| // The tree that matches a zero length input is a perfectly valid range tree (zero length input | |||||
| // is perfectly valid input). This is very distinct from the empty tree, which cannot match any | |||||
| // input. It's not used very often, but it is well defined. | |||||
| RangeTree r = RangeTree.from(RangeSpecification.empty()); | |||||
| assertThat(r).containsExactly(RangeSpecification.empty()); | |||||
| assertThat(r).hasSize(1); | |||||
| } | |||||
| @Test | |||||
| public void testFromRangeSetSimple() { | |||||
| // Single ranges produce minimal/canoncial range specifications. | |||||
| RangeTree r = RangeTree.from(rangeSetOf(range("1000", "4999"))); | |||||
| assertThat(r).containsExactly("[1-4]xxx"); | |||||
| assertThat(r).hasSize(4000); | |||||
| } | |||||
| @Test | |||||
| public void testFromRangeSetMinMax() { | |||||
| RangeTree r = RangeTree.from(rangeSetOf(range("0000", "9999"))); | |||||
| assertThat(r).containsExactly("xxxx"); | |||||
| assertThat(r).hasSize(10000); | |||||
| } | |||||
| @Test | |||||
| public void testFromRangeSetAllValues() { | |||||
| // Just checking for any out-of-bounds issues at the end of the domain. | |||||
| RangeTree r = RangeTree.from(rangeSetOf(range("0", domain().maxValue().toString()))); | |||||
| assertThat(r).containsExactly( | |||||
| "x", | |||||
| "xx", | |||||
| "xxx", | |||||
| "xxxx", | |||||
| "xxxxx", | |||||
| "xxxxxx", | |||||
| "xxxxxxx", | |||||
| "xxxxxxxx", | |||||
| "xxxxxxxxx", | |||||
| "xxxxxxxxxx", | |||||
| "xxxxxxxxxxx", | |||||
| "xxxxxxxxxxxx", | |||||
| "xxxxxxxxxxxxx", | |||||
| "xxxxxxxxxxxxxx", | |||||
| "xxxxxxxxxxxxxxx", | |||||
| "xxxxxxxxxxxxxxxx", | |||||
| "xxxxxxxxxxxxxxxxx", | |||||
| "xxxxxxxxxxxxxxxxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testContains() { | |||||
| // The tree generated from the empty range specification actually contains one digit sequence | |||||
| // (the empty one). This is not the same as RangeTree.empty() which really contains nothing. | |||||
| assertThat(RangeTree.empty()).doesNotContain(""); | |||||
| assertThat(RangeTree.from(RangeSpecification.empty())).contains(""); | |||||
| assertThat(RangeTree.from(spec("x"))).contains("7"); | |||||
| assertThat(RangeTree.from(spec("1"))).contains("1"); | |||||
| assertThat(RangeTree.from(spec("1"))).doesNotContain("5"); | |||||
| assertThat(RangeTree.from(spec("xx"))).contains("99"); | |||||
| assertThat(RangeTree.from(spec("xx"))).doesNotContain("100"); | |||||
| assertThat(RangeTree.from(spec("0[123]x[456]x[789]"))).contains("027617"); | |||||
| } | |||||
| @Test | |||||
| public void testMatchCount() { | |||||
| assertThat(RangeTree.empty()).hasSize(0); | |||||
| assertThat(RangeTree.from(RangeSpecification.empty())).hasSize(1); | |||||
| assertThat(RangeTree.from(spec("x"))).hasSize(10); | |||||
| assertThat(RangeTree.from(spec("1"))).hasSize(1); | |||||
| assertThat(RangeTree.from(spec("[123]"))).hasSize(3); | |||||
| assertThat(RangeTree.from(spec("xx"))).hasSize(100); | |||||
| assertThat(RangeTree.from(spec("[234]xx"))).hasSize(300); | |||||
| assertThat(RangeTree.from(spec("1[234]xx"))).hasSize(300); | |||||
| assertThat(RangeTree.from(spec("1[234][567]xx"))).hasSize(900); | |||||
| assertThat(RangeTree.from(spec("0[123]x[456]x[789]"))).hasSize(2700); | |||||
| } | |||||
| @Test | |||||
| public void testUnion() { | |||||
| RangeTree a = ranges("12xx", "456xx"); | |||||
| assertThat(a.union(a)).isEqualTo(a); | |||||
| assertThat(a.union(RangeTree.empty())).isEqualTo(a); | |||||
| assertThat(RangeTree.empty().union(a)).isEqualTo(a); | |||||
| RangeTree b = ranges("1234", "4xxxx", "999"); | |||||
| assertThat(a.union(b)).containsExactly("999", "12xx", "4xxxx"); | |||||
| assertThat(b.union(a)).containsExactly("999", "12xx", "4xxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testIntersection() { | |||||
| RangeTree a = ranges("12xx", "456xx"); | |||||
| assertThat(a.intersect(a)).isEqualTo(a); | |||||
| assertThat(a.intersect(RangeTree.empty())).isSameInstanceAs(RangeTree.empty()); | |||||
| assertThat(RangeTree.empty().intersect(a)).isSameInstanceAs(RangeTree.empty()); | |||||
| RangeTree b = ranges("1234", "4xxxx", "999"); | |||||
| assertThat(a.intersect(b)).containsExactly("1234", "456xx"); | |||||
| assertThat(b.intersect(a)).containsExactly("1234", "456xx"); | |||||
| } | |||||
| @Test | |||||
| public void testSubtraction() { | |||||
| RangeTree a = ranges("12xx", "456xx"); | |||||
| assertThat(a.subtract(a)).isSameInstanceAs(RangeTree.empty()); | |||||
| assertThat(a.subtract(RangeTree.empty())).isEqualTo(a); | |||||
| assertThat(RangeTree.empty().subtract(a)).isSameInstanceAs(RangeTree.empty()); | |||||
| RangeTree b = ranges("1234", "4xxxx", "999"); | |||||
| assertThat(a.subtract(b)).containsExactly("12[0-24-9]x", "123[0-35-9]"); | |||||
| assertThat(b.subtract(a)).containsExactly("999", "4[0-46-9]xxx", "45[0-57-9]xx"); | |||||
| } | |||||
| @Test | |||||
| public void testContainsAll() { | |||||
| RangeTree a = ranges("12[3-6]xx", "13[5-8]xx", "456xxxx"); | |||||
| assertThat(a.containsAll(a)).isTrue(); | |||||
| assertThat(a.containsAll(RangeTree.empty())).isTrue(); | |||||
| assertThat(RangeTree.empty().containsAll(a)).isFalse(); | |||||
| // Test branching, since 12.. and 13... are distinct branches but both contain ..[56][78]x | |||||
| assertThat(a.containsAll(ranges("1[23][56][78]x", "4567890"))).isTrue(); | |||||
| // Path 127.. is not contained. | |||||
| assertThat(a.containsAll(ranges("12[357]xx"))).isFalse(); | |||||
| // Hard to test for, but this should fail immediately (due to length mismatch). | |||||
| assertThat(a.containsAll(ranges("123456"))).isFalse(); | |||||
| // Check edge case for zero-length paths. | |||||
| assertThat(ranges("", "1").containsAll(ranges(""))).isTrue(); | |||||
| assertThat(RangeTree.empty().containsAll(ranges(""))).isFalse(); | |||||
| } | |||||
| @Test | |||||
| public void testVennDiagram() { | |||||
| // Test basic set-theoretic assumptions about the logical operations. | |||||
| // In theory we could run this test with any non-disjoint pair of trees. | |||||
| RangeTree a = ranges("12xx", "456xx"); | |||||
| RangeTree b = ranges("1234", "4xxxx", "999"); | |||||
| RangeTree intAB = a.intersect(b); | |||||
| RangeTree subAB = a.subtract(b); | |||||
| RangeTree subBA = b.subtract(a); | |||||
| // (A\B) and (B\A) are disjoint with (A^B) and each other. | |||||
| assertThat(subAB.intersect(intAB)).isSameInstanceAs(RangeTree.empty()); | |||||
| assertThat(subBA.intersect(intAB)).isSameInstanceAs(RangeTree.empty()); | |||||
| assertThat(subAB.intersect(subBA)).isSameInstanceAs(RangeTree.empty()); | |||||
| // Even the union of (A\B) and (B\A) is disjoint to the intersection. | |||||
| assertThat(subAB.union(subBA).intersect(intAB)).isSameInstanceAs(RangeTree.empty()); | |||||
| // (A\B) + (A^B) = A, (B\A) + (A^B) = B, (A\B) + (B\A) + (A^B) == (A+B) | |||||
| assertThat(subAB.union(intAB)).isEqualTo(a); | |||||
| assertThat(subBA.union(intAB)).isEqualTo(b); | |||||
| assertThat(subAB.union(subBA).union(intAB)).isEqualTo(a.union(b)); | |||||
| } | |||||
| @Test | |||||
| public void testFromRaggedRange() { | |||||
| RangeTree r = RangeTree.from(rangeSetOf(range("123980", "161097"))); | |||||
| // Very 'ragged' ranges produde a lot of range specifications. | |||||
| assertThat(r).containsExactly( | |||||
| "1239[8-9]x", | |||||
| "12[4-9]xxx", | |||||
| "1[3-5]xxxx", | |||||
| "160xxx", | |||||
| "1610[0-8]x", | |||||
| "16109[0-7]"); | |||||
| } | |||||
| @Test | |||||
| public void testComplexSpecsToSimpleRange() { | |||||
| List<RangeSpecification> specs = specs( | |||||
| "12[3-9]", | |||||
| "1[3-9]x", | |||||
| "[2-9]xx", | |||||
| "xxxx", | |||||
| "[0-3]xxxx", | |||||
| "4[0-4]xxx", | |||||
| "45[0-5]xx", | |||||
| "456[0-6]x", | |||||
| "4567[0-8]"); | |||||
| RangeTree r = RangeTree.from(specs); | |||||
| assertThat(r).containsExactly(specs); | |||||
| assertThat(r.asRangeSet()).isEqualTo(rangeSetOf(range("123", "45678"))); | |||||
| } | |||||
| @Test | |||||
| public void testAsRangeSetMultipleGroups() { | |||||
| // The specification yields 4 ranges, one for each of the prefixes 0123, 0124, 0125 and 0128. | |||||
| RangeTree tree = ranges("012[3-58][2-7]x"); | |||||
| RangeSet<DigitSequence> expectedRanges = rangeSetOf( | |||||
| range("012320", "012379"), | |||||
| range("012420", "012479"), | |||||
| range("012520", "012579"), | |||||
| range("012820", "012879")); | |||||
| assertThat(tree.asRangeSet()).isEqualTo(expectedRanges); | |||||
| } | |||||
| @Test | |||||
| public void testAsRangeSetMerging() { | |||||
| // In isolation, the first specification represents two ranges, and the second represents one. | |||||
| RangeTree tree = ranges("12[3-4][7-9]x", "125[0-5]x"); | |||||
| // The range ending 12499 merges with the range starting 12500, giving 2 rather than 3 ranges. | |||||
| RangeSet<DigitSequence> mergedRanges = rangeSetOf( | |||||
| range("12370", "12399"), | |||||
| range("12470", "12559")); | |||||
| assertThat(tree.asRangeSet()).isEqualTo(mergedRanges); | |||||
| } | |||||
| @Test | |||||
| public void testVisitor() { | |||||
| // Carefully construct DFA so depth first visitation order is just incrementing from 0. | |||||
| RangeTree r = ranges("012", "345", "367", "3689"); | |||||
| TestVisitor v = new TestVisitor(); | |||||
| r.accept(v); | |||||
| DfaNode initial = r.getInitial(); | |||||
| DfaNode terminal = RangeTree.getTerminal(); | |||||
| assertThat(v.visited).hasSize(10); | |||||
| // Edges 0 & 3 leave the initial state, edges 2,5,7,9 reach the terminal. | |||||
| assertThat(v.visited.stream().map(Edge::source).filter(initial::equals).count()).isEqualTo(2); | |||||
| assertThat(v.visited.stream().map(Edge::target).filter(terminal::equals).count()).isEqualTo(4); | |||||
| // Check expected edge value masks. | |||||
| for (int n = 0; n < 10; n++) { | |||||
| assertThat(v.visited.get(n).digitMask()).isEqualTo(1 << n); | |||||
| } | |||||
| } | |||||
| @Test | |||||
| public void testMin() { | |||||
| assertThrows(IllegalStateException.class, () -> RangeTree.empty().first()); | |||||
| assertThat(RangeTree.from(RangeSpecification.empty()).first()).isEqualTo(DigitSequence.empty()); | |||||
| RangeTree tree = ranges("[1-6]xxxx", "[6-9]xx", "[89]xxx"); | |||||
| assertThat(tree.first()).isEqualTo(DigitSequence.of("600")); | |||||
| assertThat(tree.subtract(ranges("[6-8]xx")).first()).isEqualTo(DigitSequence.of("900")); | |||||
| assertThat(tree.subtract(ranges("xxx")).first()).isEqualTo(DigitSequence.of("8000")); | |||||
| assertThat(tree.subtract(ranges("xxx", "8[0-6]xx")).first()) | |||||
| .isEqualTo(DigitSequence.of("8700")); | |||||
| assertThat(tree.subtract(ranges("xxx", "xxxx")).first()).isEqualTo(DigitSequence.of("10000")); | |||||
| } | |||||
| @Test | |||||
| public void testSample() { | |||||
| // Sampling an empty tree is always out of bounds; a tree holding only the empty sequence | |||||
| // has exactly one sample. | |||||
| assertThrows(IndexOutOfBoundsException.class, () -> RangeTree.empty().sample(0)); | |||||
| assertThat(RangeTree.from(RangeSpecification.empty()).sample(0)) | |||||
| .isEqualTo(DigitSequence.empty()); | |||||
| RangeTree tree = ranges("[1-6]xxxx", "[6-9]xx", "[89]xxx"); | |||||
| // sometimes iteration looks ordered ... | |||||
| assertThat(tree.sample(0)).isEqualTo(DigitSequence.of("10000")); | |||||
| assertThat(tree.sample(1)).isEqualTo(DigitSequence.of("10001")); | |||||
| assertThat(tree.sample(10)).isEqualTo(DigitSequence.of("10010")); | |||||
| // but in general sample(n).next() != sample(n+1) | |||||
| assertThat(tree.sample(49999)).isEqualTo(DigitSequence.of("59999")); | |||||
| assertThat(tree.sample(50000)).isEqualTo(DigitSequence.of("600")); | |||||
| assertThat(tree.sample(50001)).isEqualTo(DigitSequence.of("60000")); | |||||
| assertThat(tree.sample(tree.size() - 1)).isEqualTo(DigitSequence.of("9999")); | |||||
| // The first index past the end must be rejected by the populated tree itself; sampling | |||||
| // RangeTree.empty() here would throw for any index and so would prove nothing about 'tree'. | |||||
| assertThrows(IndexOutOfBoundsException.class, () -> tree.sample(tree.size())); | |||||
| } | |||||
| @Test | |||||
| public void testSignificantDigits() { | |||||
| RangeTree tree = ranges("123xx", "14567", "789"); | |||||
| // Keeping fewer leading digits widens every specification without changing its length. | |||||
| assertThat(tree.significantDigits(3)).containsExactly("123xx", "145xx", "789"); | |||||
| assertThat(tree.significantDigits(2)).containsExactly("12xxx", "14xxx", "78x"); | |||||
| assertThat(tree.significantDigits(1)).containsExactly("1xxxx", "7xx"); | |||||
| // With no significant digits only the lengths (5 and 3) remain. | |||||
| assertThat(tree.significantDigits(0)).containsExactly("xxxxx", "xxx"); | |||||
| } | |||||
| @Test | |||||
| public void testPrefixWith() { | |||||
| RangeTree tree = ranges("123xx", "456x"); | |||||
| RangeTree expectedPrefixed = ranges("00123xx", "00456x"); | |||||
| assertThat(tree.prefixWith(spec("00"))).isEqualTo(expectedPrefixed); | |||||
| // Prefixing with the empty specification returns the very same tree instance. | |||||
| assertThat(tree.prefixWith(RangeSpecification.empty())).isSameInstanceAs(tree); | |||||
| // The prefixing of an empty tree is empty (all paths that exist have been prefixed correctly). | |||||
| assertThat(RangeTree.empty().prefixWith(spec("00"))).isEqualTo(RangeTree.empty()); | |||||
| } | |||||
| @Test | |||||
| public void testSlicing() { | |||||
| // Sequences of length 0, 1, 3, 5 and 4 respectively. | |||||
| RangeTree tree = ranges("", "1", "123", "125xx", "456x"); | |||||
| // Single-argument slices are compared against trees holding only sequences of that length. | |||||
| assertThat(tree.slice(1)).isEqualTo(ranges("[14]")); | |||||
| assertThat(tree.slice(2)).isEqualTo(ranges("12", "45")); | |||||
| assertThat(tree.slice(3)).isEqualTo(ranges("12[35]", "456")); | |||||
| assertThat(tree.slice(4)).isEqualTo(ranges("125x", "456x")); | |||||
| // Two-argument slices are compared against trees with mixed lengths. | |||||
| assertThat(tree.slice(2, 4)).isEqualTo(ranges("123", "125x", "456x")); | |||||
| assertThat(tree.slice(0, 5)).isEqualTo(tree); | |||||
| } | |||||
| // Round-trips a large set of 10-digit specifications: the tree built from them must report | |||||
| // exactly the same specifications, both directly and after going through asRangeSet(). | |||||
| @Test | |||||
| public void testSerializingRealWorldExample() { | |||||
| List<RangeSpecification> expected = specs( | |||||
| "11[2-7]xxxxxxx", | |||||
| "12[0-249][2-7]xxxxxx", | |||||
| "12[35-8]x[2-7]xxxxx", | |||||
| "13[0-25][2-7]xxxxxx", | |||||
| "13[346-9]x[2-7]xxxxx", | |||||
| "14[145][2-7]xxxxxx", | |||||
| "14[236-9]x[2-7]xxxxx", | |||||
| "1[59][0235-9]x[2-7]xxxxx", | |||||
| "1[59][14][2-7]xxxxxx", | |||||
| "16[014][2-7]xxxxxx", | |||||
| "16[235-9]x[2-7]xxxxx", | |||||
| "17[1257][2-7]xxxxxx", | |||||
| "17[34689]x[2-7]xxxxx", | |||||
| "18[01346][2-7]xxxxxx", | |||||
| "18[257-9]x[2-7]xxxxx", | |||||
| "2[02][2-7]xxxxxxx", | |||||
| "21[134689]x[2-7]xxxxx", | |||||
| "21[257][2-7]xxxxxx", | |||||
| "23[013][2-7]xxxxxx", | |||||
| "23[24-8]x[2-7]xxxxx", | |||||
| "24[01][2-7]xxxxxx", | |||||
| "24[2-8]x[2-7]xxxxx", | |||||
| "25[0137][2-7]xxxxxx", | |||||
| "25[25689]x[2-7]xxxxx", | |||||
| "26[0158][2-7]xxxxxx", | |||||
| "26[2-4679]x[2-7]xxxxx", | |||||
| "27[13-79]x[2-7]xxxxx", | |||||
| "278[2-7]xxxxxx", | |||||
| "28[1568][2-7]xxxxxx", | |||||
| "28[2-479]x[2-7]xxxxx", | |||||
| "29[14][2-7]xxxxxx", | |||||
| "29[235-9]x[2-7]xxxxx", | |||||
| "301x[2-7]xxxxx", | |||||
| "31[79]x[2-7]xxxxx", | |||||
| "32[1-5]x[2-7]xxxxx", | |||||
| "326[2-7]xxxxxx", | |||||
| "33[2-7]xxxxxxx", | |||||
| "34[13][2-7]xxxxxx", | |||||
| "342[0189][2-7]xxxxx", | |||||
| "342[2-7]xxxxxx", | |||||
| "34[5-8]x[2-7]xxxxx", | |||||
| "35[125689]x[2-7]xxxxx", | |||||
| "35[34][2-7]xxxxxx", | |||||
| "36[01489][2-7]xxxxxx", | |||||
| "36[235-7]x[2-7]xxxxx", | |||||
| "37[02-46][2-7]xxxxxx", | |||||
| "37[157-9]x[2-7]xxxxx", | |||||
| "38[159][2-7]xxxxxx", | |||||
| "38[2-467]x[2-7]xxxxx", | |||||
| "4[04][2-7]xxxxxxx", | |||||
| "41[14578]x[2-7]xxxxx", | |||||
| "41[36][2-7]xxxxxx", | |||||
| "42[1-47][2-7]xxxxxx", | |||||
| "42[5689]x[2-7]xxxxx", | |||||
| "43[15][2-7]xxxxxx", | |||||
| "43[2-467]x[2-7]xxxxx", | |||||
| "45[12][2-7]xxxxxx", | |||||
| "45[4-7]x[2-7]xxxxx", | |||||
| "46[0-26-9][2-7]xxxxxx", | |||||
| "46[35]x[2-7]xxxxx", | |||||
| "47[0-24-9][2-7]xxxxxx", | |||||
| "473x[2-7]xxxxx", | |||||
| "48[013-57][2-7]xxxxxx", | |||||
| "48[2689]x[2-7]xxxxx", | |||||
| "49[014-7][2-7]xxxxxx", | |||||
| "49[2389]x[2-7]xxxxx", | |||||
| "51[025][2-7]xxxxxx", | |||||
| "51[146-9]x[2-7]xxxxx", | |||||
| "52[14-8]x[2-7]xxxxx", | |||||
| "522[2-7]xxxxxx", | |||||
| "53[1346]x[2-7]xxxxx", | |||||
| "53[25][2-7]xxxxxx", | |||||
| "54[14-69]x[2-7]xxxxx", | |||||
| "54[28][2-7]xxxxxx", | |||||
| "55[12][2-7]xxxxxx", | |||||
| "55[46]x[2-7]xxxxx", | |||||
| "56[146-9]x[2-7]xxxxx", | |||||
| "56[25][2-7]xxxxxx", | |||||
| "571[2-7]xxxxxx", | |||||
| "57[2-4]x[2-7]xxxxx", | |||||
| "581[2-7]xxxxxx", | |||||
| "58[2-8]x[2-7]xxxxx", | |||||
| "59[15][2-7]xxxxxx", | |||||
| "59[246]x[2-7]xxxxx", | |||||
| "61[1358]x[2-7]xxxxx", | |||||
| "612[2-7]xxxxxx", | |||||
| "621[2-7]xxxxxx", | |||||
| "62[2457]x[2-7]xxxxx", | |||||
| "631[2-7]xxxxxx", | |||||
| "63[2-4]x[2-7]xxxxx", | |||||
| "641[2-7]xxxxxx", | |||||
| "64[235-7]x[2-7]xxxxx", | |||||
| "65[17][2-7]xxxxxx", | |||||
| "65[2-689]x[2-7]xxxxx", | |||||
| "66[13][2-7]xxxxxx", | |||||
| "66[24578]x[2-7]xxxxx", | |||||
| "671[2-7]xxxxxx", | |||||
| "67[235689]x[2-7]xxxxx", | |||||
| "674[0189][2-7]xxxxx", | |||||
| "674[2-7]xxxxxx", | |||||
| "680[2-7]xxxxxx", | |||||
| "68[1-6]x[2-7]xxxxx", | |||||
| "71[013-9]x[2-7]xxxxx", | |||||
| "712[2-7]xxxxxx", | |||||
| "72[0235-9]x[2-7]xxxxx", | |||||
| "72[14][2-7]xxxxxx", | |||||
| "73[134][2-7]xxxxxx", | |||||
| "73[2679]x[2-7]xxxxx", | |||||
| "74[1-35689]x[2-7]xxxxx", | |||||
| "74[47][2-7]xxxxxx", | |||||
| "75[15][2-7]xxxxxx", | |||||
| "75[2-46-9]x[2-7]xxxxx", | |||||
| "7[67][02-9]x[2-7]xxxxx", | |||||
| "7[67]1[2-7]xxxxxx", | |||||
| "78[013-7]x[2-7]xxxxx", | |||||
| "782[0-6][2-7]xxxxx", | |||||
| "788[0189][2-7]xxxxx", | |||||
| "788[2-7]xxxxxx", | |||||
| "79[0189]x[2-7]xxxxx", | |||||
| "79[2-7]xxxxxxx", | |||||
| "80[2-467]xxxxxxx", | |||||
| "81[1357-9]x[2-7]xxxxx", | |||||
| "816[2-7]xxxxxx", | |||||
| "82[014][2-7]xxxxxx", | |||||
| "82[235-8]x[2-7]xxxxx", | |||||
| "83[03-57-9]x[2-7]xxxxx", | |||||
| "83[126][2-7]xxxxxx", | |||||
| "84[0-24-9]x[2-7]xxxxx", | |||||
| "85xx[2-7]xxxxx", | |||||
| "86[136][2-7]xxxxxx", | |||||
| "86[2457-9]x[2-7]xxxxx", | |||||
| "87[078][2-7]xxxxxx", | |||||
| "87[1-6]x[2-7]xxxxx", | |||||
| "88[1256]x[2-7]xxxxx", | |||||
| "88[34][2-7]xxxxxx", | |||||
| "891[2-7]xxxxxx", | |||||
| "89[2-4]x[2-7]xxxxx"); | |||||
| RangeTree t1 = RangeTree.from(expected); | |||||
| // The tree must report exactly the specifications it was built from. | |||||
| assertThat(t1).containsExactly(expected); | |||||
| // Rebuilding the tree from its range set must give the same specifications again. | |||||
| assertThat(RangeTree.from(t1.asRangeSet())).containsExactly(expected); | |||||
| } | |||||
| @Test | |||||
| public void testThreadSafety() throws ExecutionException, InterruptedException { | |||||
| // For 10^5 this takes ~500ms. For 10^6 it starts to take non-trivial time (~10 seconds). | |||||
| int numDigits = 5; | |||||
| // At 1000 threads this starts to take non-trivial time. | |||||
| int numThreads = 100; | |||||
| // Collect 10^N ranges from "00..." to "99...", all distinct. | |||||
| List<RangeTree> ranges = Stream | |||||
| .iterate(DigitSequence.zeros(numDigits), DigitSequence::next) | |||||
| .limit((int) Math.pow(10, numDigits)) | |||||
| .map(RangeTreeTest::singletonRange) | |||||
| .collect(Collectors.toCollection(ArrayList::new)); | |||||
| // Fixed seed keeps the shuffled order (and so the test) reproducible. | |||||
| Collections.shuffle(ranges, new Random(1234L)); | |||||
| // Recombining all 10^N ranges should give a single combined block (i.e. "xx..."). Doing it | |||||
| // with high parallelism should test the thread safety of the concurrent interning map. | |||||
| ForkJoinPool pool = new ForkJoinPool(numThreads); | |||||
| RangeTree combined; | |||||
| try { | |||||
| combined = pool | |||||
| .submit(() -> ranges.parallelStream().reduce(RangeTree.empty(), RangeTree::union)) | |||||
| .get(); | |||||
| } finally { | |||||
| // Always release the pool's worker threads, even if the reduction fails. | |||||
| pool.shutdown(); | |||||
| } | |||||
| assertThat(combined).isEqualTo(ranges(Strings.repeat("x", numDigits))); | |||||
| } | |||||
| // Value type recording one visited DFA edge: its source node, its target node and its digit mask. | |||||
| @AutoValue | |||||
| abstract static class Edge { | |||||
| // Captures the two nodes and the digit mask read from the given DFA edge. | |||||
| static Edge of(DfaNode source, DfaNode target, DfaEdge edge) { | |||||
| // NOTE(review): the arguments follow the declaration order of the accessors below; AutoValue | |||||
| // is documented to generate its constructor in that order, so keep the two in sync. | |||||
| return new AutoValue_RangeTreeTest_Edge(source, target, edge.getDigitMask()); | |||||
| } | |||||
| abstract DfaNode source(); | |||||
| abstract DfaNode target(); | |||||
| abstract int digitMask(); | |||||
| } | |||||
| // Range tree visitor that captures edges visited (in depth first order) | |||||
| private static final class TestVisitor implements DfaVisitor { | |||||
| // Edges in visitation order; final so the captured list can never be swapped out. | |||||
| final List<Edge> visited = new ArrayList<>(); | |||||
| @Override | |||||
| public void visit(DfaNode source, DfaEdge edge, DfaNode target) { | |||||
| // Record this edge first, then descend into its target before returning. | |||||
| visited.add(Edge.of(source, target, edge)); | |||||
| target.accept(this); | |||||
| } | |||||
| } | |||||
| // Builds a RangeTree from the given range specification strings. | |||||
| RangeTree ranges(String... s) { | |||||
| List<RangeSpecification> parsed = specs(s); | |||||
| return RangeTree.from(parsed); | |||||
| } | |||||
| // Parses a single range specification string. | |||||
| private static RangeSpecification spec(String s) { | |||||
| RangeSpecification parsed = RangeSpecification.parse(s); | |||||
| return parsed; | |||||
| } | |||||
| // Parses each string into a range specification, keeping the given order. | |||||
| private static List<RangeSpecification> specs(String... s) { | |||||
| Stream<RangeSpecification> parsed = Stream.of(s).map(text -> RangeSpecification.parse(text)); | |||||
| return parsed.collect(toImmutableList()); | |||||
| } | |||||
| // Returns the closed range [lo, hi] of digit sequences in canonical form. | |||||
| private static Range<DigitSequence> range(String lo, String hi) { | |||||
| DigitSequence lower = DigitSequence.of(lo); | |||||
| DigitSequence upper = DigitSequence.of(hi); | |||||
| Range<DigitSequence> closed = Range.closed(lower, upper); | |||||
| return closed.canonical(domain()); | |||||
| } | |||||
| // Returns an immutable range set containing the given ranges. | |||||
| // The generic varargs array is only read (never stored or written to), so it is safe to | |||||
| // suppress the unchecked "generic array creation" warnings at every call site. | |||||
| @SafeVarargs | |||||
| private static RangeSet<DigitSequence> rangeSetOf(Range<DigitSequence>... r) { | |||||
| return ImmutableRangeSet.copyOf(asList(r)); | |||||
| } | |||||
| // Returns a tree built from the specification parsed from the sequence's string form. | |||||
| private static RangeTree singletonRange(DigitSequence s) { | |||||
| RangeSpecification exact = spec(s.toString()); | |||||
| return RangeTree.from(exact); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,57 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.i18n; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.common.truth.Truth8.assertThat; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import java.util.stream.Stream; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class PhoneRegionTest { | |||||
| @Test | |||||
| public void testOrdering() { | |||||
| assertThat(Stream.of(r("US"), r("GB"), r("AE"), r("001"), r("KR"), r("MN")).sorted()) | |||||
| .containsAtLeast(r("AE"), r("GB"), r("KR"), r("MN"), r("US"), r("001")) | |||||
| .inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testWorld() { | |||||
| assertThat(PhoneRegion.getWorld()).isEqualTo(r("001")); | |||||
| } | |||||
| @Test | |||||
| public void testBadArgs() { | |||||
| assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("ABC"))) | |||||
| .hasMessageThat() | |||||
| .contains("ABC"); | |||||
| assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("us"))) | |||||
| .hasMessageThat() | |||||
| .contains("us"); | |||||
| assertThat(assertThrows(IllegalArgumentException.class, () -> PhoneRegion.of("000"))) | |||||
| .hasMessageThat() | |||||
| .contains("000"); | |||||
| } | |||||
| private static PhoneRegion r(String cldrCode) { | |||||
| return PhoneRegion.of(cldrCode); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,42 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.i18n; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class SimpleLanguageTagTest { | |||||
| @Test | |||||
| public void testSimple() { | |||||
| assertThat(SimpleLanguageTag.of("en").toString()).isEqualTo("en"); | |||||
| assertThat(SimpleLanguageTag.of("zh_Hant").toString()).isEqualTo("zh-Hant"); | |||||
| } | |||||
| @Test | |||||
| public void testBadArgs() { | |||||
| assertThat(assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("x"))) | |||||
| .hasMessageThat().contains("x"); | |||||
| assertThat(assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("EN"))) | |||||
| .hasMessageThat().contains("EN"); | |||||
| assertThat(assertThrows(IllegalArgumentException.class, () -> SimpleLanguageTag.of("003"))) | |||||
| .hasMessageThat().contains("003"); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,82 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.common.truth.Truth8.assertThat; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||||
| import java.util.Optional; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| // Tests for AltFormatSpec: accessor values, specifier construction and argument validation. | |||||
| @RunWith(JUnit4.class) | |||||
| public class AltFormatSpecTest { | |||||
| @Test | |||||
| public void testSimple() { | |||||
| FormatTemplate template = FormatTemplate.parse("XXXX XXXX"); | |||||
| RangeSpecification prefix = RangeSpecification.parse("123"); | |||||
| AltFormatSpec spec = AltFormatSpec.create(template, prefix, "foo", Optional.of("Comment")); | |||||
| // Every argument passed to create() must be readable back unchanged. | |||||
| assertThat(spec.template()).isEqualTo(template); | |||||
| assertThat(spec.prefix()).isEqualTo(prefix); | |||||
| assertThat(spec.parentFormatId()).isEqualTo("foo"); | |||||
| assertThat(spec.comment()).hasValue("Comment"); | |||||
| // The specifier is the template with its leading placeholders replaced by the prefix. | |||||
| assertThat(spec.specifier()).isEqualTo("123X XXXX"); | |||||
| } | |||||
| @Test | |||||
| public void testGoodTemplateAndPrefix() { | |||||
| assertGoodTemplateAndPrefix("XXX XXX", "", "XXX XXX"); | |||||
| assertGoodTemplateAndPrefix("XXX XXX", "123", "123 XXX"); | |||||
| assertGoodTemplateAndPrefix("XXX XXX", "1234", "123 4XX"); | |||||
| assertGoodTemplateAndPrefix("XXX XXX", "123456", "123 456"); | |||||
| assertGoodTemplateAndPrefix("XXX XXX**", "123", "123 XXX**"); | |||||
| assertGoodTemplateAndPrefix("XXX XXX", "12[3-6]", "12[3-6] XXX"); | |||||
| assertGoodTemplateAndPrefix("XXX XXX", "1x3", "1X3 XXX"); | |||||
| } | |||||
| @Test | |||||
| public void testBadTemplateOrPrefix() { | |||||
| // Prefix too long. | |||||
| assertBadTemplateAndPrefix("XXXX", "12345"); | |||||
| // Prefix too long for min length. | |||||
| assertBadTemplateAndPrefix("XXXX**", "12345"); | |||||
| // Bad template chars. | |||||
| assertBadTemplateAndPrefix("XXX-XXX", "123"); | |||||
| // Extra whitespace. | |||||
| assertBadTemplateAndPrefix(" XXXXXX", "123"); | |||||
| // Prefix must not end with "any digit". The template here is otherwise valid (no leading | |||||
| // whitespace) so that only the trailing "x" in the prefix can cause the rejection. | |||||
| assertBadTemplateAndPrefix("XXXXXX", "123xx"); | |||||
| } | |||||
| // Asserts that the template/prefix pair is accepted and yields the expected specifier. | |||||
| private static void assertGoodTemplateAndPrefix(String template, String prefix, String spec) { | |||||
| FormatTemplate t = FormatTemplate.parse(template); | |||||
| RangeSpecification p = RangeSpecification.parse(prefix); | |||||
| assertThat(AltFormatSpec.create(t, p, "foo", Optional.empty()).specifier()).isEqualTo(spec); | |||||
| } | |||||
| // Asserts that AltFormatSpec.create() rejects the pair. Parsing happens outside assertThrows, | |||||
| // so both arguments must parse successfully and only create() itself may throw. | |||||
| private static void assertBadTemplateAndPrefix(String template, String prefix) { | |||||
| FormatTemplate t = FormatTemplate.parse(template); | |||||
| RangeSpecification p = RangeSpecification.parse(prefix); | |||||
| assertThrows(IllegalArgumentException.class, | |||||
| () -> AltFormatSpec.create(t, p, "foo", Optional.empty())); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,111 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import com.google.common.base.CharMatcher; | |||||
| import com.google.common.base.Joiner; | |||||
| import com.google.common.base.Splitter; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import java.io.IOException; | |||||
| import java.io.StringReader; | |||||
| import java.io.StringWriter; | |||||
| import java.util.Arrays; | |||||
| import java.util.List; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class AltFormatsSchemaTest { | |||||
| @Test | |||||
| public void testSimple_export() throws IOException { | |||||
| assertThat( | |||||
| exportCsv( | |||||
| altFormat("123 XXX XXXX", "foo", "Hello World"))) | |||||
| .containsExactly( | |||||
| "Format ; Parent Format ; Comment", | |||||
| "123 XXX XXXX ; foo ; \"Hello World\"") | |||||
| .inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testSimple_import() throws IOException { | |||||
| assertThat( | |||||
| importCsv( | |||||
| "Format ; Parent Format ; Comment", | |||||
| "123 XXX XXXX ; foo ; \"Hello World\"")) | |||||
| .containsExactly( | |||||
| altFormat("123 XXX XXXX", "foo", "Hello World")); | |||||
| } | |||||
| @Test | |||||
| public void testEscapedText_export() throws IOException { | |||||
| assertThat( | |||||
| exportCsv( | |||||
| altFormat("123 XXX XXXX", "foo", "\tHello\nWorld\\"))) | |||||
| .containsExactly( | |||||
| "Format ; Parent Format ; Comment", | |||||
| "123 XXX XXXX ; foo ; \"\\tHello\\nWorld\\\\\"") | |||||
| .inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testEscapedText_import() throws IOException { | |||||
| assertThat( | |||||
| importCsv( | |||||
| "Format ; Parent Format ; Comment", | |||||
| "123 XXX XXXX ; foo ; \"\\tHello\\nWorld\\\\\"")) | |||||
| .containsExactly( | |||||
| altFormat("123 XXX XXXX", "foo", "\tHello\nWorld\\")); | |||||
| } | |||||
| @Test | |||||
| public void testRetainsExplicitOrdering() throws IOException { | |||||
| assertThat( | |||||
| exportCsv( | |||||
| altFormat("123 XXXXXX", "foo", "First"), | |||||
| altFormat("XX XXXX", "bar", "Second"), | |||||
| altFormat("9X XXX XXX", "baz", "Third"))) | |||||
| .containsExactly( | |||||
| "Format ; Parent Format ; Comment", | |||||
| "123 XXXXXX ; foo ; \"First\"", | |||||
| "XX XXXX ; bar ; \"Second\"", | |||||
| "9X XXX XXX ; baz ; \"Third\"") | |||||
| .inOrder(); | |||||
| } | |||||
| private AltFormatSpec altFormat(String spec, String parentId, String comment) { | |||||
| return AltFormatsSchema.parseAltFormat(spec, parentId, comment); | |||||
| } | |||||
| private static List<String> exportCsv(AltFormatSpec... altFormats) throws IOException { | |||||
| try (StringWriter out = new StringWriter()) { | |||||
| AltFormatsSchema.exportCsv(out, Arrays.asList(altFormats)); | |||||
| // Ignore trailing empty lines. | |||||
| return Splitter.on('\n').splitToList(CharMatcher.is('\n').trimTrailingFrom(out.toString())); | |||||
| } | |||||
| } | |||||
| private static ImmutableList<AltFormatSpec> importCsv(String... lines) | |||||
| throws IOException { | |||||
| // Add a trailing newline, since that's what we expect in the real CSV files. | |||||
| StringReader file = new StringReader(Joiner.on('\n').join(lines) + "\n"); | |||||
| return AltFormatsSchema.importAltFormats(file); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,156 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.anchor; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_FIXED_LINE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_MOBILE; | |||||
| import com.google.common.base.CharMatcher; | |||||
| import com.google.common.base.Joiner; | |||||
| import com.google.common.base.Splitter; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment.Anchor; | |||||
| import java.io.IOException; | |||||
| import java.io.StringReader; | |||||
| import java.io.StringWriter; | |||||
| import java.util.Arrays; | |||||
| import java.util.List; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| // Tests CSV export and import of numbering scheme comments via CommentsSchema. | |||||
| @RunWith(JUnit4.class) | |||||
| public class CommentsSchemaTest { | |||||
| // Header row shared by every CSV table in these tests. | |||||
| private static final String HEADER = "Region ; Label ; Comment"; | |||||
| private static final PhoneRegion REGION_US = PhoneRegion.of("US"); | |||||
| private static final PhoneRegion REGION_CA = PhoneRegion.of("CA"); | |||||
| private static final Anchor US_TOP = anchor(REGION_US); | |||||
| private static final Anchor US_FIXED_LINE = anchor(REGION_US, XML_FIXED_LINE); | |||||
| private static final Anchor US_MOBILE = anchor(REGION_US, XML_MOBILE); | |||||
| private static final Anchor US_SHORTCODE = Comment.shortcodeAnchor(REGION_US); | |||||
| private static final Anchor CA_FIXED_LINE = anchor(REGION_CA, XML_FIXED_LINE); | |||||
| @Test | |||||
| public void testSimple_export() throws IOException { | |||||
| List<String> csvLines = exportCsv(comment(US_FIXED_LINE, "Hello World")); | |||||
| assertThat(csvLines) | |||||
| .containsExactly( | |||||
| HEADER, | |||||
| "US ; XML_FIXED_LINE ; \"Hello World\"") | |||||
| .inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testSimple_import() throws IOException { | |||||
| ImmutableList<Comment> imported = importCsv( | |||||
| HEADER, | |||||
| "US ; XML_FIXED_LINE ; \"Hello World\""); | |||||
| assertThat(imported) | |||||
| .containsExactly( | |||||
| comment(US_FIXED_LINE, "Hello World")); | |||||
| } | |||||
| @Test | |||||
| public void testEscapedText_export() throws IOException { | |||||
| // Two comment lines are expected as one CSV value joined by an escaped newline. | |||||
| List<String> csvLines = exportCsv(comment(US_FIXED_LINE, "\tHello", "World\\")); | |||||
| assertThat(csvLines) | |||||
| .containsExactly( | |||||
| HEADER, | |||||
| "US ; XML_FIXED_LINE ; \"\\tHello\\nWorld\\\\\"") | |||||
| .inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testEscapedText_import() throws IOException { | |||||
| // The escaped newline in the CSV value splits the comment back into two lines. | |||||
| ImmutableList<Comment> imported = importCsv( | |||||
| HEADER, | |||||
| "US ; XML_FIXED_LINE ; \"\\tHello\\nWorld\\\\\""); | |||||
| assertThat(imported) | |||||
| .containsExactly( | |||||
| comment(US_FIXED_LINE, "\tHello", "World\\")); | |||||
| } | |||||
| @Test | |||||
| public void testOrdering_export() throws IOException { | |||||
| // Rows are expected sorted by region then label; comments with the same anchor keep | |||||
| // their given order. | |||||
| List<String> csvLines = exportCsv( | |||||
| comment(US_FIXED_LINE, "First"), | |||||
| comment(US_FIXED_LINE, "Second"), | |||||
| comment(US_FIXED_LINE, "Third"), | |||||
| comment(US_TOP, "Top Level Comment"), | |||||
| comment(US_SHORTCODE, "Shortcode Comment"), | |||||
| comment(US_MOBILE, "Other Type"), | |||||
| comment(CA_FIXED_LINE, "Other Region")); | |||||
| assertThat(csvLines) | |||||
| .containsExactly( | |||||
| HEADER, | |||||
| "CA ; XML_FIXED_LINE ; \"Other Region\"", | |||||
| "US ; SC ; \"Shortcode Comment\"", | |||||
| "US ; XML ; \"Top Level Comment\"", | |||||
| "US ; XML_FIXED_LINE ; \"First\"", | |||||
| "US ; XML_FIXED_LINE ; \"Second\"", | |||||
| "US ; XML_FIXED_LINE ; \"Third\"", | |||||
| "US ; XML_MOBILE ; \"Other Type\"") | |||||
| .inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testOrdering_import() throws IOException { | |||||
| // Imported comments are expected in the same sorted order, whatever the row order. | |||||
| ImmutableList<Comment> imported = importCsv( | |||||
| HEADER, | |||||
| "US ; XML_FIXED_LINE ; \"First\"", | |||||
| "US ; XML_FIXED_LINE ; \"Second\"", | |||||
| "US ; XML_FIXED_LINE ; \"Third\"", | |||||
| "US ; XML ; \"Top Level Comment\"", | |||||
| "US ; SC ; \"Shortcode Comment\"", | |||||
| "US ; XML_MOBILE ; \"Other Type\"", | |||||
| "CA ; XML_FIXED_LINE ; \"Other Region\""); | |||||
| assertThat(imported) | |||||
| .containsExactly( | |||||
| comment(CA_FIXED_LINE, "Other Region"), | |||||
| comment(US_SHORTCODE, "Shortcode Comment"), | |||||
| comment(US_TOP, "Top Level Comment"), | |||||
| comment(US_FIXED_LINE, "First"), | |||||
| comment(US_FIXED_LINE, "Second"), | |||||
| comment(US_FIXED_LINE, "Third"), | |||||
| comment(US_MOBILE, "Other Type")) | |||||
| .inOrder(); | |||||
| } | |||||
| // Creates a comment for the anchor from the given lines of text. | |||||
| private Comment comment(Anchor a, String... lines) { | |||||
| List<String> text = Arrays.asList(lines); | |||||
| return Comment.create(a, text); | |||||
| } | |||||
| // Exports the comments as CSV and returns the output split into lines. | |||||
| private static List<String> exportCsv(Comment... comments) throws IOException { | |||||
| try (StringWriter csv = new StringWriter()) { | |||||
| CommentsSchema.exportCsv(csv, Arrays.asList(comments)); | |||||
| // Ignore trailing empty lines. | |||||
| String withoutTrailingNewlines = CharMatcher.is('\n').trimTrailingFrom(csv.toString()); | |||||
| return Splitter.on('\n').splitToList(withoutTrailingNewlines); | |||||
| } | |||||
| } | |||||
| // Joins the lines into CSV text and imports the comments from it. | |||||
| private static ImmutableList<Comment> importCsv(String... lines) | |||||
| throws IOException { | |||||
| // Add a trailing newline, since that's what we expect in the real CSV files. | |||||
| String csvText = Joiner.on('\n').join(lines) + "\n"; | |||||
| StringReader file = new StringReader(csvText); | |||||
| return CommentsSchema.importComments(file); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,160 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.model; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.common.truth.Truth8.assertThat; | |||||
| import static java.util.Optional.empty; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec.FormatTemplate; | |||||
| import java.util.Optional; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class FormatSpecTest { | |||||
| @Test | |||||
| public void testCreate_national() { | |||||
| national("XXXX"); | |||||
| national("XXX***"); | |||||
| national("#XXX XXX"); | |||||
| national("(#XXX) XX**-XXX"); | |||||
| assertThat(national("XX\\XXX").national().skeleton()).isEqualTo("$1X$2"); | |||||
| } | |||||
| @Test | |||||
| public void testCreate_international() { | |||||
| // The international spec can be a duplicate (signifies international formatting is permitted). | |||||
| international("XXX XXXX", "XXX XXXX"); | |||||
| // Or it can be different (including grouping and separators). | |||||
| international("(#XXX) XXXX", "XXX-XXXX"); | |||||
| } | |||||
| @Test | |||||
| public void testCreate_carrier() { | |||||
| carrier("# XXX XXXX", "# @ XXX XXXX"); | |||||
| carrier("XXX XXXX", "@ XXX XXXX"); | |||||
| // Carrier and national prefix can differ on whether national prefix is needed. | |||||
| carrier("XXX XXXX", "#@ XXX XXXX"); | |||||
| } | |||||
| @Test | |||||
| public void testCreate_national_bad() { | |||||
| assertThrows(IllegalArgumentException.class, () -> national("")); | |||||
| assertThrows(IllegalArgumentException.class, () -> national("Hello")); | |||||
| assertThrows(IllegalArgumentException.class, () -> national("$1")); | |||||
| assertThrows(IllegalArgumentException.class, () -> national("XX**XX")); | |||||
| assertThrows(IllegalArgumentException.class, () -> national("****")); | |||||
| assertThrows(IllegalArgumentException.class, () -> national("@ XXX")); | |||||
| } | |||||
| @Test | |||||
| public void testCreate_international_bad() { | |||||
| // National prefix is not allowed. | |||||
| assertThrows(IllegalArgumentException.class, () -> international("#XXXX", "#XXXX")); | |||||
| // Groups must match. | |||||
| assertThrows(IllegalArgumentException.class, () -> international("# XXXX", "XX XX")); | |||||
| assertThrows(IllegalArgumentException.class, () -> international("# XXXX", "XXX")); | |||||
| } | |||||
| @Test | |||||
| public void testCreate_carrier_bad() { | |||||
| // Carrier specs must have '@' present. | |||||
| assertThrows(IllegalArgumentException.class, () -> carrier("XXX XXXX", "XXX XXXX")); | |||||
| // Carrier specs cannot differ after the first group (including separator). | |||||
| assertThrows(IllegalArgumentException.class, () -> carrier("#XXX XXXX", "#@XXX-XXXX")); | |||||
| // National prefix (if present) must come first (if this is ever relaxed, we would need to | |||||
| // change how carrier prefixes are handled and how nationalPrefixForParsing is generated). | |||||
| assertThrows(IllegalArgumentException.class, () -> carrier("# XXX XXXX", "@# XXX XXXX")); | |||||
| } | |||||
| @Test | |||||
| public void testTemplate_splitPrefix() { | |||||
| FormatTemplate t = FormatTemplate.parse("(#) XXX - XXX**"); | |||||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{3,5})"); | |||||
| assertThat(t.getXmlFormat()).isEqualTo("$1 - $2"); | |||||
| assertThat(t.getXmlPrefix()).hasValue("($NP) $FG"); | |||||
| assertThat(t.hasNationalPrefix()).isTrue(); | |||||
| assertThat(t.hasCarrierCode()).isFalse(); | |||||
| } | |||||
| @Test | |||||
| public void testTemplate_noPrefix() { | |||||
| FormatTemplate t = FormatTemplate.parse("XXX XX-XX"); | |||||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})"); | |||||
| assertThat(t.getXmlFormat()).isEqualTo("$1 $2-$3"); | |||||
| assertThat(t.getXmlPrefix()).isEmpty(); | |||||
| assertThat(t.hasNationalPrefix()).isFalse(); | |||||
| assertThat(t.hasCarrierCode()).isFalse(); | |||||
| } | |||||
| @Test | |||||
| public void testTemplate_replacementNoNationalPrefix() { | |||||
| FormatTemplate t = FormatTemplate.parse("{XXX>123} XX-XX"); | |||||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})"); | |||||
| assertThat(t.getXmlFormat()).isEqualTo("$2-$3"); | |||||
| assertThat(t.getXmlPrefix()).hasValue("123 $FG"); | |||||
| assertThat(t.hasNationalPrefix()).isFalse(); | |||||
| assertThat(t.hasCarrierCode()).isFalse(); | |||||
| } | |||||
| @Test | |||||
| public void testTemplate_replacementWithNationalPrefix() { | |||||
| FormatTemplate t = FormatTemplate.parse("#{XXX>123} XX-XX"); | |||||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})"); | |||||
| assertThat(t.getXmlFormat()).isEqualTo("$2-$3"); | |||||
| assertThat(t.getXmlPrefix()).hasValue("$NP123 $FG"); | |||||
| assertThat(t.hasNationalPrefix()).isTrue(); | |||||
| assertThat(t.hasCarrierCode()).isFalse(); | |||||
| } | |||||
| @Test | |||||
| public void testTemplate_replacementNotFirstGroup() { | |||||
| FormatTemplate t = FormatTemplate.parse("XXX {XX>ABC} XX"); | |||||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{3})(\\d{2})(\\d{2})"); | |||||
| assertThat(t.getXmlFormat()).isEqualTo("$1 ABC $3"); | |||||
| assertThat(t.getXmlPrefix()).isEmpty(); | |||||
| assertThat(t.hasNationalPrefix()).isFalse(); | |||||
| assertThat(t.hasCarrierCode()).isFalse(); | |||||
| } | |||||
| @Test | |||||
| public void testTemplate_removeFirstGroupViaReplacement() { | |||||
| // This test is very important for Argentina, where the leading group must be removed (and a | |||||
| // different mobile token is used after the area code). | |||||
| FormatTemplate t = FormatTemplate.parse("{XX>}XXX XXXX"); | |||||
| assertThat(t.getXmlCapturingPattern()).isEqualTo("(\\d{2})(\\d{3})(\\d{4})"); | |||||
| assertThat(t.getXmlFormat()).isEqualTo("$2 $3"); | |||||
| assertThat(t.getXmlPrefix()).isEmpty(); | |||||
| assertThat(t.hasNationalPrefix()).isFalse(); | |||||
| assertThat(t.hasCarrierCode()).isFalse(); | |||||
| } | |||||
| private static FormatSpec national(String national) { | |||||
| return FormatSpec.of(national, empty(), empty(), empty(), false, empty()); | |||||
| } | |||||
| private static FormatSpec international(String national, String intl) { | |||||
| return FormatSpec.of(national, empty(), Optional.of(intl), empty(), false, empty()); | |||||
| } | |||||
| private static FormatSpec carrier(String national, String carrier) { | |||||
| return FormatSpec.of(national, Optional.of(carrier), empty(), empty(), false, empty()); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,70 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.common.truth.Truth8.assertThat; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class AssignmentTest { | |||||
| private static final Column<String> COL_A = Column.ofString("A"); | |||||
| private static final Column<String> COL_B = Column.ofString("B"); | |||||
| private static final Column<Integer> COL_X = Column.ofUnsignedInteger("X"); | |||||
| private static final Schema SCHEMA = Schema.builder().add(COL_A).add(COL_B).add(COL_X).build(); | |||||
| @Test | |||||
| public void testParsing() { | |||||
| assertAssignment(Assignment.parse("A=foo", SCHEMA), COL_A, "foo"); | |||||
| assertAssignment(Assignment.parse(" B = bar ", SCHEMA), COL_B, "bar"); | |||||
| assertUnassignment(Assignment.parse("A=", SCHEMA), COL_A); | |||||
| assertAssignment(Assignment.parse("X=23", SCHEMA), COL_X, 23); | |||||
| assertThrows(IllegalArgumentException.class, () -> Assignment.parse("C=Nope", SCHEMA)); | |||||
| assertThrows(IllegalArgumentException.class, () -> Assignment.parse("X=NaN", SCHEMA)); | |||||
| } | |||||
| @Test | |||||
| public void testOf() { | |||||
| assertAssignment(Assignment.of(COL_A, "foo"), COL_A, "foo"); | |||||
| assertThat(Assignment.of(COL_A, "foo")).isNotEqualTo(Assignment.of(COL_A, "bar")); | |||||
| assertThat(Assignment.of(COL_A, "")).isNotEqualTo(Assignment.of(COL_B, "")); | |||||
| assertThat(Assignment.of(COL_A, COL_A.defaultValue())).isNotEqualTo(Assignment.unassign(COL_A)); | |||||
| assertThrows(NullPointerException.class, () -> Assignment.of(COL_A, null)); | |||||
| } | |||||
| @Test | |||||
| public void testUnassign() { | |||||
| // Not much else to do here... | |||||
| assertThat(Assignment.unassign(COL_A)).isEqualTo(Assignment.unassign(COL_A)); | |||||
| assertUnassignment(Assignment.unassign(COL_A), COL_A); | |||||
| } | |||||
| private static <T extends Comparable<T>> void assertAssignment( | |||||
| Assignment<?> a, Column<T> c, T v) { | |||||
| assertThat(a.column()).isSameInstanceAs(c); | |||||
| assertThat(a.value()).hasValue(v); | |||||
| } | |||||
| private static void assertUnassignment(Assignment<?> a, Column<?> c) { | |||||
| assertThat(a.column()).isSameInstanceAs(c); | |||||
| assertThat(a.value()).isEmpty(); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,71 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat; | |||||
| import static java.util.Arrays.asList; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import java.util.Arrays; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class ChangeTest { | |||||
| private static final Column<String> COL_A = Column.ofString("A"); | |||||
| private static final Column<String> COL_B = Column.ofString("B"); | |||||
| @Test | |||||
| public void testEmpty() { | |||||
| assertThat(Change.empty().getRanges()).isEmpty(); | |||||
| assertThat(Change.empty().getAssignments()).isEmpty(); | |||||
| // Not all "no-op" changes are equal to the "empty" change (unlike RangeTree). This should be | |||||
| // fine however since Changes are expected to have a very short lifecycle in most code and not | |||||
| // be used as keys in maps etc... | |||||
| assertThat(Change.empty()) | |||||
| .isNotEqualTo(Change.builder(RangeTree.empty()).assign(COL_A, "foo").build()); | |||||
| assertThat(Change.empty()).isNotEqualTo(Change.builder(ranges("12xxxx")).build()); | |||||
| } | |||||
| @Test | |||||
| public void testBuilder() { | |||||
| Change c = Change.builder(ranges("12xxxx")).assign(COL_A, "foo").assign(COL_B, "bar").build(); | |||||
| assertThat(c.getRanges()).containsExactly("12xxxx"); | |||||
| Assignment<String> assignFoo = Assignment.of(COL_A, "foo"); | |||||
| Assignment<String> assignBar = Assignment.of(COL_B, "bar"); | |||||
| assertThat(c.getAssignments()).containsExactly(assignFoo, assignBar); | |||||
| assertThat(c).isEqualTo(Change.of(ranges("12xxxx"), asList(assignFoo, assignBar))); | |||||
| // Don't allow same column twice (this could be relaxed in future if necessary)! | |||||
| assertThrows(IllegalArgumentException.class, | |||||
| () -> Change.builder(ranges("12xxxx")).assign(COL_A, "foo").assign(COL_A, "bar").build()); | |||||
| } | |||||
| @Test | |||||
| public void testBuilderUnassignment() { | |||||
| Change c = Change.builder(ranges("12xxxx")).unassign(COL_A).build(); | |||||
| Assignment<String> unassign = Assignment.unassign(COL_A); | |||||
| assertThat(c.getAssignments()).containsExactly(unassign); | |||||
| assertThat(c).isEqualTo(Change.of(ranges("12xxxx"), asList(unassign))); | |||||
| } | |||||
| private static RangeTree ranges(String... rangeSpecs) { | |||||
| return RangeTree.from(Arrays.stream(rangeSpecs).map(RangeSpecification::parse)); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,58 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class ColumnGroupTest { | |||||
| @Test | |||||
| public void testGroupColumns() { | |||||
| Column<Boolean> prototype = Column.ofBoolean("Region"); | |||||
| ColumnGroup<PhoneRegion, Boolean> group = ColumnGroup.byRegion(prototype); | |||||
| Column<Boolean> us = group.getColumnFromId("US"); | |||||
| assertThat(us.getName()).isEqualTo("Region:US"); | |||||
| assertThat(us.type()).isEqualTo(Boolean.class); | |||||
| Column<Boolean> ca = group.getColumn(PhoneRegion.of("CA")); | |||||
| assertThat(ca.getName()).isEqualTo("Region:CA"); | |||||
| // Only the suffix part should be given to get the column from the group. | |||||
| assertThrows(IllegalArgumentException.class, () -> group.getColumnFromId("Region:US")); | |||||
| } | |||||
| @Test | |||||
| public void testExtractGroupColumns() { | |||||
| Column<String> first = Column.ofString("FirstColumn"); | |||||
| Column<String> last = Column.ofString("LastColumn"); | |||||
| Column<Boolean> prototype = Column.ofBoolean("Region"); | |||||
| ColumnGroup<PhoneRegion, Boolean> group = ColumnGroup.byRegion(prototype); | |||||
| Column<Boolean> us = group.getColumnFromId("US"); | |||||
| Column<Boolean> ca = group.getColumn(PhoneRegion.of("CA")); | |||||
| // The prototype is a valid column, but it's not part of its own group. | |||||
| assertThat(group.extractGroupColumns(ImmutableSet.of(first, us, prototype, ca, last))) | |||||
| .containsExactly(PhoneRegion.of("US"), us, PhoneRegion.of("CA"), ca).inOrder(); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,93 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType.XML_UNKNOWN; | |||||
| import static java.lang.Boolean.FALSE; | |||||
| import static java.lang.Boolean.TRUE; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.XmlNumberType; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class ColumnTest { | |||||
| @Test | |||||
| public void testBooleanColumn() { | |||||
| Column<Boolean> column = Column.ofBoolean("bool"); | |||||
| assertThat(column.getName()).isEqualTo("bool"); | |||||
| assertThat(column.type()).isEqualTo(Boolean.class); | |||||
| assertThat(column.cast(true)).isTrue(); | |||||
| assertThrows(ClassCastException.class, () -> column.cast("")); | |||||
| // All upper or all lower case are accepted. | |||||
| assertThat(column.parse("true")).isTrue(); | |||||
| assertThat(column.parse("false")).isFalse(); | |||||
| assertThat(column.parse("TRUE")).isTrue(); | |||||
| assertThat(column.parse("FALSE")).isFalse(); | |||||
| assertThat(column.serialize(TRUE)).isEqualTo("true"); | |||||
| assertThat(column.serialize(FALSE)).isEqualTo("false"); | |||||
| // We're lenient, but not that lenient. | |||||
| assertThrows(IllegalArgumentException.class, () -> column.parse("TruE")); | |||||
| assertThrows(IllegalArgumentException.class, () -> column.parse("FaLse")); | |||||
| assertThrows(IllegalArgumentException.class, () -> Column.ofBoolean("Foo:Bar")); | |||||
| } | |||||
| @Test | |||||
| public void testStringColumn() { | |||||
| Column<String> column = Column.ofString("string"); | |||||
| assertThat(column.getName()).isEqualTo("string"); | |||||
| assertThat(column.type()).isEqualTo(String.class); | |||||
| assertThat(column.cast("hello")).isEqualTo("hello"); | |||||
| assertThat(column.parse("")).isNull(); | |||||
| assertThrows(ClassCastException.class, () -> column.cast(true)); | |||||
| // Anything other than the empty string is permitted. | |||||
| assertThat(column.parse("world")).isEqualTo("world"); | |||||
| assertThat(column.serialize("world")).isEqualTo("world"); | |||||
| // Unquoted whitespace is stripped. | |||||
| assertThat(column.parse(" world ")).isEqualTo("world"); | |||||
| // You can preserve whitespace by surrounding the string with double quotes. | |||||
| assertThat(column.parse("\" world \"")).isEqualTo(" world "); | |||||
| assertThat(column.serialize(" world ")).isEqualTo("\" world \""); | |||||
| // And null is always the empty string. | |||||
| assertThat(column.serialize(null)).isEqualTo(""); | |||||
| assertThrows(IllegalArgumentException.class, () -> Column.ofString("Foo:Bar")); | |||||
| } | |||||
| @Test | |||||
| public void testEnumColumn() { | |||||
| Column<ValidNumberType> column = Column.of(ValidNumberType.class, "type", UNKNOWN); | |||||
| assertThat(column.getName()).isEqualTo("type"); | |||||
| assertThat(column.type()).isEqualTo(ValidNumberType.class); | |||||
| assertThat(column.cast(FIXED_LINE)).isEqualTo(FIXED_LINE); | |||||
| assertThrows(ClassCastException.class, () -> column.cast("")); | |||||
| // Several case formats are supported. | |||||
| assertThat(column.parse("FIXED_LINE")).isEqualTo(FIXED_LINE); | |||||
| assertThat(column.parse("fixed_line")).isEqualTo(FIXED_LINE); | |||||
| assertThat(column.parse("fixedLine")).isEqualTo(FIXED_LINE); | |||||
| // We're lenient, but not that lenient. | |||||
| assertThrows(IllegalArgumentException.class, () -> column.parse("fIxEdLiNe")); | |||||
| assertThrows(IllegalArgumentException.class, | |||||
| () -> Column.of(XmlNumberType.class, "Foo:Bar", XML_UNKNOWN)); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,177 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.CsvParser.rowMapper; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.i18n.phonenumbers.metadata.table.CsvParser.RowMapper; | |||||
| import java.util.ArrayList; | |||||
| import java.util.List; | |||||
| import java.util.stream.Stream; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class CsvParserTest { | |||||
| @Test | |||||
| public void testSimple() { | |||||
| // Simplest case. | |||||
| assertSingleRow(CsvParser.commaSeparated(), "Hello,World!", "Hello", "World!"); | |||||
| // Empty row yields one empty value in the "first column" (matches behaviour with quoting). | |||||
| assertSingleRow(CsvParser.commaSeparated(), "", ""); | |||||
| assertSingleRow(CsvParser.commaSeparated(), "\"\"", ""); | |||||
| // Trailing delimiter yields a trailing empty value (matches behaviour with quoting). | |||||
| assertSingleRow(CsvParser.commaSeparated(), "foo,", "foo", ""); | |||||
| assertSingleRow(CsvParser.commaSeparated(), "foo,\"\"", "foo", ""); | |||||
| } | |||||
| @Test | |||||
| public void testOtherDelimiters() { | |||||
| // Tabs sequences are not "folded" (maybe this could be an option?) | |||||
| assertSingleRow(CsvParser.tabSeparated(), "Hello\t\tWorld!", "Hello", "", "World!"); | |||||
| assertSingleRow(CsvParser.withSeparator(';'), "Hello;World!", "Hello", "World!"); | |||||
| } | |||||
| @Test | |||||
| public void testWhitespaceTrimming() { | |||||
| // Whitespace is preserved by default, but can be trimmed. | |||||
| assertSingleRow(CsvParser.commaSeparated(), | |||||
| " foo, bar, baz ", " foo", " bar", " baz "); | |||||
| assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), | |||||
| " foo, bar, baz ", "foo", "bar", "baz"); | |||||
| assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), | |||||
| " foo, , ", "foo", "", ""); | |||||
| } | |||||
| @Test | |||||
| public void testQuoting() { | |||||
| // Quoting works as expected (and combines with whitespace trimming). | |||||
| assertSingleRow(CsvParser.commaSeparated(), | |||||
| "\"foo\",\"\"\"bar, baz\"\"\"", "foo", "\"bar, baz\""); | |||||
| assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), | |||||
| " \"foo\" , \"\"\"bar, baz\"\"\" ", "foo", "\"bar, baz\""); | |||||
| } | |||||
| @Test | |||||
| public void testQuoting_illegal() { | |||||
| // Without whitespace trimming any quotes in "unquoted" values are not permitted. | |||||
| assertThrows(IllegalArgumentException.class, () -> | |||||
| parse(CsvParser.commaSeparated(), "foo, \"bar, baz\"")); | |||||
| } | |||||
| @Test | |||||
| public void testDelimiter() { | |||||
| assertSingleRow(CsvParser.tabSeparated(), "Hello\tWorld!", "Hello", "World!"); | |||||
| assertSingleRow(CsvParser.withSeparator(';'), "Hello;World!", "Hello", "World!"); | |||||
| } | |||||
| @Test | |||||
| public void testUnicode() { | |||||
| assertSingleRow(CsvParser.withSeparator('-'), "😱-😂-💩", "😱", "😂", "💩"); | |||||
| assertSingleRow(CsvParser.commaSeparated(), "\0,😱😂,\n", "\0", "😱😂", "\n"); | |||||
| // Fun fact, not all ISO control codes count as "whitespace". | |||||
| assertSingleRow(CsvParser.commaSeparated().trimWhitespace(), "\0,😱😂,\n", "\0", "😱😂", ""); | |||||
| } | |||||
| @Test | |||||
| public void testMultiline() { | |||||
| // Newlines become literals in quoted values. | |||||
| List<List<String>> rows = parse(CsvParser.commaSeparated().allowMultiline(), | |||||
| "foo,\"Hello,", | |||||
| "World!\""); | |||||
| assertThat(rows).hasSize(1); | |||||
| assertThat(rows.get(0)).containsExactly("foo", "Hello,\nWorld!").inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testMultilineWithTrimming() { | |||||
| List<List<String>> rows = parse( | |||||
| CsvParser.commaSeparated().allowMultiline().trimWhitespace(), | |||||
| " foo , \" Hello,", | |||||
| "World! \" "); | |||||
| assertThat(rows).hasSize(1); | |||||
| assertThat(rows.get(0)).containsExactly("foo", " Hello,\nWorld! ").inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testMultiline_illegal() { | |||||
| // If not configured for multiline values, this is an unterminated quoted value. | |||||
| assertThrows(IllegalArgumentException.class, () -> | |||||
| parse(CsvParser.commaSeparated(), "foo,\"Hello,", "World!\"")); | |||||
| // This fails because no more lines exist (even if multiline is allowed) | |||||
| assertThrows(IllegalArgumentException.class, () -> | |||||
| parse(CsvParser.commaSeparated().allowMultiline(), "foo,\"Hello,")); | |||||
| } | |||||
| @Test | |||||
| public void testRowMapping() { | |||||
| List<ImmutableMap<String, String>> rows = parseMap( | |||||
| CsvParser.commaSeparated(), | |||||
| rowMapper(), | |||||
| "FOO,BAR", | |||||
| "foo,bar", | |||||
| "Hello,World!", | |||||
| "No Trailing,", | |||||
| ",", | |||||
| ""); | |||||
| assertThat(rows).hasSize(5); | |||||
| assertThat(rows.get(0)).containsExactly("FOO", "foo", "BAR", "bar").inOrder(); | |||||
| assertThat(rows.get(1)).containsExactly("FOO", "Hello", "BAR", "World!").inOrder(); | |||||
| assertThat(rows.get(2)).containsExactly("FOO", "No Trailing").inOrder(); | |||||
| assertThat(rows.get(3)).isEmpty(); | |||||
| assertThat(rows.get(4)).isEmpty(); | |||||
| } | |||||
| @Test | |||||
| public void testRowMapping_withHeader() { | |||||
| List<String> header = new ArrayList<>(); | |||||
| List<ImmutableMap<String, String>> rows = parseMap( | |||||
| CsvParser.commaSeparated(), | |||||
| rowMapper(header::addAll), | |||||
| "FOO,BAR", | |||||
| "foo,bar"); | |||||
| assertThat(rows).hasSize(1); | |||||
| assertThat(header).containsExactly("FOO", "BAR").inOrder(); | |||||
| assertThat(rows.get(0)).containsExactly("FOO", "foo", "BAR", "bar").inOrder(); | |||||
| } | |||||
| private void assertSingleRow(CsvParser parser, String line, String... values) { | |||||
| List<List<String>> rows = parse(parser, line); | |||||
| assertThat(rows).hasSize(1); | |||||
| assertThat(rows.get(0)).containsExactlyElementsIn(values).inOrder(); | |||||
| } | |||||
| private static List<List<String>> parse(CsvParser parser, String... lines) { | |||||
| List<List<String>> rows = new ArrayList<>(); | |||||
| parser.parse(Stream.of(lines), r -> rows.add(r.collect(toImmutableList()))); | |||||
| return rows; | |||||
| } | |||||
| private static List<ImmutableMap<String, String>> parseMap( | |||||
| CsvParser p, RowMapper mapper, String... lines) { | |||||
| List<ImmutableMap<String, String>> rows = new ArrayList<>(); | |||||
| p.parse(Stream.of(lines), mapper.mapTo(rows::add)); | |||||
| return rows; | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,275 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.AREA_CODE_LENGTH; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.COMMENT; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.FIXED_LINE; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.FIXED_LINE_OR_MOBILE; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType.MOBILE; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.FORMAT; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.REGIONS; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.TABLE_COLUMNS; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.TYPE; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.toCsv; | |||||
| import static com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.toRangeTable; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.ALL; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.CHANGES; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.LHS; | |||||
| import static com.google.i18n.phonenumbers.metadata.table.CsvTable.DiffMode.RHS; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.common.collect.HashBasedTable; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.Table; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.model.ExamplesTableSchema.ExampleNumberKey; | |||||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import java.io.IOException; | |||||
| import java.io.PrintWriter; | |||||
| import java.io.StringReader; | |||||
| import java.io.StringWriter; | |||||
| import java.util.Optional; | |||||
| import java.util.stream.IntStream; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class CsvTableTest { | |||||
| private static final CsvKeyMarshaller<String> TEST_MARSHALLER = | |||||
| CsvKeyMarshaller.ofSortedString("Id"); | |||||
| private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA")); | |||||
| private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US")); | |||||
| @Test | |||||
| public void testRangeTableExport() throws IOException { | |||||
| ImmutableList<Column<?>> columns = | |||||
| ImmutableList.of(TYPE, AREA_CODE_LENGTH, REGION_CA, REGION_US, COMMENT); | |||||
| RangeTable table = RangeTable.builder(TABLE_COLUMNS) | |||||
| .apply(row(columns, key("1", 7), MOBILE, 0, true, true)) | |||||
| .apply(row(columns, key("2x[34]", 7, 8), FIXED_LINE_OR_MOBILE, 0, true, null, "Foo Bar")) | |||||
| .apply(row(columns, key("345", 8), FIXED_LINE, 3, true, null)) | |||||
| .apply(row(columns, key("456x8", 8), FIXED_LINE, 3, null, true)) | |||||
| .build(); | |||||
| CsvTable<RangeKey> csv = toCsv(table); | |||||
| assertCsv(csv, | |||||
| "Prefix ; Length ; Type ; Area Code Length ; Regions ; Comment", | |||||
| "1 ; 7 ; MOBILE ; 0 ; \"CA,US\"", | |||||
| "2x[34] ; 7,8 ; FIXED_LINE_OR_MOBILE ; 0 ; \"CA\" ; \"Foo Bar\"", | |||||
| "345 ; 8 ; FIXED_LINE ; 3 ; \"CA\"", | |||||
| "456x8 ; 8 ; FIXED_LINE ; 3 ; \"US\""); | |||||
| assertThat(toRangeTable(csv)).isEqualTo(table); | |||||
| } | |||||
| @Test | |||||
| public void testExampleNumberExport() throws IOException { | |||||
| Table<PhoneRegion, ValidNumberType, DigitSequence> table = HashBasedTable.create(); | |||||
| table.put(PhoneRegion.of("US"), ValidNumberType.TOLL_FREE, DigitSequence.of("800123456")); | |||||
| table.put(PhoneRegion.of("US"), ValidNumberType.PREMIUM_RATE, DigitSequence.of("945123456")); | |||||
| table.put(PhoneRegion.of("CA"), ValidNumberType.MOBILE, DigitSequence.of("555123456")); | |||||
| // Ordering is well defined in the CSV output. | |||||
| // TODO: Consider making columns able to identify if their values need CSV escaping. | |||||
| CsvTable<ExampleNumberKey> csv = ExamplesTableSchema.toCsv(table); | |||||
| assertCsv(csv, | |||||
| "Region ; Type ; Number", | |||||
| "CA ; MOBILE ; \"555123456\"", | |||||
| "US ; TOLL_FREE ; \"800123456\"", | |||||
| "US ; PREMIUM_RATE ; \"945123456\""); | |||||
| assertThat(ExamplesTableSchema.toExampleTable(csv)).isEqualTo(table); | |||||
| } | |||||
| @Test | |||||
| public void testDiff() throws IOException { | |||||
| ImmutableList<Column<?>> columns = ImmutableList.of(COMMENT); | |||||
| RangeTable lhs = RangeTable.builder(TABLE_COLUMNS) | |||||
| .apply(row(columns, key("1", 6), "Left Side Only")) | |||||
| .apply(row(columns, key("3", 6), "Left Value")) | |||||
| .apply(row(columns, key("4", 6), "Same Value")) | |||||
| .build(); | |||||
| RangeTable rhs = RangeTable.builder(TABLE_COLUMNS) | |||||
| .apply(row(columns, key("2", 6), "Right Side Only")) | |||||
| .apply(row(columns, key("3", 6), "Right Value")) | |||||
| .apply(row(columns, key("4", 6), "Same Value")) | |||||
| .build(); | |||||
| assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), ALL), | |||||
| "Diff ; Prefix ; Length ; Comment", | |||||
| "---- ; 1 ; 6 ; \"Left Side Only\"", | |||||
| "++++ ; 2 ; 6 ; \"Right Side Only\"", | |||||
| "<<<< ; 3 ; 6 ; \"Left Value\"", | |||||
| ">>>> ; 3 ; 6 ; \"Right Value\"", | |||||
| "==== ; 4 ; 6 ; \"Same Value\""); | |||||
| assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), CHANGES), | |||||
| "Diff ; Prefix ; Length ; Comment", | |||||
| "---- ; 1 ; 6 ; \"Left Side Only\"", | |||||
| "++++ ; 2 ; 6 ; \"Right Side Only\"", | |||||
| "<<<< ; 3 ; 6 ; \"Left Value\"", | |||||
| ">>>> ; 3 ; 6 ; \"Right Value\""); | |||||
| assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), LHS), | |||||
| "Diff ; Prefix ; Length ; Comment", | |||||
| "---- ; 1 ; 6 ; \"Left Side Only\"", | |||||
| "<<<< ; 3 ; 6 ; \"Left Value\"", | |||||
| "==== ; 4 ; 6 ; \"Same Value\""); | |||||
| assertCsv(CsvTable.diff(toCsv(lhs), toCsv(rhs), RHS), | |||||
| "Diff ; Prefix ; Length ; Comment", | |||||
| "++++ ; 2 ; 6 ; \"Right Side Only\"", | |||||
| ">>>> ; 3 ; 6 ; \"Right Value\"", | |||||
| "==== ; 4 ; 6 ; \"Same Value\""); | |||||
| } | |||||
| @Test | |||||
| public void testEscaping() throws IOException { | |||||
| ImmutableList<Column<?>> columns = ImmutableList.of(COMMENT); | |||||
| RangeTable table = RangeTable.builder(TABLE_COLUMNS) | |||||
| .apply(row(columns, key("1", 6), "Doubling \" Double Quotes")) | |||||
| .apply(row(columns, key("2", 6), "Escaping \n Newlines")) | |||||
| .apply(row(columns, key("3", 6), "Other \t \\ \r Escaping")) | |||||
| .build(); | |||||
| assertCsv(toCsv(table), | |||||
| "Prefix ; Length ; Comment", | |||||
| "1 ; 6 ; \"Doubling \"\" Double Quotes\"", | |||||
| "2 ; 6 ; \"Escaping \\n Newlines\"", | |||||
| "3 ; 6 ; \"Other \\t \\\\ \\r Escaping\""); | |||||
| } | |||||
| @Test | |||||
| public void testOrdering() throws IOException { | |||||
| // This came up in relation to discovering that ImmutableSet.copyOf(TreeBasedTable) does not | |||||
| // result in rows/columns in the order of the TreeBasedTable's column comparator. Hence the | |||||
| // code does a copy via a temporary ImmutableTable.Builder. | |||||
| ImmutableList<Column<?>> columns = | |||||
| ImmutableList.of(TYPE, AREA_CODE_LENGTH, REGION_US, COMMENT); | |||||
| RangeTable table = RangeTable.builder(TABLE_COLUMNS) | |||||
| .apply(row(columns, key("1", 4), null, null, null, "Foo Bar")) | |||||
| .apply(row(columns, key("2", 4), null, null, true)) | |||||
| .apply(row(columns, key("3", 4), null, 2)) | |||||
| .apply(row(columns, key("4", 4), MOBILE)) | |||||
| .build(); | |||||
| CsvTable<RangeKey> csv = toCsv(table); | |||||
| assertCsv( | |||||
| csv, | |||||
| "Prefix ; Length ; Type ; Area Code Length ; Regions ; Comment", | |||||
| "1 ; 4 ; ; ; ; \"Foo Bar\"", | |||||
| "2 ; 4 ; ; ; \"US\"", | |||||
| "3 ; 4 ; ; 2", | |||||
| "4 ; 4 ; MOBILE"); | |||||
| assertThat(toRangeTable(csv)).isEqualTo(table); | |||||
| } | |||||
| // This is (Jan 2019) currently impossible using ImmutableTable. | |||||
| @Test | |||||
| public void testOptionalRowOrdering() throws IOException { | |||||
| CsvKeyMarshaller<Integer> unorderedIntegerMarshaller = | |||||
| new CsvKeyMarshaller<>( | |||||
| n -> IntStream.of(n).boxed().map(Object::toString), | |||||
| p -> Integer.parseInt(p.get(0)), | |||||
| Optional.empty(), | |||||
| "Unordered"); | |||||
| CsvSchema<Integer> schema = | |||||
| CsvSchema.of(unorderedIntegerMarshaller, RangesTableSchema.SCHEMA.columns()); | |||||
| CsvTable.Builder<Integer> csv = CsvTable.builder(schema); | |||||
| csv.putRow(4, ImmutableMap.of(COMMENT, "Foo Bar")); | |||||
| csv.putRow(1, ImmutableMap.of(FORMAT, "Quux")); | |||||
| csv.putRow(3, ImmutableMap.of(AREA_CODE_LENGTH, 2)); | |||||
| csv.putRow(2, ImmutableMap.of(TYPE, MOBILE)); | |||||
| assertCsv( | |||||
| csv.build(), | |||||
| "Unordered ; Type ; Area Code Length ; Format ; Comment", | |||||
| "4 ; ; ; ; \"Foo Bar\"", | |||||
| "1 ; ; ; \"Quux\"", | |||||
| "3 ; ; 2", | |||||
| "2 ; MOBILE"); | |||||
| } | |||||
| @Test | |||||
| public void testUnsafeString() { | |||||
| Column<String> unsafe = Column.ofString("unsafe"); | |||||
| CsvSchema<String> schema = CsvSchema.of(TEST_MARSHALLER, Schema.builder().add(unsafe).build()); | |||||
| CsvTable<String> csv = | |||||
| CsvTable.builder(schema).put("key", unsafe, "Control chars Not \0 Allowed").build(); | |||||
| assertThrows(IllegalArgumentException.class, () -> export(csv, false)); | |||||
| } | |||||
| private enum Perverse { | |||||
| UNSAFE_VALUE() { | |||||
| @Override | |||||
| public String toString() { | |||||
| return "Unsafe ; for \n \"CSV\""; | |||||
| } | |||||
| }; | |||||
| } | |||||
| @Test | |||||
| public void testPerverseEdgeCase() { | |||||
| Column<Perverse> unsafe = Column.of(Perverse.class, "Unsafe", Perverse.UNSAFE_VALUE); | |||||
| CsvSchema<String> schema = CsvSchema.of(TEST_MARSHALLER, Schema.builder().add(unsafe).build()); | |||||
| CsvTable<String> csv = | |||||
| CsvTable.builder(schema).put("key", unsafe, Perverse.UNSAFE_VALUE).build(); | |||||
| assertThrows(IllegalArgumentException.class, () -> export(csv, false)); | |||||
| } | |||||
| private static <K> void assertCsv(CsvTable<K> csv, String... lines) throws IOException { | |||||
| String aligned = join(lines); | |||||
| // Assumes test values don't contain semi-colons where space matters. | |||||
| String unaligned = aligned.replaceAll(" *; *", ";"); | |||||
| String exported = export(csv, true); | |||||
| assertThat(exported).isEqualTo(aligned); | |||||
| assertThat(export(csv, false)).isEqualTo(unaligned); | |||||
| CsvTable<K> imported = CsvTable.importCsv(csv.getSchema(), new StringReader(exported)); | |||||
| assertThat(csv).isEqualTo(imported); | |||||
| } | |||||
| private static String export(CsvTable<?> csv, boolean align) { | |||||
| StringWriter out = new StringWriter(); | |||||
| csv.exportCsv(new PrintWriter(out), align); | |||||
| return out.toString(); | |||||
| } | |||||
| private static Change row(ImmutableList<Column<?>> columns, RangeKey key, Object... values) { | |||||
| Change.Builder row = Change.builder(key.asRangeTree()); | |||||
| checkArgument(values.length <= columns.size()); | |||||
| int n = 0; | |||||
| for (Object v : values) { | |||||
| if (v != null) { | |||||
| Column<?> c = columns.get(n); | |||||
| row.assign(c, c.cast(v)); | |||||
| } | |||||
| n++; | |||||
| } | |||||
| return row.build(); | |||||
| } | |||||
| private static String join(String... lines) { | |||||
| return String.join("\n", lines) + "\n"; | |||||
| } | |||||
| private static RangeKey key(String spec, Integer... lengths) { | |||||
| RangeSpecification prefix = | |||||
| spec.isEmpty() ? RangeSpecification.empty() : RangeSpecification.parse(spec); | |||||
| return RangeKey.create(prefix, ImmutableSet.copyOf(lengths)); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,132 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import java.util.stream.Stream; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class RangeKeyTest { | |||||
| @Test | |||||
| public void testEmpty() { | |||||
| ImmutableList<RangeKey> keys = RangeKey.decompose(RangeTree.empty()); | |||||
| assertThat(keys).isEmpty(); | |||||
| } | |||||
| @Test | |||||
| public void testZeroLengthMatch() { | |||||
| ImmutableList<RangeKey> keys = RangeKey.decompose(RangeTree.from(RangeSpecification.empty())); | |||||
| assertThat(keys).containsExactly(key("", 0)); | |||||
| } | |||||
| @Test | |||||
| public void testOnlyAnyPath() { | |||||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges("xxx", "xxxx", "xxxxx")); | |||||
| assertThat(keys).containsExactly(key("", 3, 4, 5)); | |||||
| } | |||||
| @Test | |||||
| public void testSimple() { | |||||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges("123xxx", "123xxxx", "123xxxxx")); | |||||
| assertThat(keys).containsExactly(key("123", 6, 7, 8)); | |||||
| } | |||||
| @Test | |||||
| public void testEmbeddedRanges() { | |||||
| ImmutableList<RangeKey> keys = | |||||
| RangeKey.decompose(ranges("1x", "1xx", "1xx23", "1xx23x", "1xx23xx")); | |||||
| assertThat(keys).containsExactly(key("1", 2, 3), key("1xx23", 5, 6, 7)).inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testSplitFactors() { | |||||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges("123xxxx", "1234x", "1234xx")); | |||||
| // If the input wasn't "factored" first, this would result in: | |||||
| // key("123[0-35-9]", 7), key("1234", 5, 6, 7) | |||||
| assertThat(keys).containsExactly(key("123", 7), key("1234", 5, 6)).inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testMergeStrategy() { | |||||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges("12[0-4]xxx", "12xxx", "12xx")); | |||||
| // The merge strategy for factorizing the tree will prefer to keep the longer paths intact | |||||
| // and split shorter paths around it. Using the other strategy we would get: | |||||
| // key("12", 4, 5), key("12[0-4]", 6) | |||||
| assertThat(keys).containsExactly(key("12[0-4]", 4, 5, 6), key("12[5-9]", 4, 5)).inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testAsRangeSpecifications() { | |||||
| assertThat(key("", 3, 4, 5).asRangeSpecifications()) | |||||
| .containsExactly(spec("xxx"), spec("xxxx"), spec("xxxxx")).inOrder(); | |||||
| assertThat(key("1[2-4]", 3, 4, 5).asRangeSpecifications()) | |||||
| .containsExactly(spec("1[2-4]x"), spec("1[2-4]xx"), spec("1[2-4]xxx")).inOrder(); | |||||
| assertThat(key("1x[468]", 3, 5, 7).asRangeSpecifications()) | |||||
| .containsExactly(spec("1x[468]"), spec("1x[468]xx"), spec("1x[468]xxxx")).inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testSimpleRealWorldData() { | |||||
| // From ITU German numbering plan, first few fixed line ranges. | |||||
| PrefixTree prefixes = | |||||
| PrefixTree.from(ranges("20[1-389]", "204[135]", "205[1-468]", "206[4-6]", "20[89]")); | |||||
| RangeTree ranges = prefixes.retainFrom( | |||||
| ranges("xxxxxx", "xxxxxxx", "xxxxxxxx", "xxxxxxxxx", "xxxxxxxxxx", "xxxxxxxxxxx")); | |||||
| ImmutableList<RangeKey> keys = RangeKey.decompose(ranges); | |||||
| assertThat(keys).containsExactly( | |||||
| key("20[1-389]", 6, 7, 8, 9, 10, 11), | |||||
| key("204[135]", 6, 7, 8, 9, 10, 11), | |||||
| key("205[1-468]", 6, 7, 8, 9, 10, 11), | |||||
| key("206[4-6]", 6, 7, 8, 9, 10, 11)) | |||||
| .inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void testContains() { | |||||
| RangeKey key = key("1[23]", 7, 8, 9); | |||||
| assertThat(key.contains(digitSequence("12"), 8)).isTrue(); | |||||
| assertThat(key.contains(digitSequence("12"), 10)).isFalse(); | |||||
| assertThat(key.contains(digitSequence("7"), 8)).isFalse(); | |||||
| } | |||||
| private static RangeKey key(String spec, Integer... lengths) { | |||||
| RangeSpecification prefix = | |||||
| spec.isEmpty() ? RangeSpecification.empty() : RangeSpecification.parse(spec); | |||||
| return RangeKey.create(prefix, ImmutableSet.copyOf(lengths)); | |||||
| } | |||||
| private static RangeTree ranges(String... spec) { | |||||
| return RangeTree.from(Stream.of(spec).map(RangeSpecification::parse)); | |||||
| } | |||||
| private static RangeSpecification spec(String spec) { | |||||
| return RangeSpecification.parse(spec); | |||||
| } | |||||
| private static DigitSequence digitSequence(String spec) { | |||||
| return DigitSequence.of(spec); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,412 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.collect.ImmutableList.toImmutableList; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.FIXED_LINE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.MOBILE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.PREMIUM_RATE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.SHARED_COST; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.TOLL_FREE; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN; | |||||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTableSubject.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.testing.RangeTreeSubject.assertThat; | |||||
| import static java.util.stream.IntStream.rangeClosed; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.Table; | |||||
| import com.google.common.collect.Table.Cell; | |||||
| import com.google.common.collect.Tables; | |||||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||||
| import java.util.Arrays; | |||||
| import java.util.Map; | |||||
| import java.util.Optional; | |||||
| import java.util.function.Function; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class RangeTableTest { | |||||
| private static final Column<ValidNumberType> TYPE = | |||||
| Column.of(ValidNumberType.class, "Type", UNKNOWN); | |||||
| public static final Column<Integer> AREA_CODE_LENGTH = Column.ofUnsignedInteger("AreaCodeLength"); | |||||
| private static final ColumnGroup<PhoneRegion, Boolean> REGIONS = | |||||
| ColumnGroup.byRegion(Column.ofBoolean("Region")); | |||||
| private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US")); | |||||
| private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA")); | |||||
| private static final Schema SCHEMA = | |||||
| Schema.builder().add(TYPE).add(AREA_CODE_LENGTH).add(REGIONS).build(); | |||||
| // This is essentially the most "extreme" simplification you can have. All detail is removed and | |||||
| // lengths are merged into a contiguous range. It's basically like turning a range into "\d{n,m}" | |||||
| // For example, { "123", "12345" } becomes { "xxx", "xxxx", "xxxxx" }. | |||||
| private static final Function<Change, RangeTree> EXTREME_SIMPLIFICATION = | |||||
| c -> RangeTree.from( | |||||
| rangeClosed(c.getRanges().getLengths().first(), c.getRanges().getLengths().last()) | |||||
| .mapToObj(RangeSpecification::any)); | |||||
| @Test | |||||
| public void testEmptyMap() { | |||||
| RangeTable table = RangeTable.builder(SCHEMA).build(); | |||||
| assertThat(table).isEmpty(); | |||||
| } | |||||
| @Test | |||||
| public void testBasicAssign() { | |||||
| RangeTable.Builder table = RangeTable.builder(SCHEMA); | |||||
| table.assign(TYPE, MOBILE, ranges("1[234]xxxx"), OverwriteMode.ALWAYS); | |||||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("1[234]xxxx")); | |||||
| table.assign(TYPE, null, ranges("13xxxx"), OverwriteMode.ALWAYS); | |||||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("1[24]xxxx")); | |||||
| Assignment<ValidNumberType> fixedLine = Assignment.of(TYPE, FIXED_LINE); | |||||
| // Overwrite an existing range. | |||||
| table.assign(fixedLine, ranges("14xxxx"), OverwriteMode.ALWAYS); | |||||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx")); | |||||
| assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("14xxxx")); | |||||
| // Partially overwrite an existing range (same value). | |||||
| table.assign(fixedLine, ranges("1[34]xxxx"), OverwriteMode.SAME); | |||||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx")); | |||||
| assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("1[34]xxxx")); | |||||
| // Fail to overwrite range with a different value in "SAME" mode. | |||||
| assertThrows(IllegalArgumentException.class, | |||||
| () -> table.assign(fixedLine, ranges("1[23]xxxx"), OverwriteMode.SAME)); | |||||
| // Add new ranges (but never overwriting). | |||||
| table.assign(fixedLine, ranges("15xxxx"), OverwriteMode.NEVER); | |||||
| assertThat(table.getRanges(TYPE, MOBILE)).isEqualTo(ranges("12xxxx")); | |||||
| assertThat(table.getRanges(TYPE, FIXED_LINE)).isEqualTo(ranges("1[3-5]xxxx")); | |||||
| // Fail to write ranges with the same value in "NEVER" mode. | |||||
| assertThrows(IllegalArgumentException.class, | |||||
| () -> table.assign(fixedLine, ranges("15xxxx"), OverwriteMode.NEVER)); | |||||
| // Unassignment (null value) makes no sense for modes other than "ALWAYS". | |||||
| // TODO: This highlights the way this API is bad, make a separate "unassign" method. | |||||
| assertThrows(IllegalArgumentException.class, | |||||
| () -> table.assign(TYPE, null, ranges("123"), OverwriteMode.SAME)); | |||||
| assertThrows(IllegalArgumentException.class, | |||||
| () -> table.assign(TYPE, null, ranges("123"), OverwriteMode.NEVER)); | |||||
| } | |||||
| @Test | |||||
| public void testApplyChanges() { | |||||
| // Changes ordered top-to-bottom. | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| .apply(assign( | |||||
| ranges("[18]2xxxxx"), ImmutableMap.of(TYPE, MOBILE, AREA_CODE_LENGTH, 3))) | |||||
| .apply(assign(ranges("7xxxxxx"), TYPE, MOBILE)) | |||||
| .apply(assign(ranges("[1-3]xxxxxx"), TYPE, FIXED_LINE)) | |||||
| .build(); | |||||
| // The union of all the ranges. | |||||
| assertThat(table).allRanges().containsExactly("[1-37]xxxxxx", "82xxxxx"); | |||||
| // The ranges assigned for various columns. | |||||
| assertThat(table).assigned(TYPE).containsExactly("[1-37]xxxxxx", "82xxxxx"); | |||||
| assertThat(table).assigned(AREA_CODE_LENGTH).containsExactly("[18]2xxxxx"); | |||||
| // Note that the 12xxxxx range is replaced by the fixed line in the type map. | |||||
| assertThat(table).assigned(TYPE, FIXED_LINE).containsExactly("[1-3]xxxxxx"); | |||||
| assertThat(table).assigned(TYPE, MOBILE).containsExactly("7xxxxxx", "82xxxxx"); | |||||
| // Area code length unaffected by update of the 12xxxxx range (only type was affected). | |||||
| assertThat(table).assigned(AREA_CODE_LENGTH, 3).containsExactly("[18]2xxxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testBareRangeAddition() { | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| .add(ranges("1xxxxx")) | |||||
| .apply(assign(ranges("12xxxx"), TYPE, MOBILE)) | |||||
| .build(); | |||||
| assertThat(table).allRanges().containsExactly("1xxxxx"); | |||||
| // Note that there is not "getUnassignedRanges()" on RangeTable (yet), so we fudge it by | |||||
| // checking that there's only one column and looking at all the assigned ranges in it. | |||||
| assertThat(table).hasColumns(TYPE); | |||||
| assertThat(table).assigned(TYPE).containsExactly("12xxxx"); | |||||
| // Also check that the re-built builder remembers the unassigned ranges. | |||||
| RangeTable.Builder builder = table.toBuilder(); | |||||
| assertThat(builder.getAllRanges()).containsExactly("1xxxxx"); | |||||
| assertThat(builder.getAssignedRanges(TYPE)).containsExactly("12xxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testAssignAndUnassign() { | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| .apply(assign(ranges("1xxxxx"), TYPE, MOBILE)) | |||||
| .apply(unassign(ranges("1[0-4]xxxx"), TYPE)) | |||||
| .build(); | |||||
| assertThat(table).allRanges().containsExactly("1xxxxx"); | |||||
| assertThat(table).hasColumns(TYPE); | |||||
| assertThat(table).assigned(TYPE).containsExactly("1[5-9]xxxx"); | |||||
| // Also check that the re-built builder remembers the unassigned ranges. | |||||
| RangeTable.Builder builder = table.toBuilder(); | |||||
| assertThat(builder.getAllRanges()).containsExactly("1xxxxx"); | |||||
| assertThat(builder.getAssignedRanges(TYPE)).containsExactly("1[5-9]xxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testAssignAndRemove() { | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| .apply(assign(ranges("1xxxxx"), TYPE, MOBILE)) | |||||
| .remove(ranges("1[5-9]xxxx")) | |||||
| .build(); | |||||
| assertThat(table).allRanges().containsExactly("1[0-4]xxxx"); | |||||
| assertThat(table).hasColumns(TYPE); | |||||
| assertThat(table).assigned(TYPE).containsExactly("1[0-4]xxxx"); | |||||
| RangeTable.Builder builder = table.toBuilder(); | |||||
| assertThat(builder.getAllRanges()).containsExactly("1[0-4]xxxx"); | |||||
| assertThat(builder.getAssignedRanges(TYPE)).containsExactly("1[0-4]xxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testTableImportExport() { | |||||
| RangeTable original = RangeTable.builder(SCHEMA) | |||||
| .apply(assign(ranges("[13]xxxxxx"), TYPE, MOBILE)) | |||||
| .apply(assign(ranges("[24]xxxxxx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("[14]xxxxxx"), AREA_CODE_LENGTH, 3)) | |||||
| .apply(assign(ranges("[23]xxxxxx"), AREA_CODE_LENGTH, 2)) | |||||
| .build(); | |||||
| Table<RangeSpecification, Column<?>, Optional<?>> exported = original.toImmutableTable(); | |||||
| assertThat(exported).hasSize(8); | |||||
| assertThat(exported).containsCell(assigned("1xxxxxx", TYPE, MOBILE)); | |||||
| assertThat(exported).containsCell(assigned("1xxxxxx", AREA_CODE_LENGTH, 3)); | |||||
| assertThat(exported).containsCell(assigned("2xxxxxx", TYPE, FIXED_LINE)); | |||||
| assertThat(exported).containsCell(assigned("2xxxxxx", AREA_CODE_LENGTH, 2)); | |||||
| assertThat(exported).containsCell(assigned("3xxxxxx", TYPE, MOBILE)); | |||||
| assertThat(exported).containsCell(assigned("3xxxxxx", AREA_CODE_LENGTH, 2)); | |||||
| assertThat(exported).containsCell(assigned("4xxxxxx", TYPE, FIXED_LINE)); | |||||
| assertThat(exported).containsCell(assigned("4xxxxxx", AREA_CODE_LENGTH, 3)); | |||||
| RangeTable imported = RangeTable.from(SCHEMA, exported); | |||||
| assertThat(imported).isEqualTo(original); | |||||
| assertThat(imported.toImmutableTable()).isEqualTo(exported); | |||||
| } | |||||
| @Test | |||||
| public void testColumnGroupMapping() { | |||||
| // Changes ordered top-to-bottom. | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| .apply(assign(ranges("1xxxxx"), ImmutableMap.of(REGION_US, true))) | |||||
| .apply(assign(ranges("2xxxxx"), ImmutableMap.of(REGION_CA, true))) | |||||
| .apply(assign(ranges("3xxxxx"), ImmutableMap.of(REGION_US, true, REGION_CA, true))) | |||||
| .build(); | |||||
| // The union of all the ranges. | |||||
| assertThat(table).allRanges().containsExactly("[1-3]xxxxx"); | |||||
| Map<PhoneRegion, Column<Boolean>> regionMap = REGIONS.extractGroupColumns(table.getColumns()); | |||||
| assertThat(regionMap.keySet()).containsExactly(PhoneRegion.of("US"), PhoneRegion.of("CA")); | |||||
| assertThat(table.getAssignedRanges(regionMap.get(PhoneRegion.of("US")))).containsExactly("[13]xxxxx"); | |||||
| assertThat(table.getAssignedRanges(regionMap.get(PhoneRegion.of("CA")))).containsExactly("[23]xxxxx"); | |||||
| // If a column in a group is not present, it counts as having no ranges, but if a plain column | |||||
| // is not in the schema at all, it's an error. | |||||
| assertThat(table.getAssignedRanges(REGIONS.getColumn(PhoneRegion.of("CH")))).isEmpty(); | |||||
| Column<String> bogus = Column.ofString("Bogus"); | |||||
| assertThrows(IllegalArgumentException.class, () -> table.getAssignedRanges(bogus)); | |||||
| Column<String> nope = ColumnGroup.byRegion(bogus).getColumn(PhoneRegion.of("US")); | |||||
| assertThrows(IllegalArgumentException.class, () -> table.getAssignedRanges(nope)); | |||||
| } | |||||
| @Test | |||||
| public void testSubTable() { | |||||
| RangeTable original = RangeTable.builder(SCHEMA) | |||||
| .apply(assign(ranges("[13]xxxxxx"), TYPE, MOBILE)) | |||||
| .apply(assign(ranges("[24]xxxxxx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("[14]xxxxxx"), AREA_CODE_LENGTH, 3)) | |||||
| .apply(assign(ranges("[23]xxxxxx"), AREA_CODE_LENGTH, 2)) | |||||
| .build(); | |||||
| // Restrict to the ranges in which area code length is 2, but keep only the type column. | |||||
| RangeTable subTable = original.subTable(original.getRanges(AREA_CODE_LENGTH, 2), TYPE); | |||||
| assertThat(subTable).hasColumns(TYPE); | |||||
| assertThat(subTable).hasRowCount(2); | |||||
| assertThat(subTable).hasRanges("2xxxxxx", FIXED_LINE); | |||||
| assertThat(subTable).hasRanges("3xxxxxx", MOBILE); | |||||
| } | |||||
| @Test | |||||
| public void testGetPrefixMap() { | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| .apply(assign(ranges("1234xxxx", "1256xxxx"), TYPE, MOBILE)) | |||||
| .apply(assign(ranges("1236xxx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("4xxxx"), TYPE, TOLL_FREE)) | |||||
| .apply(assign(ranges("49xxxx"), TYPE, PREMIUM_RATE)) | |||||
| .build(); | |||||
| ImmutableMap<ValidNumberType, PrefixTree> map = table.getPrefixMap(TYPE, 0); | |||||
| assertThat(map).containsEntry(MOBILE, PrefixTree.from(ranges("1234", "125"))); | |||||
| assertThat(map).containsEntry(FIXED_LINE, PrefixTree.from(ranges("1236"))); | |||||
| // The ranges 4xxxx and 49xxxx overlap (since 49 is a prefix for both) and the prefix map | |||||
| // contains the shortest unique prefix for each range. The mapping from TOLL_FREE could not | |||||
| // contain only "4[0-8]" since that would not match "49123". Overlapping range lengths with | |||||
| // different types is thus highly problematic, but the prefix map will contain mappings for | |||||
| // both, and it's up to the caller to handle this, possibly by ordering any checks made. | |||||
| assertThat(map).containsEntry(TOLL_FREE, PrefixTree.from(ranges("4"))); | |||||
| assertThat(map).containsEntry(PREMIUM_RATE, PrefixTree.from(ranges("49"))); | |||||
| } | |||||
| @Test | |||||
| public void testGetPrefixMap_minLength() { | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| .apply(assign(ranges("123xxxxx", "1256xxxx"), TYPE, MOBILE)) | |||||
| .apply(assign(ranges("124xxx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("4xxxx"), TYPE, TOLL_FREE)) | |||||
| .apply(assign(ranges("49xxxx"), TYPE, PREMIUM_RATE)) | |||||
| .build(); | |||||
| ImmutableMap<ValidNumberType, PrefixTree> map = table.getPrefixMap(TYPE, 3); | |||||
| assertThat(map).containsEntry(MOBILE, PrefixTree.from(ranges("12[35]"))); | |||||
| assertThat(map).containsEntry(FIXED_LINE, PrefixTree.from(ranges("124"))); | |||||
| assertThat(map).containsEntry(TOLL_FREE, PrefixTree.from(ranges("4"))); | |||||
| assertThat(map).containsEntry(PREMIUM_RATE, PrefixTree.from(ranges("49"))); | |||||
| } | |||||
| @Test | |||||
| public void testSimplify_multipleColumns() { | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| // This can't be simplified since expanding any of the area code length ranges will overlap | |||||
| // (possibly with the unassigned area code length ranges). | |||||
| .apply(assign(ranges("1[0-4]x_xxxx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("12x_xxxx"), AREA_CODE_LENGTH, 2)) | |||||
| .apply(assign(ranges("123_xxxx"), AREA_CODE_LENGTH, 3)) | |||||
| .apply(assign(ranges("123_4xxx"), AREA_CODE_LENGTH, 4)) | |||||
| // This can be simplified since it expands into "empty" ranges. | |||||
| .apply(assign(ranges("156_xxxx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("156_xxxx"), AREA_CODE_LENGTH, 3)) | |||||
| .apply(assign(ranges("234_xxxx"), TYPE, MOBILE)) | |||||
| // This should be ignored since simplification happens only on the other columns. | |||||
| .apply(assign(ranges("[12]23_xxxx"), REGION_CA, true)) | |||||
| .build(); | |||||
| RangeTable simplified = | |||||
| table.simplify(c -> c.getRanges().significantDigits(2), 0, TYPE, AREA_CODE_LENGTH); | |||||
| assertThat(simplified).hasColumns(TYPE, AREA_CODE_LENGTH); | |||||
| // The 156 range got pulled back to 2 digits (the other was already 2 digits). | |||||
| assertThat(simplified).assigned(TYPE, FIXED_LINE).containsExactly("1[0-4]x_xxxx", "15x_xxxx"); | |||||
| // The 234 range got pulled back to 2 digits. | |||||
| assertThat(simplified).assigned(TYPE, MOBILE).containsExactly("23x_xxxx"); | |||||
| assertThat(simplified).assigned(AREA_CODE_LENGTH, 2).containsExactly("12[0-24-9]_xxxx"); | |||||
| // The 123 ranges were preserved, but the 156 range was pulled back to 2 digits. | |||||
| assertThat(simplified).assigned(AREA_CODE_LENGTH, 3) | |||||
| .containsExactly("123_[0-35-9]xxx", "15x_xxxx"); | |||||
| assertThat(simplified).assigned(AREA_CODE_LENGTH, 4).containsExactly("123_4xxx"); | |||||
| } | |||||
| @Test | |||||
| public void testSimplify_chineseRanges() { | |||||
| // This mimics real data found in the CN regular expression whereby a SHARED_COST range | |||||
| // partially overlaps with the fixed line prefixes. | |||||
| RangeTable table = RangeTable.builder(SCHEMA) | |||||
| // The pattern is: | |||||
| // abc | length=10 | FIXED_LINE | |||||
| // abc100 | length=8 | FIXED_LINE | |||||
| // abc95 | length=8,9 | FIXED_LINE | |||||
| // abc96 | length=8,9 | SHARED_COST | |||||
| .apply(assign(ranges("123_xxx_xxxx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("123_100xx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("123_95xxx", "123_95xxxx"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("123_96xxx", "123_96xxxx"), TYPE, SHARED_COST)) | |||||
| // Just add a range that sits "either side" of what's being simplified to ensure it | |||||
| // doesn't "leak". | |||||
| .apply(assign(ranges("1[13]4_56xx_xxxx"), TYPE, MOBILE)) | |||||
| .build(); | |||||
| RangeTable simplified = table.simplify(c -> c.getRanges().significantDigits(3), 0, TYPE); | |||||
| // The simplification function just takes the first 3 significant digits. If the "shared cost" | |||||
| // ranges were not overlapping, this would result in a "fixed line" range of "123xxx..." with | |||||
| // lengths 8,9,10. However to avoid corrupting the shared cost range, we end up with: | |||||
| // abc | length=10 | FIXED_LINE | |||||
| // abc[0-8] | length=8,9 | FIXED_LINE | |||||
| // abc9[0-57-9] | length=8,9 | FIXED_LINE | |||||
| // abc96 | length=8,9 | SHARED_COST | |||||
| assertThat(simplified).hasColumns(TYPE); | |||||
| assertThat(simplified).assigned(TYPE, FIXED_LINE).containsExactly( | |||||
| "123_xxx_xxxx", | |||||
| "123_[0-8]xx_xx", | |||||
| "123_[0-8]xx_xxx", | |||||
| "123_9[0-57-9]x_xx", | |||||
| "123_9[0-57-9]x_xxx"); | |||||
| assertThat(simplified).assigned(TYPE, SHARED_COST).containsExactly( | |||||
| "123_96x_xx", | |||||
| "123_96x_xxx"); | |||||
| assertThat(simplified).assigned(TYPE, MOBILE).containsExactly( | |||||
| "1[13]4_xxxx_xxxx"); | |||||
| } | |||||
| @Test | |||||
| public void testSimplify_overlappingCheck() { | |||||
| Schema shortcodeSchema = Schema.builder().add(TYPE).build(); | |||||
| RangeTable table = RangeTable.builder(shortcodeSchema) | |||||
| .apply(assign(ranges("123x"), TYPE, FIXED_LINE)) | |||||
| .apply(assign(ranges("12x", "12xxx"), TYPE, MOBILE)) | |||||
| .build(); | |||||
| // The simplification function here is good for testing edge case behaviour since it's | |||||
| // essentially the most "extreme" simplification you can have. | |||||
| RangeTable simplified = table.simplify(EXTREME_SIMPLIFICATION, 0, TYPE); | |||||
| assertThat(simplified).hasColumns(TYPE); | |||||
| assertThat(simplified).assigned(TYPE, FIXED_LINE).containsExactly("123x"); | |||||
| assertThat(simplified).assigned(TYPE, MOBILE).containsExactly("12x", "12[0-24-9]x", "12xxx"); | |||||
| } | |||||
| private static RangeTree ranges(String... rangeSpecs) { | |||||
| return RangeTree.from(Arrays.stream(rangeSpecs).map(RangeSpecification::parse)); | |||||
| } | |||||
| private static <T extends Comparable<T>> Change assign( | |||||
| RangeTree ranges, Column<T> column, T value) { | |||||
| return Change.builder(ranges).assign(column, value).build(); | |||||
| } | |||||
| private static <T extends Comparable<T>> Change unassign(RangeTree ranges, Column<T> column) { | |||||
| return Change.builder(ranges).unassign(column).build(); | |||||
| } | |||||
| private Change assign(RangeTree ranges, Map<Column<?>, ?> map) { | |||||
| return Change.of(ranges, | |||||
| map.entrySet().stream() | |||||
| .map(e -> Assignment.of(e.getKey(), e.getValue())) | |||||
| .collect(toImmutableList())); | |||||
| } | |||||
| private static Cell<RangeSpecification, Column<?>, Optional<?>> assigned( | |||||
| String range, Column<?> column, Object value) { | |||||
| return Tables.immutableCell(RangeSpecification.parse(range), column, Optional.of(value)); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,71 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.table; | |||||
| import static com.google.common.truth.Truth.assertThat; | |||||
| import static com.google.common.truth.Truth8.assertThat; | |||||
| import static com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType.UNKNOWN; | |||||
| import static org.junit.Assert.assertThrows; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import java.util.stream.Stream; | |||||
| import org.junit.Test; | |||||
| import org.junit.runner.RunWith; | |||||
| import org.junit.runners.JUnit4; | |||||
| @RunWith(JUnit4.class) | |||||
| public class SchemaTest { | |||||
| private static final Column<ValidNumberType> TYPE = | |||||
| Column.of(ValidNumberType.class, "Type", UNKNOWN); | |||||
| private static final Column<String> OPERATORS = Column.ofString("Operators"); | |||||
| private static final ColumnGroup<PhoneRegion, Boolean> REGIONS = | |||||
| ColumnGroup.byRegion(Column.ofBoolean("Region")); | |||||
| private static final Column<Boolean> REGION_US = REGIONS.getColumn(PhoneRegion.of("US")); | |||||
| private static final Column<Boolean> REGION_CA = REGIONS.getColumn(PhoneRegion.of("CA")); | |||||
| private static final Column<Boolean> BOGUS = Column.ofBoolean("Bogus"); | |||||
| private static final Schema SCHEMA = | |||||
| Schema.builder().add(TYPE).add(OPERATORS).add(REGIONS).build(); | |||||
| @Test | |||||
| public void testColumnOrdering() { | |||||
| assertThat(Stream.of(OPERATORS, REGION_US, TYPE, REGION_CA).sorted(SCHEMA.ordering())) | |||||
| .containsExactly(TYPE, OPERATORS, REGION_CA, REGION_US) | |||||
| .inOrder(); | |||||
| // The names are the columns/groups (but not the names of columns in groups, such as | |||||
| // "Region:US", since those are functionally generated and aren't known by the schema. | |||||
| assertThat(SCHEMA.names()).containsExactly("Type", "Operators", "Region").inOrder(); | |||||
| } | |||||
| @Test | |||||
| public void test() { | |||||
| assertThat(SCHEMA.getColumn("Type")).isEqualTo(TYPE); | |||||
| assertThat(SCHEMA.getColumn("Region:US")).isEqualTo(REGION_US); | |||||
| assertThrows(IllegalArgumentException.class, () -> SCHEMA.getColumn("Region")); | |||||
| assertThrows(IllegalArgumentException.class, () -> SCHEMA.getColumn("Bogus")); | |||||
| } | |||||
| @Test | |||||
| public void testCheckColumn() { | |||||
| assertThat(SCHEMA.checkColumn(TYPE)).isEqualTo(TYPE); | |||||
| assertThat(SCHEMA.checkColumn(REGION_US)).isEqualTo(REGION_US); | |||||
| assertThrows(IllegalArgumentException.class, () -> SCHEMA.checkColumn(BOGUS)); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,132 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.testing; | |||||
| import static com.google.common.base.Strings.lenientFormat; | |||||
| import static com.google.common.truth.Fact.simpleFact; | |||||
| import static com.google.common.truth.Truth.assertAbout; | |||||
| import static java.util.Arrays.asList; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableTable; | |||||
| import com.google.common.truth.FailureMetadata; | |||||
| import com.google.common.truth.Subject; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||||
| import java.util.Optional; | |||||
| import javax.annotation.Nullable; | |||||
| /** A Truth subject for asserting on {@link RangeTable} instances. */ | |||||
| public class RangeTableSubject extends Subject { | |||||
| /** Returns Truth subject for asserting on a {@link RangeTable}. */ | |||||
| public static RangeTableSubject assertThat(@Nullable RangeTable table) { | |||||
| return assertAbout(RangeTableSubject.SUBJECT_FACTORY).that(table); | |||||
| } | |||||
| private static final Factory<RangeTableSubject, RangeTable> SUBJECT_FACTORY = | |||||
| RangeTableSubject::new; | |||||
| private final RangeTable actual; | |||||
| private RangeTableSubject(FailureMetadata failureMetadata, @Nullable RangeTable subject) { | |||||
| super(failureMetadata, subject); | |||||
| this.actual = subject; | |||||
| } | |||||
| // Add more methods below as needed. | |||||
| /** Asserts that the table is empty. */ | |||||
| public void isEmpty() { | |||||
| if (!actual.isEmpty()) { | |||||
| failWithActual(simpleFact("expected to be empty")); | |||||
| } | |||||
| } | |||||
| /** Asserts that the table has exactly the given columns in the given order (and no others). */ | |||||
| public void hasColumns(Column<?>... columns) { | |||||
| check("getColumns()").that(actual.getColumns()).containsExactlyElementsIn(asList(columns)); | |||||
| } | |||||
| /** Asserts that the table has the specified number of rows. */ | |||||
| public void hasRowCount(int count) { | |||||
| check("toImmutableTable().rowKeySet().size()") | |||||
| .that(actual.toImmutableTable().rowKeySet().size()) | |||||
| .isEqualTo(count); | |||||
| } | |||||
| /** | |||||
| * Asserts the specified range has the given values for each column. All columns need to be | |||||
| * specified, with {@code null} meanings "no value present". This method does not ensure that no | |||||
| * other ranges were also assigned the same values, so for complete coverage in a test it's best | |||||
| * to use this in conjunction with something like {@link #allRanges()}. | |||||
| */ | |||||
| public void hasRanges(String spec, Object... values) { | |||||
| ImmutableTable<RangeSpecification, Column<?>, Optional<?>> table = | |||||
| this.actual.toImmutableTable(); | |||||
| RangeSpecification rowKey = RangeSpecification.parse(spec); | |||||
| if (!table.rowKeySet().contains(rowKey)) { | |||||
| failWithoutActual( | |||||
| simpleFact( | |||||
| lenientFormat( | |||||
| "specified row %s does not exist in the table: rows=%s", | |||||
| rowKey, table.rowKeySet()))); | |||||
| } | |||||
| ImmutableMap<Column<?>, Optional<?>> row = table.row(rowKey); | |||||
| if (row.size() != values.length) { | |||||
| failWithoutActual( | |||||
| simpleFact( | |||||
| lenientFormat( | |||||
| "incorrect number of columns: expected %s, got %s", row.size(), values.length))); | |||||
| } | |||||
| int n = 0; | |||||
| for (Optional<?> actual : row.values()) { | |||||
| Object expected = values[n++]; | |||||
| if (actual.isPresent()) { | |||||
| if (!actual.get().equals(expected)) { | |||||
| failWithoutActual( | |||||
| simpleFact( | |||||
| lenientFormat("unexpected value in row: expected %s, got %s", expected, actual))); | |||||
| } | |||||
| } else if (expected != null) { | |||||
| failWithoutActual(simpleFact(lenientFormat("missing value in row: expected %s", expected))); | |||||
| } | |||||
| } | |||||
| } | |||||
| /** | |||||
| * Returns a {@link RangeTreeSubject} for asserting about the ranges assigned to the given value | |||||
| * in the specified column. | |||||
| */ | |||||
| public RangeTreeSubject assigned(Column<?> column, Object value) { | |||||
| return RangeTreeSubject.assertWithMessageThat( | |||||
| actual.getRanges(column, value), "%s in column %s", value, column); | |||||
| } | |||||
| /** | |||||
| * Returns a {@link RangeTreeSubject} for asserting about all ranges assigned in the specified | |||||
| * column. | |||||
| */ | |||||
| public RangeTreeSubject assigned(Column<?> column) { | |||||
| return RangeTreeSubject.assertWithMessageThat( | |||||
| actual.getAssignedRanges(column), "column %s", column); | |||||
| } | |||||
| /** Returns a {@link RangeTreeSubject} for asserting about all ranges in the table. */ | |||||
| public RangeTreeSubject allRanges() { | |||||
| return RangeTreeSubject.assertWithMessageThat(actual.getAllRanges(), "all ranges"); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,118 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.testing; | |||||
| import static com.google.common.truth.Fact.simpleFact; | |||||
| import static com.google.common.truth.Truth.assertAbout; | |||||
| import static com.google.common.truth.Truth.assertWithMessage; | |||||
| import com.google.common.collect.FluentIterable; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.truth.FailureMetadata; | |||||
| import com.google.common.truth.Subject; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.PrefixTree; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import javax.annotation.Nullable; | |||||
| /** A Truth subject for asserting on {@link RangeTree} instances. */ | |||||
| public class RangeTreeSubject extends Subject { | |||||
| public static RangeTreeSubject assertThat(@Nullable RangeTree tree) { | |||||
| return assertAbout(RangeTreeSubject.SUBJECT_FACTORY).that(tree); | |||||
| } | |||||
| public static RangeTreeSubject assertThat(@Nullable PrefixTree tree) { | |||||
| return assertAbout(RangeTreeSubject.SUBJECT_FACTORY).that(tree.asRangeTree()); | |||||
| } | |||||
| public static RangeTreeSubject assertWithMessageThat( | |||||
| @Nullable RangeTree tree, String message, Object... args) { | |||||
| return assertWithMessage(message, args).about( | |||||
| RangeTreeSubject.SUBJECT_FACTORY).that(tree); | |||||
| } | |||||
| private static final Factory<RangeTreeSubject, RangeTree> SUBJECT_FACTORY = | |||||
| RangeTreeSubject::new; | |||||
| private final RangeTree actual; | |||||
| private RangeTreeSubject(FailureMetadata failureMetadata, @Nullable RangeTree subject) { | |||||
| super(failureMetadata, subject); | |||||
| this.actual = subject; | |||||
| } | |||||
| // Add more methods below as needed. | |||||
| public void isEmpty() { | |||||
| if (!actual.isEmpty()) { | |||||
| failWithActual(simpleFact("expected to be empty")); | |||||
| } | |||||
| } | |||||
| public void isNotEmpty() { | |||||
| if (actual.isEmpty()) { | |||||
| failWithActual(simpleFact("expected not to be empty")); | |||||
| } | |||||
| } | |||||
| public void hasSize(long size) { | |||||
| check("size()").withMessage("size").that(actual.size()).isEqualTo(size); | |||||
| } | |||||
| public void contains(String digits) { | |||||
| DigitSequence seq = digits.isEmpty() ? DigitSequence.empty() : DigitSequence.of(digits); | |||||
| if (!actual.contains(seq)) { | |||||
| failWithActual("expected to contain ", digits); | |||||
| } | |||||
| } | |||||
| public void doesNotContain(String digits) { | |||||
| DigitSequence seq = digits.isEmpty() ? DigitSequence.empty() : DigitSequence.of(digits); | |||||
| if (actual.contains(seq)) { | |||||
| failWithActual("expected not to contain", digits); | |||||
| } | |||||
| } | |||||
| public void containsExactly(RangeSpecification spec) { | |||||
| RangeTree tree = RangeTree.from(spec); | |||||
| if (!actual.equals(tree)) { | |||||
| failWithActual("expected to be equal to", spec); | |||||
| } | |||||
| } | |||||
| public void containsExactly(Iterable<RangeSpecification> specs) { | |||||
| RangeTree tree = RangeTree.from(specs); | |||||
| if (!actual.equals(tree)) { | |||||
| failWithActual("expected to be equal to", specs); | |||||
| } | |||||
| } | |||||
| public void containsExactly(String spec) { | |||||
| containsExactly(RangeSpecification.parse(spec)); | |||||
| } | |||||
| public void containsExactly(String... specs) { | |||||
| containsExactly(FluentIterable.from(specs).transform(RangeSpecification::parse)); | |||||
| } | |||||
| public void hasLengths(Integer... lengths) { | |||||
| check("getLengths()") | |||||
| .that(actual.getLengths()) | |||||
| .containsExactlyElementsIn(ImmutableSet.copyOf(lengths)); | |||||
| } | |||||
| } | |||||
| @ -0,0 +1,477 @@ | |||||
| /* | |||||
| * Copyright (C) 2017 The Libphonenumber Authors. | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| package com.google.i18n.phonenumbers.metadata.testing; | |||||
| import static com.google.common.base.Preconditions.checkArgument; | |||||
| import static com.google.common.base.Preconditions.checkNotNull; | |||||
| import static com.google.common.base.Preconditions.checkState; | |||||
| import static com.google.common.collect.ImmutableMap.toImmutableMap; | |||||
| import static com.google.common.collect.ImmutableSet.toImmutableSet; | |||||
| import static java.lang.Boolean.TRUE; | |||||
| import static java.util.function.Function.identity; | |||||
| import com.google.common.collect.HashBasedTable; | |||||
| import com.google.common.collect.ImmutableList; | |||||
| import com.google.common.collect.ImmutableMap; | |||||
| import com.google.common.collect.ImmutableSet; | |||||
| import com.google.common.collect.Maps; | |||||
| import com.google.common.collect.Table; | |||||
| import com.google.i18n.phonenumbers.metadata.DigitSequence; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeSpecification; | |||||
| import com.google.i18n.phonenumbers.metadata.RangeTree; | |||||
| import com.google.i18n.phonenumbers.metadata.Types; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion; | |||||
| import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag; | |||||
| import com.google.i18n.phonenumbers.metadata.model.AltFormatSpec; | |||||
| import com.google.i18n.phonenumbers.metadata.model.FormatSpec; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Attributes; | |||||
| import com.google.i18n.phonenumbers.metadata.model.NumberingScheme.Comment; | |||||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtTariff; | |||||
| import com.google.i18n.phonenumbers.metadata.model.RangesTableSchema.ExtType; | |||||
| import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.model.ShortcodesTableSchema.ShortcodeType; | |||||
| import com.google.i18n.phonenumbers.metadata.model.XmlRangesSchema; | |||||
| import com.google.i18n.phonenumbers.metadata.proto.Types.ValidNumberType; | |||||
| import com.google.i18n.phonenumbers.metadata.table.Column; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable; | |||||
| import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode; | |||||
| import java.util.ArrayList; | |||||
| import java.util.Arrays; | |||||
| import java.util.HashMap; | |||||
| import java.util.LinkedHashMap; | |||||
| import java.util.List; | |||||
| import java.util.Map; | |||||
| import java.util.Map.Entry; | |||||
| import java.util.Optional; | |||||
| import java.util.stream.Stream; | |||||
| /** | |||||
| * Reusable test-only builder for numbering schemes. More methods can be added as necessary to | |||||
| * support whatever is needed for testing. | |||||
| * | |||||
| * <p>Note that the various "modifier" classes returned by methods such as "addRanges()" are | |||||
| * designed only as fluent APIs and instances of modifiers should never be assigned to variables | |||||
| * and especially not interleaved with other mutations of the range tables. | |||||
| */ | |||||
| public final class TestNumberingScheme { | |||||
| /** | |||||
| * Returns a mutable numbering scheme builder for testing. Since an IDD is always required by | |||||
| * NumberingScheme for geographic regions, a default value of "00" is set by default. This can be | |||||
| * overridden or reset by {@code setInternationalPrefix{}} and {@code clearInternationalPrefix()}. | |||||
| */ | |||||
| public static TestNumberingScheme forCallingCode( | |||||
| String cc, PhoneRegion main, PhoneRegion... others) { | |||||
| return new TestNumberingScheme(DigitSequence.of(cc), main, ImmutableSet.copyOf(others)); | |||||
| } | |||||
| private final DigitSequence callingCode; | |||||
| private final PhoneRegion mainRegion; | |||||
| private final ImmutableSet<PhoneRegion> otherRegions; | |||||
| private final ImmutableMap<PhoneRegion, Column<Boolean>> regionMap; | |||||
| // See setNationalPrefix() / clearNationalPrefix() | |||||
| private final List<DigitSequence> nationalPrefix = new ArrayList<>(); | |||||
| // See setInternationalPrefix() / clearInternationalPrefix() | |||||
| private Optional<DigitSequence> internationalPrefix = Optional.empty(); | |||||
| // See setCarrierPrefixes() | |||||
| private RangeTree carrierPrefixes = RangeTree.empty(); | |||||
| // Uses the CSV schema (rather than XML) since that handles type/tariff better. | |||||
| private final RangeTable.Builder csvRanges = RangeTable.builder(RangesTableSchema.TABLE_COLUMNS); | |||||
| private final Map<PhoneRegion, RangeTable.Builder> shortcodes = new HashMap<>(); | |||||
| private final Map<FormatSpec, String> formats = new LinkedHashMap<>(); | |||||
| // Alternate formats are largely separate from everything else. | |||||
| private ImmutableList<AltFormatSpec> altFormats = ImmutableList.of(); | |||||
| // Explicit example numbers. | |||||
| private final Table<PhoneRegion, ValidNumberType, DigitSequence> examples = | |||||
| HashBasedTable.create(); | |||||
| private final List<Comment> comments = new ArrayList<>(); | |||||
| private TestNumberingScheme( | |||||
| DigitSequence cc, PhoneRegion main, ImmutableSet<PhoneRegion> others) { | |||||
| checkArgument(!others.contains(main), "duplicate regions"); | |||||
| this.callingCode = checkNotNull(cc); | |||||
| this.mainRegion = checkNotNull(main); | |||||
| this.otherRegions = others; | |||||
| this.regionMap = Stream.concat(Stream.of(main), others.stream()) | |||||
| .collect(toImmutableMap(identity(), RangesTableSchema.REGIONS::getColumn)); | |||||
| // Set a reasonable IDD default for geographic regions. | |||||
| if (!main.equals(PhoneRegion.getWorld())) { | |||||
| setInternationalPrefix("00"); | |||||
| } | |||||
| } | |||||
| /** Sets the national prefix of this scheme, replacing any previous value. */ | |||||
| public TestNumberingScheme setNationalPrefix(String prefix) { | |||||
| checkArgument(!prefix.isEmpty(), "national prefix must not be empty"); | |||||
| this.nationalPrefix.clear(); | |||||
| this.nationalPrefix.add(DigitSequence.of(prefix)); | |||||
| return this; | |||||
| } | |||||
| /** Sets the national prefix of this scheme, replacing any previous value. */ | |||||
| public TestNumberingScheme setNationalPrefixes(String... prefix) { | |||||
| List<String> prefixes = Arrays.asList(prefix); | |||||
| this.nationalPrefix.clear(); | |||||
| prefixes.forEach(p -> { | |||||
| checkArgument(!p.isEmpty(), "national prefix must not be empty"); | |||||
| this.nationalPrefix.add(DigitSequence.of(p)); | |||||
| }); | |||||
| return this; | |||||
| } | |||||
| /** Removes the national prefix */ | |||||
| public TestNumberingScheme clearNationalPrefix() { | |||||
| this.nationalPrefix.clear(); | |||||
| return this; | |||||
| } | |||||
| /** Sets the international prefix of this scheme, replacing any previous value. */ | |||||
| public TestNumberingScheme setInternationalPrefix(String prefix) { | |||||
| checkState(!mainRegion.equals(PhoneRegion.getWorld()), | |||||
| "[%s] cannot set IDD for non-geographic calling code", callingCode); | |||||
| this.internationalPrefix = Optional.of(DigitSequence.of(prefix)); | |||||
| return this; | |||||
| } | |||||
| /** Removes the international prefix */ | |||||
| public TestNumberingScheme clearInternationalPrefix() { | |||||
| this.internationalPrefix = Optional.empty(); | |||||
| return this; | |||||
| } | |||||
| /** Sets the national prefix of this scheme, replacing any previous value. */ | |||||
| public TestNumberingScheme setCarrierPrefixes(String... prefix) { | |||||
| this.carrierPrefixes = RangeTree.from(Arrays.stream(prefix).map(RangeSpecification::parse)); | |||||
| return this; | |||||
| } | |||||
| /** | |||||
| * Adds ranges (which must not already exist) to the scheme. This method returns a fluent API | |||||
| * for modifying the newly added ranges. | |||||
| */ | |||||
| public RangeModifier addRanges(ExtType type, ExtTariff tariff, String... specs) { | |||||
| return addRanges(type, tariff, rangesOf(specs)); | |||||
| } | |||||
| /** | |||||
| * Adds ranges (which must not already exist) to the scheme. This method returns a fluent API | |||||
| * for modifying the newly added ranges. | |||||
| */ | |||||
| public RangeModifier addRanges(ExtType type, ExtTariff tariff, RangeTree ranges) { | |||||
| RangeTree overlap = csvRanges.getAllRanges().intersect(ranges); | |||||
| checkArgument(overlap.isEmpty(), "ranges already added: %s", overlap); | |||||
| csvRanges.assign(RangesTableSchema.TYPE, checkNotNull(type), ranges, OverwriteMode.NEVER); | |||||
| csvRanges.assign(RangesTableSchema.TARIFF, checkNotNull(tariff), ranges, OverwriteMode.NEVER); | |||||
| // Setting all regions here generates "legal" numbering schemes by default. | |||||
| regionMap.values().forEach(c -> csvRanges.assign(c, true, ranges, OverwriteMode.NEVER)); | |||||
| return new RangeModifier(ranges); | |||||
| } | |||||
| /** Removes ranges (which need not already exist) from the scheme. */ | |||||
| public void removeRanges(String... specs) { | |||||
| removeRanges(rangesOf(specs)); | |||||
| } | |||||
| /** Removes ranges (which need not already exist) from the scheme. */ | |||||
| public void removeRanges(RangeTree ranges) { | |||||
| csvRanges.remove(ranges); | |||||
| } | |||||
| /** Returns a fluent API for modifying existing ranges (constrained by the given bounds). */ | |||||
| public RangeModifier forRangesIn(String... specs) { | |||||
| return forRangesIn(rangesOf(specs)); | |||||
| } | |||||
| /** Returns a fluent API for modifying existing ranges (constrained by the given bounds). */ | |||||
| public RangeModifier forRangesIn(RangeTree ranges) { | |||||
| return new RangeModifier(ranges.intersect(csvRanges.getAllRanges())); | |||||
| } | |||||
| /** | |||||
| * Adds shortcodes (which must not already exist) to a given region in the scheme. This method | |||||
| * returns a fluent API for modifying the newly added shortcodes. | |||||
| */ | |||||
| public ShortcodeModifier addShortcodes( | |||||
| PhoneRegion region, ShortcodeType type, ExtTariff tariff, String... specs) { | |||||
| return addShortcodes(region, type, tariff, rangesOf(specs)); | |||||
| } | |||||
| /** | |||||
| * Adds shortcodes (which must not already exist) to a given region in the scheme. This method | |||||
| * returns a fluent API for modifying the newly added shortcodes. | |||||
| */ | |||||
| public ShortcodeModifier addShortcodes( | |||||
| PhoneRegion region, ShortcodeType type, ExtTariff tariff, RangeTree ranges) { | |||||
| RangeTable.Builder table = shortcodes | |||||
| .computeIfAbsent(region, r -> RangeTable.builder(ShortcodesTableSchema.SCHEMA.columns())); | |||||
| RangeTree overlap = table.getAllRanges().intersect(ranges); | |||||
| checkArgument(overlap.isEmpty(), "ranges already added: %s", overlap); | |||||
| table.assign(ShortcodesTableSchema.TYPE, checkNotNull(type), ranges, OverwriteMode.NEVER); | |||||
| table.assign(ShortcodesTableSchema.TARIFF, checkNotNull(tariff), ranges, OverwriteMode.NEVER); | |||||
| return new ShortcodeModifier(region, ranges); | |||||
| } | |||||
| /** Returns a fluent API for modifying existing shortcodes (constrained by the given bounds). */ | |||||
| public ShortcodeModifier forShortcodesIn(PhoneRegion region, String... specs) { | |||||
| return forShortcodesIn(region, rangesOf(specs)); | |||||
| } | |||||
| /** Returns a fluent API for modifying existing shortcodes (constrained by the given bounds). */ | |||||
| public ShortcodeModifier forShortcodesIn(PhoneRegion region, RangeTree ranges) { | |||||
| RangeTable.Builder shortcodeTable = | |||||
| checkNotNull(shortcodes.get(region), "no shortcodes in region %s", region); | |||||
| return new ShortcodeModifier(region, ranges.intersect(shortcodeTable.getAllRanges())); | |||||
| } | |||||
| public TypeModifier forRangeTypes(PhoneRegion region, ExtType type, ExtTariff tariff) { | |||||
| return new TypeModifier(region, type, tariff); | |||||
| } | |||||
| public TestNumberingScheme setAlternateFormats(List<AltFormatSpec> altFormats) { | |||||
| this.altFormats = ImmutableList.copyOf(altFormats); | |||||
| return this; | |||||
| } | |||||
| /** Builds a valid numbering scheme from the current state of this builder. */ | |||||
| public NumberingScheme build() { | |||||
| Attributes attributes = Attributes.create( | |||||
| callingCode, | |||||
| mainRegion, | |||||
| otherRegions, | |||||
| ImmutableSet.copyOf(nationalPrefix), | |||||
| carrierPrefixes, | |||||
| // This is currently simplistic (only 1 value) and could be extended for tests if needed. | |||||
| internationalPrefix.map(Object::toString).orElse(""), | |||||
| internationalPrefix.map(p -> RangeTree.from(RangeSpecification.from(p))) | |||||
| .orElse(RangeTree.empty()), | |||||
| "", | |||||
| ImmutableSet.of()); | |||||
| RangeTable xmlTable = XmlRangesSchema.fromExternalTable(csvRanges.build()); | |||||
| ImmutableMap<PhoneRegion, RangeTable> shortcodeMap = | |||||
| shortcodes.entrySet().stream() | |||||
| .collect(toImmutableMap(Entry::getKey, e -> e.getValue().build())); | |||||
| // Some formats may have been unassigned by modifications to the test scheme. Only copy the | |||||
| // formats with keys that exist in the range tables at the time the scheme is built. | |||||
| ImmutableSet<String> assignedFormats = Stream.concat( | |||||
| xmlTable.getAssignedValues(XmlRangesSchema.FORMAT).stream(), | |||||
| shortcodeMap.values().stream() | |||||
| .flatMap(t -> t.getAssignedValues(ShortcodesTableSchema.FORMAT).stream())) | |||||
| .collect(toImmutableSet()); | |||||
| ImmutableMap<String, FormatSpec> formatMap = formats.entrySet().stream() | |||||
| .filter(e -> assignedFormats.contains(e.getValue())) | |||||
| .collect(toImmutableMap(Entry::getValue, Entry::getKey)); | |||||
| return NumberingScheme.from( | |||||
| attributes, | |||||
| xmlTable, | |||||
| Maps.transformValues(shortcodes, RangeTable.Builder::build), | |||||
| formatMap, | |||||
| altFormats, | |||||
| fillInMissingExampleNumbersFrom(xmlTable, examples), | |||||
| comments); | |||||
| } | |||||
| public TerritoryModifier forTerritory(PhoneRegion region) { | |||||
| return new TerritoryModifier(region); | |||||
| } | |||||
| /** Fluent API for modifying a set of ranges. */ | |||||
| public final class RangeModifier { | |||||
| private final RangeTree ranges; | |||||
| private RangeModifier(RangeTree ranges) { | |||||
| checkArgument(!ranges.isEmpty(), "cannot modify empty ranges"); | |||||
| this.ranges = ranges; | |||||
| } | |||||
| /** Sets the regions in which the ranges are valid. */ | |||||
| public RangeModifier setRegions(PhoneRegion... regions) { | |||||
| ImmutableSet<PhoneRegion> regionsToSet = ImmutableSet.copyOf(regions); | |||||
| checkArgument(regionMap.keySet().containsAll(regionsToSet)); | |||||
| regionMap.forEach((r, c) -> | |||||
| csvRanges.assign(c, regionsToSet.contains(r), ranges, OverwriteMode.ALWAYS)); | |||||
| return this; | |||||
| } | |||||
| /** Sets ranges to be "national only" dialing. */ | |||||
| public RangeModifier setNationalOnly(boolean nationalOnly) { | |||||
| csvRanges.assign(RangesTableSchema.NATIONAL_ONLY, nationalOnly, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| /** Sets the area code length of the ranges. */ | |||||
| public RangeModifier setAreaCodeLength(int n) { | |||||
| csvRanges.assign(RangesTableSchema.AREA_CODE_LENGTH, n, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| /** Sets the format assigned to the ranges. */ | |||||
| public RangeModifier setFormat(FormatSpec format) { | |||||
| String id = | |||||
| formats.computeIfAbsent(format, f -> String.format("__fmt_%02d", formats.size() + 1)); | |||||
| csvRanges.assign(RangesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| public RangeModifier setFormat(String id, FormatSpec format) { | |||||
| formats.put(format, id); | |||||
| csvRanges.assign(RangesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| /** Clears the format assigned to the ranges. */ | |||||
| public RangeModifier clearFormat() { | |||||
| csvRanges.assign(RangesTableSchema.FORMAT, null, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| public RangeModifier setGeocode(SimpleLanguageTag lang, String name) { | |||||
| csvRanges.assign( | |||||
| RangesTableSchema.GEOCODES.getColumn(lang), name, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| } | |||||
| /** Fluent API for modifying a set of shortcodes in a region. */ | |||||
| public final class ShortcodeModifier { | |||||
| private final PhoneRegion region; | |||||
| private final RangeTree ranges; | |||||
| private ShortcodeModifier(PhoneRegion region, RangeTree ranges) { | |||||
| checkArgument(!ranges.isEmpty(), "cannot modify empty ranges"); | |||||
| this.region = checkNotNull(region); | |||||
| this.ranges = ranges; | |||||
| } | |||||
| private RangeTable.Builder shortcode() { | |||||
| return shortcodes.get(region); | |||||
| } | |||||
| /** Sets the format assigned to the shortcodes. */ | |||||
| public ShortcodeModifier setFormat(FormatSpec format) { | |||||
| String id = | |||||
| formats.computeIfAbsent(format, f -> String.format("__fmt_%02d", formats.size() + 1)); | |||||
| shortcode().assign(ShortcodesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| /** Sets the format assigned to the shortcodes. */ | |||||
| public ShortcodeModifier setFormat(String id, FormatSpec format) { | |||||
| formats.put(format, id); | |||||
| shortcode().assign(ShortcodesTableSchema.FORMAT, id, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| /** Clears the format assigned to the shortcodes. */ | |||||
| public ShortcodeModifier clearFormat() { | |||||
| shortcode().assign(ShortcodesTableSchema.FORMAT, null, ranges, OverwriteMode.ALWAYS); | |||||
| return this; | |||||
| } | |||||
| } | |||||
| /** Fluent API for modifying attributes of range types. */ | |||||
| public final class TypeModifier { | |||||
| private final PhoneRegion region; | |||||
| private final ExtType type; | |||||
| private final ExtTariff tariff; | |||||
| public TypeModifier(PhoneRegion region, ExtType type, ExtTariff tariff) { | |||||
| this.region = checkNotNull(region); | |||||
| this.type = checkNotNull(type); | |||||
| this.tariff = checkNotNull(tariff); | |||||
| checkArgument(regionMap.containsKey(region), | |||||
| "invalid test region '%s' not in: %s", region, regionMap.keySet()); | |||||
| } | |||||
| public TypeModifier setExampleNumber(String ex) { | |||||
| inferValidNumberType(type, tariff) | |||||
| .ifPresent(t -> examples.put(region, t, DigitSequence.of(ex))); | |||||
| return this; | |||||
| } | |||||
| public TypeModifier addComment(String... lines) { | |||||
| inferValidNumberType(type, tariff) | |||||
| .flatMap(Types::toXmlType) | |||||
| .ifPresent(t -> comments.add( | |||||
| Comment.create(Comment.anchor(region, t), Arrays.asList(lines)))); | |||||
| return this; | |||||
| } | |||||
| } | |||||
| /** Fluent API for modifying territory-level attributes. */ | |||||
| public final class TerritoryModifier { | |||||
| private final PhoneRegion region; | |||||
| public TerritoryModifier(PhoneRegion region) { | |||||
| this.region = checkNotNull(region); | |||||
| } | |||||
| public TerritoryModifier addComment(String... lines) { | |||||
| comments.add(Comment.create(Comment.anchor(region), Arrays.asList(lines))); | |||||
| return this; | |||||
| } | |||||
| } | |||||
| private Table<PhoneRegion, ValidNumberType, DigitSequence> fillInMissingExampleNumbersFrom( | |||||
| RangeTable xmlTable, Table<PhoneRegion, ValidNumberType, DigitSequence> examples) { | |||||
| // Take a copy since the build() method is not meant to be modifying the builder itself. | |||||
| HashBasedTable<PhoneRegion, ValidNumberType, DigitSequence> examplesCopy = | |||||
| HashBasedTable.create(examples); | |||||
| addMissingExampleNumbersFor(mainRegion, xmlTable, examplesCopy); | |||||
| otherRegions.forEach(r -> addMissingExampleNumbersFor(r, xmlTable, examplesCopy)); | |||||
| return examplesCopy; | |||||
| } | |||||
| private static void addMissingExampleNumbersFor( | |||||
| PhoneRegion region, | |||||
| RangeTable xmlTable, | |||||
| Table<PhoneRegion, ValidNumberType, DigitSequence> examples) { | |||||
| Column<Boolean> regionColumn = XmlRangesSchema.REGIONS.getColumn(region); | |||||
| RangeTable regionTable = | |||||
| xmlTable.subTable(xmlTable.getRanges(regionColumn, TRUE), XmlRangesSchema.TYPE); | |||||
| for (ValidNumberType type : regionTable.getAssignedValues(XmlRangesSchema.TYPE)) { | |||||
| if (examples.contains(region, type)) { | |||||
| continue; | |||||
| } | |||||
| RangeTree ranges = regionTable.getRanges(XmlRangesSchema.TYPE, type); | |||||
| // Assigned types must be assigned via non empty ranges (so first() cannot fail). | |||||
| examples.put(region, type, ranges.first()); | |||||
| } | |||||
| } | |||||
| private static RangeTree rangesOf(String... specs) { | |||||
| checkArgument(specs.length > 0, "must provide at least one range specifier"); | |||||
| RangeTree ranges = RangeTree.from(Arrays.stream(specs).map(RangeSpecification::parse)); | |||||
| checkArgument(!ranges.getInitial().canTerminate(), "cannot add the empty digit sequence"); | |||||
| return ranges; | |||||
| } | |||||
| private static Optional<ValidNumberType> inferValidNumberType(ExtType type, ExtTariff tariff) { | |||||
| // Tariff takes precedence over type. | |||||
| Optional<ValidNumberType> vnt = tariff.toValidNumberType(); | |||||
| if (!vnt.isPresent()) { | |||||
| vnt = type.toValidNumberType(); | |||||
| } | |||||
| return vnt; | |||||
| } | |||||
| } | |||||