Project import generated by Copybara. (#2890)

PiperOrigin-RevId: 509849832
3 years ago · f63cf2c937
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/LengthsParser.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/LengthsParser.java
@ -0,0 +1,71 @@
 /*
 * Copyright (C) 2022 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata;

 import static com.google.common.base.CharMatcher.whitespace;
 import static com.google.common.base.Preconditions.checkArgument;
 import static java.lang.Integer.parseUnsignedInt;

 import com.google.common.base.CharMatcher;
 import com.google.common.base.Splitter;
 import com.google.common.collect.ContiguousSet;
 import com.google.common.collect.ImmutableSortedSet;
 import java.util.List;
 import java.util.NavigableSet;
 import java.util.TreeSet;

 /**
 * Parses strings of form "4,7-9,11" which are used as length specifiers across LPN metadata.
 *
 * <p>A specifier is a comma separated list of single lengths and closed ranges ("lo-hi"), given
 * in strictly increasing order. Whitespace around each element is ignored.
 */
 public final class LengthsParser {

  private static final Splitter COMMA_SPLITTER = Splitter.on(',').trimResults(whitespace());
  // Limited to two parts so that a malformed range such as "1-2-3" leaves "2-3" as the second
  // part, which then fails to parse as a number.
  private static final Splitter RANGE_SPLITTER =
      Splitter.on('-').trimResults(whitespace()).limit(2);
  private static final CharMatcher ALLOWED_CHARACTERS =
      CharMatcher.inRange('0', '9').or(CharMatcher.anyOf("-,")).or(whitespace());

  /**
   * Returns the set of integers specified by this string.
   *
   * @param s the length specifier, e.g. "4,7-9,11"
   * @return the sorted set of all lengths named by the specifier (ranges are expanded)
   * @throws IllegalArgumentException if the specifier contains forbidden characters, contains an
   *     element which is not a valid number (reported as a {@link NumberFormatException}),
   *     contains a number larger than {@link Integer#MAX_VALUE}, contains a range whose lower
   *     bound is not less than its upper bound, or lists its elements out of order
   */
  public static ImmutableSortedSet<Integer> parseLengths(String s) {
    checkArgument(
        ALLOWED_CHARACTERS.matchesAllOf(s),
        "Length specifier contains forbidden characters: %s",
        s);
    NavigableSet<Integer> lengths = new TreeSet<>();
    for (String lengthOrRange : COMMA_SPLITTER.split(s)) {
      if (lengthOrRange.contains("-")) {
        List<String> lohi = RANGE_SPLITTER.splitToList(lengthOrRange);
        int lo = parseLength(lohi.get(0), s);
        int hi = parseLength(lohi.get(1), s);
        checkArgument(lo < hi, "Invalid range: %s-%s", lo, hi);
        checkInOrder(lengths, lo, s);
        lengths.addAll(ContiguousSet.closed(lo, hi));
      } else {
        int length = parseLength(lengthOrRange, s);
        checkInOrder(lengths, length, s);
        lengths.add(length);
      }
    }
    return ImmutableSortedSet.copyOf(lengths);
  }

  /**
   * Parses a single non-negative length. {@code parseUnsignedInt} accepts values up to 2^32-1 and
   * returns those above {@link Integer#MAX_VALUE} as negative ints, so such values are rejected
   * here rather than being allowed to corrupt the (signed) ordering of the result set.
   */
  private static int parseLength(String token, String specifier) {
    int value = parseUnsignedInt(token);
    checkArgument(value >= 0, "Length is too large in specifier: %s", specifier);
    return value;
  }

  /** Checks that {@code next} is strictly greater than every length collected so far. */
  private static void checkInOrder(NavigableSet<Integer> lengths, int next, String specifier) {
    checkArgument(
        lengths.isEmpty() || next > lengths.last(),
        "Numbers in length specifier are out of order: %s",
        specifier);
  }

  private LengthsParser() {}
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/Types.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/Types.java
@ -71,7 +71,7 @@ public final class Types {
          .put(XML_VOIP, VOIP)
          .put(XML_UAN, UAN)
          .put(XML_VOICEMAIL, VOICEMAIL)
          .build();
          .buildOrThrow();

  /** Returns the set of valid XML type names. */
  public static ImmutableSet<String> getXmlNames() {
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcher.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcher.java
@ -0,0 +1,317 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher;

 import com.google.i18n.phonenumbers.metadata.finitestatematcher.OpCode.State;

 /**
 * Matches phone number regular expressions based on compact compiled data generated by
 * {@link com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler
 * MatcherCompiler}. Typically the phone number regular expression will be compiled at build time
 * and the resulting matcher data will be packaged into the binary which needs it, or downloaded at
 * run time.
 * <p>
 * This class is designed to be lightweight and fast, and should be simple to implement in many
 * different languages (C++, Python, JS, etc.).
 *
 * TODO: Consider UnsignedBytes.toInt(x) to avoid lots of (x & 0xFF).
 */
 public abstract class DigitSequenceMatcher {

  /** The possible outcomes of matching a digit sequence against a matcher. */
  public enum Result {
    /** The whole input was consumed and accepted by the matcher. */
    MATCHED,
    /** Matching failed because the matcher encountered input it does not accept. */
    INVALID,
    /**
     * Matching failed because the input ended too early (ie, the input is a valid prefix of
     * something the matcher accepts).
     */
    TOO_SHORT,
    /**
     * Matching failed because input remained after matching had completed (ie, the input would
     * have matched if it were shorter).
     */
    TOO_LONG
  }

  /**
   * A source of input digits for the matcher, consumed one {@code int} at a time in the manner of
   * an iterator.
   */
  public interface DigitSequence {
    /** Returns whether at least one more digit can be obtained from {@link #next()}. */
    boolean hasNext();

    /**
     * Returns the next digit as a numeric value in the range 0 to 9 inclusive (not as a char
     * value). The matcher performs no validation of the returned value, so anything outside that
     * range gives undefined results, which may include false positive matches.
     */
    int next();
  }

  /**
   * Internal abstraction to allow matching over either byte arrays or strings. A view is a
   * cursor over the matcher's instruction bytes: it holds a current position which the read and
   * branch methods advance.
   */
  interface DataView {
    /**
     * Return the unsigned byte value at the given offset from the current position. The current
     * position is not changed.
     */
    int peekByte(int offset);

    /** Return the unsigned byte value at the current position and move ahead 1 byte. */
    int readByte();

    /**
     * Return the unsigned short value at the current position and move ahead 2 bytes. The byte
     * at the lower position is the most significant.
     */
    int readShort();

    /**
     * Return the unsigned int value at the current position and move ahead 4 bytes. The byte at
     * the lowest position is the most significant.
     */
    int readInt();

    /**
     * Adjust the current position by the given (non-negative) offset. The implementations in
     * this file return {@code CONTINUE} for a non-zero offset and {@code TERMINAL} for an offset
     * of zero.
     */
    State branch(int offset);

    /**
     * Adjust the current position by the unsigned byte offset value read from the current
     * position plus the given index. This is used to implement maps and branching ranges. The
     * returned state is that of the resulting {@link #branch(int)}.
     */
    State jumpTable(int index);
  }

  /**
   * Returns a matcher that executes the compiled instructions held in the given byte array. This
   * is intended for matcher data which was compiled at build time and packaged into a binary (the
   * MatcherCompiler is not suitable for direct parsing of regular expressions at run time).
   * <p>
   * The array is retained by the matcher without being copied.
   * <p>
   * See {@code MatcherCompiler.compile(...)}.
   *
   * @param data the non-empty compiled matcher data
   * @throws IllegalArgumentException if {@code data} is empty
   */
  public static DigitSequenceMatcher create(byte[] data) {
    if (data.length > 0) {
      return new ByteArrayMatcher(data);
    }
    throw new IllegalArgumentException("matcher data cannot be empty");
  }

  /**
   * Returns a matcher that executes the compiled instructions held in the given string, in which
   * each char carries two instruction bytes. This is intended for matcher data which is packaged
   * as literal Java string constants in (auto-generated) source files.
   * <p>
   * See {@code MatcherCompiler.compileToUnquotedJavaSourceString(...)}.
   *
   * @param data the non-empty compiled matcher data
   * @throws IllegalArgumentException if {@code data} is empty
   */
  public static DigitSequenceMatcher create(String data) {
    if (!data.isEmpty()) {
      return new StringMatcher(data);
    }
    throw new IllegalArgumentException("matcher data cannot be empty");
  }

  /** Returns a new view over this matcher's instruction data, positioned at the first byte. */
  abstract DataView newDataView();

  /** Returns the number of instruction bytes held by this matcher. */
  abstract int size();

  /**
   * Runs this matcher over the given digits and reports the outcome.
   *
   * @param in the digits to match; consumed as matching proceeds
   * @return {@code MATCHED} if the matcher terminated with no input left over, {@code TOO_LONG}
   *     if it terminated with input remaining, {@code TOO_SHORT} if the input ran out first, or
   *     {@code INVALID} if unexpected input was encountered
   */
  public Result match(DigitSequence in) {
    State finalState = runMatcher(in);
    switch (finalState) {
      case INVALID:
        return Result.INVALID;
      case TRUNCATED:
        return Result.TOO_SHORT;
      case TERMINAL:
        // Reaching a terminal state only counts as a match if all the input was used up.
        if (in.hasNext()) {
          return Result.TOO_LONG;
        }
        return Result.MATCHED;
      default:
        throw new AssertionError("unexpected state: " + finalState);
    }
  }

  /**
   * Executes instructions from the start of the matcher data, one at a time, until one of them
   * yields a state other than {@code CONTINUE}, and returns that state.
   */
  private State runMatcher(DigitSequence in) {
    DataView view = newDataView();
    while (true) {
      // The opcode is decoded from the first byte of the instruction without consuming it; the
      // instruction itself reads however many bytes it needs.
      State next = OpCode.decode(view.peekByte(0)).execute(view, in);
      if (next != State.CONTINUE) {
        return next;
      }
    }
  }

  /**
   * Returns the size of the matcher data followed by a listing of its bytes in hexadecimal, in
   * the form {@code "N :: [ b0, b1, ... ]"}.
   */
  @Override
  public String toString() {
    final int total = size();
    StringBuilder text = new StringBuilder();
    text.append(total).append(" :: [ ");
    DataView view = newDataView();
    for (int i = 0; i < total; i++) {
      text.append(Integer.toHexString(view.readByte()));
      text.append(", ");
    }
    // Drop the final two characters (the trailing ", " separator) before closing the bracket.
    text.setLength(text.length() - 2);
    return text.append(" ]").toString();
  }

  /** A matcher whose instructions are held in a byte array, one instruction byte per element. */
  private static final class ByteArrayMatcher extends DigitSequenceMatcher {

    private final byte[] bytes;

    private ByteArrayMatcher(byte[] data) {
      this.bytes = data;
    }

    @Override
    int size() {
      return bytes.length;
    }

    @Override
    DataView newDataView() {
      return new ByteArrayData();
    }

    /** A cursor over the enclosing matcher's byte array, starting at index zero. */
    private class ByteArrayData implements DataView {
      int position = 0;

      @Override public int peekByte(int offset) {
        int index = position + offset;
        // Mask to convert the signed byte into its unsigned value.
        return bytes[index] & 0xFF;
      }

      @Override public int readByte() {
        int value = bytes[position++] & 0xFF;
        return value;
      }

      @Override public int readShort() {
        // The first byte read is the most significant.
        int high = readByte();
        int low = readByte();
        return (high << 8) | low;
      }

      @Override public int readInt() {
        // The first short read is the most significant.
        int upper = readShort();
        int lower = readShort();
        return (upper << 16) | lower;
      }

      @Override public State branch(int offset) {
        position += offset;
        // A zero offset is the marker for a terminal state rather than a real jump.
        if (offset == 0) {
          return State.TERMINAL;
        }
        return State.CONTINUE;
      }

      @Override public State jumpTable(int index) {
        int jumpOffset = peekByte(index);
        return branch(jumpOffset);
      }
    }
  }

  /**
   * A matcher whose instructions are held in a String, with two instruction bytes packed into
   * each char (the even numbered byte in the high 8 bits, the odd numbered byte in the low 8
   * bits).
   */
  private static final class StringMatcher extends DigitSequenceMatcher {

    /*
     * Note: Using unsigned shift "x >>> 1" is more likely to be free as part of a data load
     * instruction than "x / 2".
     */

    private final String bytes;

    private StringMatcher(String bytes) {
      this.bytes = bytes;
    }

    /** Returns the unsigned byte at the given byte index of the packed string. */
    private int byteAt(int byteIndex) {
      int packed = bytes.charAt(byteIndex >>> 1);
      // char := hi [ even-byte | odd-byte  ] lo
      boolean isOdd = (byteIndex & 1) != 0;
      return isOdd ? (packed & 0xFF) : (packed >>> 8);
    }

    @Override
    int size() {
      int last = bytes.charAt(bytes.length() - 1);
      // A low byte of 0xFF in the final char is not counted as part of the data.
      boolean hasPadding = (last & 0xFF) == 0xFF;
      int byteCount = 2 * bytes.length();
      return hasPadding ? byteCount - 1 : byteCount;
    }

    @Override
    DataView newDataView() {
      return new StringData();
    }

    /** A cursor over the enclosing matcher's string, addressed in bytes and starting at zero. */
    private class StringData implements DataView {
      int position = 0;

      @Override public int peekByte(int offset) {
        return byteAt(position + offset);
      }

      @Override public int readByte() {
        int value = byteAt(position);
        position += 1;
        return value;
      }

      @Override public int readShort() {
        int start = position;
        int first = bytes.charAt(start >>> 1);
        // Advancing by 2 does not change whether the position is odd or even, but it does make
        // the position refer to the next char.
        position = start + 2;
        if ((start & 1) == 0) {
          // Aligned read: the char is exactly the short we want.
          return first;
        }
        // Unaligned read: low byte of the first char followed by high byte of the next char.
        int second = bytes.charAt(position >>> 1);
        return ((first & 0xFF) << 8) | (second >>> 8);
      }

      @Override public int readInt() {
        // The first short read is the most significant.
        int upper = readShort();
        int lower = readShort();
        return (upper << 16) | lower;
      }

      @Override public State branch(int offset) {
        position += offset;
        // A zero offset is the marker for a terminal state rather than a real jump.
        if (offset == 0) {
          return State.TERMINAL;
        }
        return State.CONTINUE;
      }

      @Override public State jumpTable(int index) {
        int jumpOffset = peekByte(index);
        return branch(jumpOffset);
      }
    }
  }

  /**
   * A {@code DigitSequence} which yields the digits of a character sequence from left to right,
   * checking each character as it is consumed.
   */
  private static final class StringDigits implements DigitSequence {
    private final CharSequence number;
    private int n = 0;

    private StringDigits(CharSequence number) {
      this.number = number;
    }

    @Override public boolean hasNext() {
      return n < number.length();
    }

    @Override public int next() {
      boolean inBounds = n >= 0 && n < number.length();
      if (!inBounds) {
        throw new IndexOutOfBoundsException(
            "index '" + n + "' out of bounds for input: " + number);
      }
      char current = number.charAt(n);
      boolean isDigit = current >= '0' && current <= '9';
      if (!isDigit) {
        throw new IllegalArgumentException(
            "non-digit character '" + current + "' [" + ((int) current) + "] at index " + n
                + " in: " + number);
      }
      // Only advance once the character has been validated.
      n++;
      return current - '0';
    }
  }

  /**
   * Returns a {@code DigitSequence} which reads its digits from the given character sequence.
   * The input may only contain the digits '0' to '9'; this is not checked up front, but the
   * returned sequence throws {@link IllegalArgumentException} from {@code next()} on reaching
   * any other character.
   */
  public static DigitSequence digitsFromString(CharSequence number) {
    DigitSequence digits = new StringDigits(number);
    return digits;
  }

  /**
   * A matcher has no internal state and is just a factory for data specific implementations.
   * The constructor is private, so only classes nested in this file can extend the matcher.
   */
  private DigitSequenceMatcher() { }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/OpCode.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/OpCode.java
@ -0,0 +1,262 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher;

 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.DataView;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.DigitSequence;

 /**
 * Implementation of instructions for the phone number matcher state machine.
 * <p>
 * <h3>Jump Tables</h3>
 *
 * Several instructions use a "jump table" concept which is simply a contiguous region of bytes
 * containing offsets from which a new position is calculated. The new position is the current
 * position (at the start of the jump table) plus the value of the chosen jump offset.
 *
 * <pre>{@code
 * [    ...    | JUMP_0 | JUMP_1 | ... | JUMP_N |    ...    |  DEST  |  ...
 *  position --^            ^                               ^
 *             `---index ---'                               |
 *  offset     `----------------  [ position + index ] -----'
 *
 *  position = position + unsignedByteValueAt(position + index)
 * }</pre>
 *
 * A jump offset of zero signifies that the state jumped to is terminal (this avoids having to jump
 * to a termination byte). A jump table will always occur immediately after an associated
 * instruction and the instruction's stated size includes the number of bytes in the jump table.
 */
 public enum OpCode {
  /**
   * Jumps ahead by between 1 and 4095 bytes from the end of this opcode, without consuming any
   * input.
   * <p>
   * This is a variable length instruction: offsets up to 15 fit in a single byte, and larger
   * offsets (up to 4095) set the EXT bit and use a second byte. The jump offset is the number of
   * bytes to skip after this instruction.
   * <p>
   * As a special case, a single byte branch with a jump offset of zero (ie, a single zero byte)
   * signifies that the current state is terminal and the state machine should exit (a zero jump
   * offset never makes sense in any instruction).
   *
   * <pre>{@code
   * [ 0 | 0 |  JUMP   ]
   * [ 0 | 1 |  JUMP   |  EXT_JUMP   ]
   *  <3>.<1>.<-- 4 -->.<---- 8 ---->
   * }</pre>
   */
  BRANCH(0) {
    @Override
    State execute(DataView data, DigitSequence ignored) {
      int first = data.readByte();
      boolean isExtended = (first & (1 << 4)) != 0;
      int jump = first & 0xF;
      if (isExtended) {
        // The 4 bits from the first byte become the high bits of a 12-bit offset.
        jump = (jump << 8) + data.readByte();
      }
      return data.branch(jump);
    }
  },
  /**
   * Accepts exactly one input value (transitioning to a single state). Any input other than
   * "VAL" is invalid from the current state. If "TRM" is set then the state being transitioned
   * from may terminate.
   *
   * <pre>{@code
   * [ 1 |TRM|  VAL  ]
   *  <3>.<1>.<- 4 ->
   * }</pre>
   */
  SINGLE(1) {
    @Override
    State execute(DataView data, DigitSequence in) {
      int op = data.readByte();
      if (in.hasNext()) {
        int digit = in.next();
        int expected = op & 0xF;
        return (expected == digit) ? State.CONTINUE : State.INVALID;
      }
      // Out of input: whether that is acceptable depends on the TRM bit.
      boolean mayTerminate = (op & (1 << 4)) != 0;
      return mayTerminate ? State.TERMINAL : State.TRUNCATED;
    }
  },
  /**
   * Accepts any input, one or more times, transitioning to a single state. The number of inputs
   * accepted is encoded as "NUM-1", giving between 1 and 16 inputs.
   * <p>
   * If "TRM" is set then every state that is transitioned from may terminate.
   *
   * <pre>{@code
   * [ 2 |TRM| NUM-1 ]
   *  <3>.<1>.<- 4 ->
   * }</pre>
   */
  ANY(2) {
    @Override
    State execute(DataView data, DigitSequence in) {
      int op = data.readByte();
      boolean mayTerminate = (op & (1 << 4)) != 0;
      int count = (op & 0xF) + 1;
      for (int consumed = 0; consumed < count; consumed++) {
        if (!in.hasNext()) {
          return mayTerminate ? State.TERMINAL : State.TRUNCATED;
        }
        // The value of the digit is irrelevant, it only needs to be consumed.
        in.next();
      }
      return State.CONTINUE;
    }
  },
  /**
   * Accepts multiple inputs to transition to one or two states. The bit-set has the Nth bit set
   * if digit N should be accepted (bit-0 is the lowest bit of the 2 byte form of the
   * instruction).
   * <p>
   * This is a variable length instruction which either treats non-matched inputs as invalid
   * (2 byte form) or branches to one of two states via a 2-entry jump table (4 byte form).
   * <p>
   * If "TRM" is set then the state being transitioned from may terminate.
   *
   * <pre>{@code
   * [ 3 |TRM| 0 |---|   BIT SET  ]
   * [ 3 |TRM| 1 |---|   BIT SET  |  JUMP_IN  | JUMP_OUT  ]
   *  <3>.<1>.<1>.<1>.<--- 10 --->.<--- 8 --->.<--- 8 --->
   * }</pre>
   */
  RANGE(3) {
    @Override
    State execute(DataView data, DigitSequence in) {
      int op = data.readShort();
      if (in.hasNext()) {
        int digit = in.next();
        boolean hasJumpTable = (op & (1 << 11)) != 0;
        if (hasJumpTable) {
          // 4 byte form: the bit is negated so that a set bit selects index 0 (JUMP_IN) and a
          // clear bit selects index 1 (JUMP_OUT).
          return data.jumpTable((~op >>> digit) & 1);
        }
        // 2 byte form: input not in the bit-set is invalid.
        boolean isAccepted = (op & (1 << digit)) != 0;
        return isAccepted ? State.CONTINUE : State.INVALID;
      }
      // Out of input: whether that is acceptable depends on the TRM bit.
      boolean mayTerminate = (op & (1 << 12)) != 0;
      return mayTerminate ? State.TERMINAL : State.TRUNCATED;
    }
  },
  /**
   * Accept multiple inputs to transition to between one and ten states via jump offsets. Inputs
   * not encoded in "CODED MAP" are invalid from the current state.
   * <p>
   * Because there is no room for a termination bit in this instruction, there is an alternate
   * version, {@code TMAP}, which should be used when transitioning from a terminating state.
   * <p>
   * TODO: Figure out if we can save one bit here and merge MAP and TMAP.
   *
   * <pre>{@code
   * [ 4 |      CODED MAP       |  JUMP_1   |  ... |  JUMP_N   ]
   *  <3>.<-------- 29 -------->.<--- 8 --->.  ... .<--- 8 --->
   * }</pre>
   */
  MAP(4) {
    @Override
    State execute(DataView data, DigitSequence in) {
      // Running out of input in a non-terminating state means the input was truncated.
      return map(data, in, State.TRUNCATED);
    }
  },
  /**
   * Like {@code MAP} but transitions from a terminating state, so running out of input when this
   * instruction is executed yields {@code TERMINAL} rather than {@code TRUNCATED}.
   */
  TMAP(5) {
    @Override
    State execute(DataView data, DigitSequence in) {
      return map(data, in, State.TERMINAL);
    }
  };

  /** The states which executing an instruction can leave the state-machine in. */
  public enum State {
    /** The instruction succeeded and the next instruction should be executed. */
    CONTINUE,
    /** The state-machine has reached a state in which it may stop. */
    TERMINAL,
    /** The input was not acceptable to the instruction. */
    INVALID,
    /** The input ran out in a state in which the state-machine may not stop. */
    TRUNCATED
  }

  /** The opcodes indexed by their 3-bit code, cached to avoid repeated {@code values()} calls. */
  private static final OpCode[] VALUES = values();

  /**
   * Encode maps as 29 bits where each digit takes a different number of bits to encode its offset.
   * Specifically:
   * <ul>
   * <li>The first entry (matching 0) has only two possible values (it is either not present or maps
   * to the first entry in the jump table), so takes only 1 bit.
   * <li>The second entry (matching 1) has three possible values (not present or maps to either the
   * first or second entry in the jump table), so it takes 2 bits.
   * <li>In general the entry matching digit N has (N+2) possible values (not present, or one of
   * the first N+1 entries in the jump table) and takes ceil(log2(N+2)) bits.
   * </ul>
   * <p>
   * This constant packs, as ten 5-bit fields (digit 0 in the lowest bits), the number of bits by
   * which the coded map must be shifted to bring the entry for each digit into the lowest bits.
   */
  private static final long MAP_SHIFT_BITS = 0L << 0 | // 1 bit  (1x, mask=1)
      1L << 5 | 3L << 10 |                             // 2 bits (2x, mask=3)
      5L << 15 | 8L << 20 | 11L << 25 | 14L << 30 |    // 3 bits (4x, mask=7)
      17L << 35 | 21L << 40 | 25L << 45;               // 4 bits (3x, mask=F)

  /**
   * A table of values with which to mask the coded jump table map, after shifting it. Each nibble
   * is a mask of up to 4 bits to extract the encoded index from a map instruction after it has
   * been shifted. The nibble for digit 0 is the lowest.
   */
  private static final long MAP_MASK_BITS = 0xFFF7777331L;

  /**
   * Returns how many bits the coded jump table map must be shifted right, for a digit with value
   * {@code n}, so that the jump index for that digit ends up in the lowest bits.
   *
   * @param n the digit value, from 0 to 9 inclusive
   */
  public static int getMapShift(int n) {
    // Each digit has a 5-bit field in the table; move the one for n to the bottom and isolate it.
    long shifted = MAP_SHIFT_BITS >>> (5 * n);
    return ((int) shifted) & 0x1F;
  }

  /**
   * Returns the mask to apply to the shifted jump table map, for a digit with value {@code n},
   * so that only the jump index for that digit remains in the lowest bits.
   *
   * @param n the digit value, from 0 to 9 inclusive
   */
  public static int getMapMask(int n) {
    // Each digit has a 4-bit field in the table; move the one for n to the bottom and isolate it.
    long shifted = MAP_MASK_BITS >>> (4 * n);
    return ((int) shifted) & 0xF;
  }

  /**
   * Executes a map instruction: reads the 4 byte instruction, decodes the jump table index for
   * the next input digit from the coded map, and applies the corresponding jump offset.
   *
   * @param noInputState the state to return if there is no more input
   */
  private static State map(DataView data, DigitSequence in, State noInputState) {
    int codedMap = data.readInt();
    if (in.hasNext()) {
      int digit = in.next();
      int shifted = codedMap >>> getMapShift(digit);
      // Coded indices are 1-to-10 (0 means the digit is not accepted).
      int codedIndex = shifted & getMapMask(digit);
      // Jump table indices are zero based, hence the subtraction.
      return codedIndex != 0 ? data.jumpTable(codedIndex - 1) : State.INVALID;
    }
    return noInputState;
  }

  /**
   * Returns the opcode identified by the top 3 bits of the given unsigned byte value (which is
   * the first byte of any instruction).
   */
  static OpCode decode(int unsignedByte) {
    int code = unsignedByte >>> 5;
    return VALUES[code];
  }

  /**
   * Checks, as each enum constant is created, that its declared code fits in 3 bits and equals
   * its ordinal. This guards against the constants being reordered, since decoding looks
   * opcodes up by position.
   */
  private OpCode(int code) {
    boolean fitsInThreeBits = (code & ~0x7) == 0;
    if (!fitsInThreeBits || code != ordinal()) {
      throw new AssertionError("bad opcode value: " + code);
    }
  }

  /**
   * Executes this instruction, reading its encoded form from the current position of
   * {@code data} and consuming input digits from {@code in} as required.
   *
   * @return the state of the state-machine after the instruction has been executed
   */
  abstract State execute(DataView data, DigitSequence in);
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherBytes.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherBytes.java
@ -0,0 +1,247 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;

 import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.io.ByteArrayDataOutput;
 import com.google.common.io.ByteStreams;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler.Sequence;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

 /**
 * Renders the final bytecode representation for the matcher by connecting sequences of operations
 * together and fixing-up offsets and branch instructions. This is essentially the higher-level
 * aspect of matcher bytecode compilation.
 * <p>
 * Unlike {@link MatcherCompiler} in which a lot of the data is immutable (because sequences can
 * be defined in isolation), there's a lot of mutable state in this class due to the need to build
 * and manage offsets between the sequences, which relies on the order in which other sequences
 * have been rendered.
 */
 class MatcherBytes {
  /**
   * Orders byte sequences from largest to smallest. This is only a partial order (sequences of
   * equal size compare as equal without being equal), so it is not "equivalent to equals" and
   * must not be used to construct an ordered set.
   */
  private static final Comparator<SequenceBytes> DECREASING_BY_SIZE =
      new Comparator<SequenceBytes>() {
        @Override public int compare(SequenceBytes first, SequenceBytes second) {
          int secondSize = second.size();
          int firstSize = first.size();
          // The operands are swapped relative to natural ordering so larger sizes sort first.
          return Integer.compare(secondSize, firstSize);
        }
      };

  /**
   * Sequences we have not considered for rendering yet.
   */
  private final List<Sequence> remainingSequences;
  /**
   * Candidate sequences whose dependent sequences have all been rendered, and which may themselves
   * now be rendered. Insertion order is preserved so candidates are rendered in the order in
   * which they became available.
   */
  private final Set<Sequence> canditiateSequences = new LinkedHashSet<>();
  /**
   * Sequences which have been rendered (used to determine when other sequences become renderable).
   */
  private final Set<Sequence> compiledSequences = new HashSet<>();
  /**
   * A map from the initial node of each compiled sequence to the compiled bytes of that sequence.
   * It is used to find the target sequence for each out-state of a sequence being compiled. The
   * key set of this map is a subset of all nodes.
   */
  private final Map<DfaNode, SequenceBytes> sequenceMap = new HashMap<>();
  /**
   * A list of compiled byte sequences in reverse order (ie, the sequence with the terminal node
   * in it is first in this list and the sequence with the initial node is last). Compilation
   * occurs in reverse order to allow offsets between sequences to be calculated as we go.
   */
  private final List<SequenceBytes> reverseOrder = new ArrayList<>();
  /** Statistics instance for collecting information about the compilation. */
  private final Statistics stats;

  /**
   * Creates a compiler for the given sequences.
   *
   * @param allSequences every sequence to be compiled, in sorted (forward) order
   * @param stats the (non-null) instance in which to record compilation statistics
   */
  MatcherBytes(Iterable<Sequence> allSequences, Statistics stats) {
    // Initially every sequence is still to be rendered. Rendering happens in reverse order, so
    // the remaining sequences are held as a reversed view of a copy of the input.
    List<Sequence> forwardOrder = Lists.newArrayList(allSequences);
    this.remainingSequences = Lists.reverse(forwardOrder);
    this.stats = Preconditions.checkNotNull(stats);
  }

  /**
   * Compiles all sequences into a single byte buffer suitable for use by a
   * {@code DigitSequenceMatcher}.
   */
  byte[] compile() {
    final int totalSequenceCount = remainingSequences.size();
    // Sequences with no dependent sequences are compiled first.
    compileFinalSequences();
    while (compiledSequences.size() < totalSequenceCount) {
      // Promote any remaining sequence whose out-sequences have all been compiled. A pass will
      // not always find a new candidate, but the candidate set should never be emptied until
      // the final sequence is processed.
      Iterator<Sequence> pending = remainingSequences.iterator();
      while (pending.hasNext()) {
        Sequence sequence = pending.next();
        if (!compiledSequences.containsAll(sequence.unorderedOutSequences())) {
          continue;
        }
        canditiateSequences.add(sequence);
        pending.remove();
      }
      // Compile the oldest candidate.
      Sequence next = Iterables.get(canditiateSequences, 0);
      SequenceBytes rendered = compile(next);
      reverseOrder.add(rendered);
      canditiateSequences.remove(next);
      compiledSequences.add(next);
    }
    // Once everything is rendered there must be nothing left over.
    Preconditions.checkState(remainingSequences.isEmpty());
    Preconditions.checkState(canditiateSequences.isEmpty());
    return concatSequenceBytesInForwardOrder();
  }

  /**
   * Compiles every sequence which reports itself as final (and so has no dependencies), then
   * orders the compiled results by size as a heuristic to reduce the size of the branch offsets
   * needed to reach them.
   */
  private void compileFinalSequences() {
    Iterator<Sequence> pending = remainingSequences.iterator();
    while (pending.hasNext()) {
      Sequence sequence = pending.next();
      if (!sequence.isFinal()) {
        continue;
      }
      reverseOrder.add(compile(sequence));
      compiledSequences.add(sequence);
      pending.remove();
    }
    // The list is in reverse order, so sorting it by decreasing size puts the shortest sequences
    // first in the final output. This will tend to reduce the number of 2-byte branch
    // instructions needed to jump to them.
    Collections.sort(reverseOrder, DECREASING_BY_SIZE);
  }

  /**
   * Compiles a sequence for which all dependent sequences have already been compiled, and
   * registers the result against the sequence's initial state.
   */
  private SequenceBytes compile(Sequence sequence) {
    // Note: Even non branching sequences will have an out node here.
    Map<DfaNode, Integer> offsets = new HashMap<>();
    for (DfaNode outState : sequence.getOutStates()) {
      SequenceBytes target = sequenceMap.get(outState);
      // The sequence being compiled will be placed after everything in the reverse order list,
      // so the offset to the target is the total size of the sequences listed after the target.
      int offset = 0;
      int index = reverseOrder.size() - 1;
      while (index >= 0 && reverseOrder.get(index) != target) {
        offset += reverseOrder.get(index).size();
        index--;
      }
      if (offset > 0 && target.isTerminator()) {
        // Rather than explicitly jumping to a terminator sequence, we can just exit
        // unconditionally at this point.
        offset = Operation.TERMINATION_OFFSET;
      }
      offsets.put(outState, offset);
    }
    SequenceBytes rendered = new SequenceBytes(sequence, offsets, stats);
    sequenceMap.put(sequence.getInitialState(), rendered);
    return rendered;
  }

  /**
   * Creates the final, single buffer of bytecode instructions for the matcher by concatenating
   * the compiled sequences, walking the reverse order list from its end to its start.
   */
  private byte[] concatSequenceBytesInForwardOrder() {
    ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
    for (int n = reverseOrder.size() - 1; n >= 0; n--) {
      byte[] sequenceBytes = reverseOrder.get(n).getBytes();
      // The three argument write() is declared by ByteArrayOutputStream itself and cannot throw
      // IOException, so no (unreachable) exception handling is needed here.
      outBuffer.write(sequenceBytes, 0, sequenceBytes.length);
    }
    return outBuffer.toByteArray();
  }

  /**
   * Renders a sequence to its bytecode form, using the given map of branch offsets either to
   * fill in the jump table of a branching final operation or to append an explicit branch (or
   * terminator) after a non-branching one.
   */
  private static byte[] renderSequence(
      Sequence sequence, Map<DfaNode, Integer> offsetMap, Statistics stats) {
    // Because the operations come from a sequence, only the last one can possibly be branching.
    List<Operation> operations = sequence.createOps();
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ByteArrayDataOutput out = ByteStreams.newDataOutput(buffer);
    // Everything before the last operation is written without any offsets.
    int lastIndex = operations.size() - 1;
    for (int i = 0; i < lastIndex; i++) {
      operations.get(i).writeTo(out, null, stats);
    }
    Operation finalOp = Iterables.getLast(operations);
    if (finalOp.isTerminating()) {
      stats.record(Statistics.Type.TERMINATING);
    }
    if (finalOp.isBranching()) {
      // A branching operation fills in its own jump table directly from the offset map.
      finalOp.writeTo(out, offsetMap, stats);
      return buffer.toByteArray();
    }
    // A non-branching operation does not use offsets itself, but may need to be followed by an
    // explicit branch instruction.
    finalOp.writeTo(out, null, stats);
    if (offsetMap.isEmpty()) {
      return buffer.toByteArray();
    }
    // When adding a branch instruction, there should only be a single offset to use.
    int offset = Iterables.getOnlyElement(offsetMap.values());
    if (offset < 0) {
      // The only permitted negative offset is the marker meaning the matcher should exit.
      Preconditions.checkArgument(offset == Operation.TERMINATION_OFFSET);
      Operation.writeTerminator(out, stats);
    } else {
      // The offset could still be zero, but this is handled correctly by writeBranch().
      Operation.writeBranch(out, offset, stats);
    }
    return buffer.toByteArray();
  }

  /**
   * A holder which pairs a {@link Sequence} with the bytes it was compiled to. The bytes are
   * rendered once, when the holder is created.
   */
  static class SequenceBytes {
    private final Sequence sequence;
    private final byte[] bytes;

    SequenceBytes(Sequence sequence, Map<DfaNode, Integer> offsetMap, Statistics stats) {
      this.sequence = sequence;
      this.bytes = renderSequence(sequence, offsetMap, stats);
    }

    /** Returns the compiled bytes (the internal array itself, not a copy). */
    byte[] getBytes() {
      return bytes;
    }

    /** Returns the number of compiled bytes. */
    int size() {
      return bytes.length;
    }

    /** Returns the sequence from which the bytes were compiled. */
    Sequence getSequence() {
      return sequence;
    }

    /** Returns whether the underlying sequence is final and has a size of exactly one. */
    boolean isTerminator() {
      if (!sequence.isFinal()) {
        return false;
      }
      return sequence.size() == 1;
    }
  }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompiler.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompiler.java
@ -0,0 +1,299 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.collect.ImmutableMap.toImmutableMap;
 import static com.google.common.collect.ImmutableSet.toImmutableSet;
 import static java.lang.Integer.numberOfTrailingZeros;

 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.graph.MutableValueGraph;
 import com.google.common.graph.ValueGraph;
 import com.google.common.graph.ValueGraphBuilder;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Set;
 import java.util.function.Function;

 /**
 * Compiles non-capturing phone number regular expressions into sequences of bytes suitable for
 * creating {@link com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher
 * DigitSequenceMatcher} instances.
 */
 public final class MatcherCompiler {
  /**
   * Compiles the given {@code RangeTree} into a sequence of bytes suitable for creating a
   * {@link com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher
   * DigitSequenceMatcher}.
   */
  public static byte[] compile(RangeTree dfa) {
    // No metrics were requested, so compile with the statistics instance that ignores everything.
    Statistics discarded = Statistics.NO_OP;
    return compile(dfa, discarded);
  }

  /**
   * As {@link #compile(RangeTree)} but additionally accepts a {@link Statistics} instance
   * to record metrics about the compilation.
   */
  public static byte[] compile(RangeTree dfa, Statistics stats) {
    // The constructor builds the graph and its sequences; the instance method renders the bytes.
    MatcherCompiler compiler = new MatcherCompiler(dfa);
    return compiler.compile(stats);
  }

  /** The DFA from which the matcher data is to be compiled. */
  private final ValueGraph<DfaNode, DfaEdge> dfa;
  /** The unique initial node of the DFA. */
  private final DfaNode init;
  /**
   * A map from nodes which are at the beginning of a sequence to that sequence. Not all nodes
   * will be present in the key set of this map.
   */
  private final ImmutableMap<DfaNode, Sequence> seqStart;

  /**
   * Builds a graph directly from the DFA in a RangeTree.
   *
   * <p>Rather than deal with the DFA tree directly (which is deliberately opaque as a data
   * structure) we serialize it into a more malleable ValueGraph. This allows simpler graph
   * traversal while maintaining a simple-as-possible node/edge structure. It's okay to reuse the
   * RangeTree types {@code DfaNode} and {@code DfaEdge} here because they have the expected
   * semantics (e.g. conforming to equals/hashcode etc...) but care must be taken not to keep the
   * instances around for a long time, since this will keep larger parts of the original DFA alive
   * in the garbage collector (but this is fine since only bytes are returned from this class).
   */
  private static ValueGraph<DfaNode, DfaEdge> buildGraph(RangeTree dfa) {
    checkArgument(!dfa.isEmpty());
    MutableValueGraph<DfaNode, DfaEdge> result =
        ValueGraphBuilder.directed().allowsSelfLoops(false).build();
    // Seed the graph with the initial node so it is present even before any edge is visited.
    result.addNode(dfa.getInitial());
    dfa.accept(new DfaVisitor() {
      @Override
      public void visit(DfaNode from, DfaEdge via, DfaNode to) {
        // addNode() reports whether the graph changed, i.e. whether "to" was previously unknown.
        boolean unseen = result.addNode(to);
        result.putEdgeValue(from, to, via);
        if (!unseen) {
          // Already expanded via another edge; do not descend a second time.
          return;
        }
        to.accept(this);
      }
    });
    return result;
  }

  /**
   * Creates a {@code MatcherCompiler} from the given automaton by generating all the
   * {@code Sequence}'s of operations necessary to represent it.
   */
  MatcherCompiler(RangeTree ranges) {
    // The graph must be assigned before any sequences are built, since building them walks it.
    this.dfa = buildGraph(ranges);
    this.init = ranges.getInitial();
    // A LinkedHashMap keeps the sequences in the order they are discovered by the traversal.
    LinkedHashMap<DfaNode, Sequence> sequencesByStartNode = new LinkedHashMap<>();
    buildSequencesFrom(this.init, sequencesByStartNode);
    this.seqStart = ImmutableMap.copyOf(sequencesByStartNode);
  }

  /**
   * Returns the output targets of the given node sorted according to the lowest "accepting" digit
   * on the corresponding edge. This ordering is necessary for stability, but also correctness when
   * building mapping operations. Apart from special cases (e.g. only one output) this is the only
   * method which should be used to obtain output nodes.
   */
  private ImmutableSet<DfaNode> sortedOutputs(DfaNode source) {
    List<DfaNode> targets = new ArrayList<>(dfa.successors(source));
    // Order by the lowest set bit of each edge's digit mask. List.sort is stable, so any ties
    // keep the order in which the graph returned the successors.
    targets.sort(Comparator.comparingInt(
        target -> numberOfTrailingZeros(dfa.edgeValue(source, target).get().getDigitMask())));
    return ImmutableSet.copyOf(targets);
  }

  /** Returns the single output target of the given node (or throws an exception). */
  private DfaNode singleOutput(DfaNode source) {
    Set<DfaNode> successors = dfa.successors(source);
    // getOnlyElement() throws unless there is exactly one successor.
    return Iterables.getOnlyElement(successors);
  }

  /**
   * Builds the output map from a given node in the DFA in the correct order. Note that because
   * ImmutableMap preserves the order in which its entries are added, and we add keys (nodes) in
   * the order given by {@code sortedOutputs()}, the keys of the returned map are ordered by the
   * lowest digit in their corresponding mask (inputs). This is necessary for correct behaviour in
   * the "Mapping" operation.
   */
  private ImmutableMap<DfaNode, Integer> getOutMap(DfaNode source) {
    ImmutableMap.Builder<DfaNode, Integer> masks = ImmutableMap.builder();
    // Entries are added in sortedOutputs() order, which the built map then iterates in.
    for (DfaNode target : sortedOutputs(source)) {
      masks.put(target, dfa.edgeValue(source, target).get().getDigitMask());
    }
    return masks.build();
  }

  /**
   * Recursively builds sequences by traversing the DFA and grouping successive sub-sequences of
   * nodes which neither branch, nor are branched to. Each such sub-sequence is represented by a
   * {@code Sequence} instance (a list of non-branching operations, optionally terminated with a
   * branching operation).
   */
  private void buildSequencesFrom(DfaNode start, LinkedHashMap<DfaNode, Sequence> map) {
    if (map.containsKey(start)) {
      // A sequence beginning at this node was already built (reached via another branch).
      return;
    }
    ImmutableList.Builder<DfaNode> run = ImmutableList.builder();
    DfaNode tail = start;
    run.add(tail);
    // Extend the run while the tail has exactly one output and that output has no other inputs.
    while (dfa.outDegree(tail) == 1) {
      DfaNode candidate = singleOutput(tail);
      if (dfa.inDegree(candidate) > 1) {
        // The candidate is branched to from elsewhere, so it must start its own sequence.
        break;
      }
      tail = candidate;
      run.add(tail);
    }
    map.put(start, new Sequence(run.build()));
    // Recurse from the outputs at the end of the sequence according to their edge values.
    // IMPORTANT: The raw successor set of the graph must not be used here since we need the order
    // of insertion to be well defined and ValueGraph does not make good enough promises about
    // node ordering.
    for (DfaNode out : sortedOutputs(tail)) {
      buildSequencesFrom(out, map);
    }
  }

  /** Creates and compiles a {@code MatcherBytes} instance to render the output bytes. */
  byte[] compile(Statistics stats) {
    MatcherBytes matcherBytes = createMatcherBytes(stats);
    return matcherBytes.compile();
  }

  /** Creates a mutable {@code MatcherBytes} instance which will render the output bytes. */
  MatcherBytes createMatcherBytes(Statistics stats) {
    // The sequences are handed over in the iteration order of the sequence-start map.
    MatcherBytes matcherBytes = new MatcherBytes(seqStart.values(), stats);
    return matcherBytes;
  }

  /**
   * A contiguous sub-sequence of nodes in the DFA which neither branch, nor are branched to.
   * <p>
   * The important property of a {@code Sequence} is that branching may only occur at the end of a
   * {@code Sequence} and branches may only jump to the start of another {@code Sequence}. This
   * makes it easier to separate the compilation of operations (inside sequences) from the
   * management of branches and offsets (between sequences).
   */
  class Sequence {
    private final ImmutableList<DfaNode> nodes;

    Sequence(ImmutableList<DfaNode> nodes) {
      checkArgument(!nodes.isEmpty());
      this.nodes = nodes;
    }

    private Operation getOp(DfaNode node) {
      return Operation.from(node.canTerminate(), getOutMap(node));
    }

    /**
     * Returns the operations representing this sequence, merging successive operations where
     * possible. The final list of operations is guaranteed to have at most one branching operation
     * which (if present) will always be the last element in the list.
     */
    List<Operation> createOps() {
      List<Operation> ops = new ArrayList<>();
      Operation current = getOp(nodes.get(0));
      for (int n = 1; n < nodes.size(); n++) {
        Operation next = getOp(nodes.get(n));
        Operation merged = current.mergeWith(next);
        if (merged != null) {
          current = merged;
        } else {
          ops.add(current);
          current = next;
        }
      }
      ops.add(current);
      return ops;
    }

    DfaNode getInitialState() {
      return Iterables.get(nodes, 0);
    }

    DfaNode getFinalState() {
      return Iterables.getLast(nodes);
    }

    Set<DfaNode> getOutStates() {
      return sortedOutputs(getFinalState());
    }

    /**
     * Not the same as "terminating" for an operation. A sequence is "final" if no other sequences
     * follow it. Normally there is only one final sequence in a normalized DFA, even if that
     * sequence contains only a single terminating node. However not all terminating nodes are
     * in final sequences.
     */
    boolean isFinal() {
      return getOutStates().isEmpty();
    }

    /** Returns the number of nodes that this sequence represents. */
    int size() {
      return nodes.size();
    }

    ImmutableSet<Sequence> unorderedOutSequences() {
      return getOutStates().stream().map(seqStart::get).collect(toImmutableSet());
    }

    @Override
    public String toString() {
      return toString(new StringBuilder(), 0).toString();
    }

    private StringBuilder toString(StringBuilder buf, int indent) {
      List<Operation> ops = createOps();
      appendIndent(buf, indent).append(
          String.format("{%s} %s", nodes.get(0), Joiner.on(" >> ").join(ops)));
      ImmutableList<DfaNode> outs = Iterables.getLast(ops).getOuts();
      if (!outs.isEmpty()) {
        buf.append(" {\n");
        for (DfaNode out : outs) {
          seqStart.get(out).toString(buf, indent + 1);
        }
        appendIndent(buf, indent).append("}\n");
      } else {
        buf.append('\n');
      }
      return buf;
    }
  }

  @Override
  public String toString() {
    // Printing starts from the sequence that begins at the initial node of the DFA.
    Sequence root = seqStart.get(init);
    return root.toString();
  }

  private static StringBuilder appendIndent(StringBuilder out, int indent) {
    // Two spaces per indent level; a non-positive indent appends nothing.
    for (int remaining = indent; remaining > 0; remaining--) {
      out.append("  ");
    }
    return out;
  }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Operation.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Operation.java
@ -0,0 +1,600 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;

 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.common.collect.ImmutableSetMultimap.flatteningToImmutableSetMultimap;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
 import static java.lang.Integer.numberOfTrailingZeros;
 import static java.util.stream.Collectors.joining;

 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.ImmutableSetMultimap;
 import com.google.common.collect.Iterables;
 import com.google.common.io.ByteArrayDataOutput;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.OpCode;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.Statistics.Type;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;

 /**
 * A specific instance of a number matching operation derived from a DFA. Operations are created by
 * analyzing a sequence in a DFA and knowing how to write the corresponding instruction(s) as bytes
 * (to be processed by DigitSequenceMatcher or similar).
 */
 abstract class Operation {
  /** Represents the digits which can be accepted during matching operations. */
  private enum Digit {
    // Declaration order must match the digit value itself (the constructor verifies this).
    ZERO(0), ONE(1), TWO(2), THREE(3), FOUR(4), FIVE(5), SIX(6), SEVEN(7), EIGHT(8), NINE(9);

    /** Cached copy of {@code values()} so lookups do not re-clone the array. */
    private static final Digit[] VALUES = values();

    /** Every digit, iterating in declaration order (and therefore in value order). */
    public static final ImmutableSet<Digit> ALL = ImmutableSet.copyOf(VALUES);

    Digit(int value) {
      // The value is never stored; it only has to agree with the ordinal.
      Preconditions.checkArgument(ordinal() == value);
    }

    /** Returns the digit corresponding to the integral value in the range {@code 0...9}. */
    public static Digit of(int n) {
      return VALUES[n];
    }

    /**
     * Returns the digits whose bits are set in the given mask, where bit {@code n} stands for
     * digit {@code n}. The mask must be non-zero and no greater than {@code ALL_DIGITS_MASK}.
     */
    public static ImmutableSet<Digit> fromMask(int mask) {
      Preconditions.checkArgument(!(mask < 1 || mask > ALL_DIGITS_MASK));
      if (mask == ALL_DIGITS_MASK) {
        return ALL;
      }
      ImmutableSet.Builder<Digit> selected = ImmutableSet.builder();
      for (Digit digit : VALUES) {
        boolean bitSet = ((mask >>> digit.ordinal()) & 1) != 0;
        if (bitSet) {
          selected.add(digit);
        }
      }
      return selected.build();
    }

    /** Returns the integer value of this digit instance. */
    public int value() {
      return ordinal();
    }
  }

  /**
   * An invalid jump offset indicating that instead of jumping to a new instruction, the state
   * machine can just terminate (used to avoid jumping directly to the termination instruction).
   */
  static final int TERMINATION_OFFSET = -1;

  /** The number of bytes required by a "long" branch instruction. */
  private static final int LONG_BRANCH_SIZE = 2;

  private final boolean isTerminating;
  private final boolean isBranching;

  private Operation(boolean isTerminating, boolean isBranching) {
    this.isBranching = isBranching;
    this.isTerminating = isTerminating;
  }

  /** Returns whether the state machine may terminate once this operation has been reached. */
  boolean isTerminating() {
    return this.isTerminating;
  }

  /**
   * Returns whether this operation is branching. A branching operation has more than one output
   * node it can reach.
   */
  boolean isBranching() {
    return this.isBranching;
  }

  /**
   * Returns the output nodes of this operation. For branching operations the order of multiple
   * output nodes is defined by the operation itself (most operations are not branching and have
   * only one output state anyway).
   */
  abstract ImmutableList<DfaNode> getOuts();

  /** Returns the op-code for this operation, used when writing out instruction bytes. */
  abstract OpCode getOpCode();

  /** Writes this operation out as a series of instruction bytes. */
  abstract void writeImpl(
      ByteArrayDataOutput out, Map<DfaNode, Integer> offsetMap, Statistics stats);

  void writeTo(ByteArrayDataOutput out, Map<DfaNode, Integer> offsetMap, Statistics stats) {
    // Record the statistic first, then let the concrete operation emit its own bytes.
    boolean terminating = isTerminating();
    if (terminating) {
      stats.record(Type.TERMINATING);
    }
    writeImpl(out, offsetMap, stats);
  }

  /**
   * Merges two adjacent operations (a poor man's compiler optimization). Useful for collapsing
   * sequences of "ANY" operations. If this instruction cannot be merged with the given "next"
   * instruction then it should return {@code null}, which is the default behavior.
   *
   * @param next the operation following this operation which we will try and merge with.
   */
  Operation mergeWith(Operation next) {
    // The base implementation never merges; a null result tells the caller to keep both.
    Operation notMerged = null;
    return notMerged;
  }

  /**
   * Writes a branch instruction into the output byte sequence.
   *
   * <p>A jump of zero writes no bytes at all (it is only recorded as a continuation), a jump of
   * less than 16 is written as a single byte, and any larger jump is written as a two byte value
   * with bit 12 set.
   *
   * @param out the output to append the instruction bytes to.
   * @param jump the number of bytes to jump, in the range {@code 0 <= jump < 0x1000}.
   * @param stats receives a record of which form of branch was written.
   * @throws IllegalArgumentException if {@code jump} is outside the permitted range.
   */
  static void writeBranch(ByteArrayDataOutput out, int jump, Statistics stats) {
    // Use the template form so the message is only built if the check actually fails.
    Preconditions.checkArgument(jump >= 0 && jump < 0x1000, "invalid jump: %s", jump);
    if (jump == 0) {
      // Nothing to write; execution simply continues with whatever follows.
      stats.record(Type.CONTINUATION);
    } else if (jump < 16) {
      stats.record(Type.SHORT_BRANCH);
      out.writeByte((OpCode.BRANCH.ordinal() << 5) | jump);
    } else {
      // Both medium and long jumps use the same two byte encoding; only the statistic differs.
      stats.record(jump < 0x100 ? Type.MEDIUM_BRANCH : Type.LONG_BRANCH);
      out.writeShort((OpCode.BRANCH.ordinal() << 13) | (1 << 12) | jump);
    }
  }

  /** Writes a termination byte into the output byte sequence. */
  static void writeTerminator(ByteArrayDataOutput out, Statistics stats) {
    stats.record(Type.FINAL);
    // The termination instruction is a single zero byte.
    final int terminatorByte = 0;
    out.writeByte(terminatorByte);
  }

  /**
   * Creates a new operation to represent the output state transition given by {@code outMasks}.
   * Note that where multiple nodes exist in {@code outMasks}, their ordering must be consistent
   * with the {@code Mapping} operation (whereby nodes are ordered by the lowest bit set in the
   * corresponding mask).
   */
  static Operation from(boolean isTerminating, ImmutableMap<DfaNode, Integer> outMasks) {
    ImmutableList<DfaNode> targets = outMasks.keySet().asList();
    switch (targets.size()) {
      case 0:
        // No out nodes; then it's a "Terminal" operation (and the node must be able to terminate).
        Preconditions.checkState(isTerminating);
        return new Terminal();

      case 1: {
        int mask = outMasks.get(targets.get(0));
        if (mask == ALL_DIGITS_MASK) {
          // One output state reached by any input; then it's an "Any" operation.
          return new Any(isTerminating, 1, targets);
        }
        if (Integer.bitCount(mask) == 1) {
          // One output state reached by a single input; then it's a "Single" operation.
          return new Single(isTerminating, numberOfTrailingZeros(mask), targets);
        }
        // One output state reached by some other set of inputs; then it's a "Range" operation.
        return new Range(isTerminating, mask, targets);
      }

      case 2: {
        // If the two masks together cover every input, a shorter branching "Range" can be used.
        ImmutableList<Integer> masks = outMasks.values().asList();
        if ((masks.get(0) | masks.get(1)) == ALL_DIGITS_MASK) {
          return new Range(isTerminating, masks.get(0), targets);
        }
        break;
      }

      default:
        break;
    }
    // Any other combination of nodes or inputs; then it's a "Mapping" operation. This relies on
    // the ordering of entries in the output map corresponding to edge order.
    return new Mapping(isTerminating, outMasks);
  }

  /** Represents a state with no legal outputs, which must be a terminal state in the matcher. */
  private static final class Terminal extends Operation {
    Terminal() {
      // Constructed with both the "terminating" and "branching" flags set.
      super(true, true);
    }

    /** Always empty, as this operation has no output nodes. */
    @Override
    ImmutableList<DfaNode> getOuts() {
      return ImmutableList.of();
    }

    @Override
    OpCode getOpCode() {
      return OpCode.BRANCH;
    }

    /** Writes only the termination byte; the offset map is not consulted. */
    @Override
    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> ignored, Statistics stats) {
      writeTerminator(out, stats);
    }

    @Override
    public String toString() {
      return "TERMINAL";
    }
  }

  /**
   * Represents a state which can be transitioned from to a single output state via a single input
   * (eg, "0" or "9").
   */
  private static final class Single extends Operation {
    /** The only output node of this operation. */
    private final ImmutableList<DfaNode> outs;
    /** The one digit accepted by this operation. */
    private final Digit digit;

    Single(boolean isTerminating, int digit, ImmutableList<DfaNode> outs) {
      super(isTerminating, false);
      Preconditions.checkArgument(outs.size() == 1);
      this.digit = Digit.of(digit);
      this.outs = outs;
    }

    @Override
    ImmutableList<DfaNode> getOuts() {
      return outs;
    }

    @Override
    OpCode getOpCode() {
      return OpCode.SINGLE;
    }

    @Override
    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> ignored, Statistics stats) {
      //  <--------- 1 byte --------->
      // [ OPCODE | TRM |    VALUE    ]
      int opCodeBits = getOpCode().ordinal() << 5;
      int terminatingBit = isTerminating() ? (1 << 4) : 0;
      out.writeByte(opCodeBits | terminatingBit | digit.value());
    }

    @Override
    public String toString() {
      int value = digit.value();
      return format(value);
    }
  }

  /**
   * Represents a state which can be transitioned from to a single output state via any input
   * (ie, "\d"). Successive "Any" operations can be merged to represent a repeated sequence
   * (eg, "\d{5}").
   */
  private static final class Any extends Operation {
    /** The only output node of this operation. */
    private final ImmutableList<DfaNode> outs;
    /** How many successive digits this operation accepts (always positive). */
    private final int count;

    Any(boolean isTerminating, int count, ImmutableList<DfaNode> outs) {
      super(isTerminating, false);
      Preconditions.checkArgument(outs.size() == 1);
      Preconditions.checkArgument(count > 0);
      this.count = count;
      this.outs = outs;
    }

    @Override
    ImmutableList<DfaNode> getOuts() {
      return outs;
    }

    @Override
    OpCode getOpCode() {
      return OpCode.ANY;
    }

    @Override
    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> ignored, Statistics stats) {
      //  <--------- 1 byte --------->
      // [ OPCODE | TRM |   COUNT-1   ]
      int header = (getOpCode().ordinal() << 5) | (isTerminating() ? (1 << 4) : 0);
      int left = count;
      // A single instruction encodes at most 16 digits, so emit full instructions while more
      // than 16 remain, then one last instruction for the remainder (1 to 16 digits).
      for (; left > 16; left -= 16) {
        out.writeByte(header | 15);
      }
      out.writeByte(header | (left - 1));
    }

    /**
     * Merges with a following "Any" operation that has the same terminating flag, adding the two
     * counts together and taking the outputs of the following operation.
     */
    @Override
    public Operation mergeWith(Operation next) {
      if (next.getOpCode() != OpCode.ANY || isTerminating() != next.isTerminating()) {
        return null;
      }
      Any following = (Any) next;
      return new Any(isTerminating(), this.count + following.count, following.outs);
    }

    @Override
    public String toString() {
      return format(count);
    }
  }

  /**
   * Represents a state which can be transitioned from via an arbitrary set of inputs to either
   * one or two output nodes (eg, "[23-69]" or "[0-4]X|[5-9]Y"). In the case where there are two
   * output nodes, any input must reach one of the two possible nodes (ie, there is no invalid
   * input).
   */
  private static final class Range extends Operation {
    /** The output nodes (at most two); the operation is branching when there are two. */
    private final ImmutableList<DfaNode> outs;
    /** The digits given by the mask passed at construction. */
    private final ImmutableSet<Digit> digits;

    Range(boolean isTerminating, int digitMask, ImmutableList<DfaNode> outs) {
      super(isTerminating, outs.size() == 2);
      Preconditions.checkArgument(outs.size() <= 2);
      this.digits = Digit.fromMask(digitMask);
      this.outs = outs;
    }

    /**
     * For branching Range operations (with 2 output nodes), the order is that the state matched
     * by {@code digits} is the first state and the state reached by any other input is second.
     */
    @Override
    ImmutableList<DfaNode> getOuts() {
      return outs;
    }

    @Override
    OpCode getOpCode() {
      return OpCode.RANGE;
    }

    @Override
    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> offsetMap, Statistics stats) {
      //  <-------------- 2 bytes --------------> <-------- 2 bytes --------->
      // [ OPCODE | TRM |  0  |     BIT SET      ]
      // [ OPCODE | TRM |  1  |     BIT SET      |   JUMP_IN   |   JUMP_OUT   ]
      boolean branching = isBranching();
      int header = getOpCode().ordinal() << 13;
      header |= isTerminating() ? (1 << 12) : 0;
      header |= branching ? (1 << 11) : 0;
      header |= asBitMask(digits);
      out.writeShort(header);
      if (!branching) {
        // The non-branching form has no jump table.
        return;
      }
      Integer jumpIn = offsetMap.get(outs.get(0));
      Integer jumpOut = offsetMap.get(outs.get(1));
      writeJumpTable(out, ImmutableList.of(jumpIn, jumpOut), stats);
    }

    @Override
    public String toString() {
      String ranges = asRangeString(digits);
      return format(ranges);
    }
  }

  /**
   * Represents a state in the matcher which can be transitioned from via an arbitrary set of
   * inputs, to an arbitrary set of nodes. This is the most general form of operation and (apart
   * from branches) provides the only truly necessary instruction in the matcher; everything else
   * is just some specialization of this operation.
   */
  private static final class Mapping extends Operation {
    private final ImmutableSetMultimap<DfaNode, Digit> nodeMap;

    Mapping(boolean isTerminating, ImmutableMap<DfaNode, Integer> outMasks) {
      super(isTerminating, true);
      this.nodeMap = outMasks.entrySet().stream()
          .collect(flatteningToImmutableSetMultimap(
              Entry::getKey, e -> Digit.fromMask(e.getValue()).stream()));
    }

    @Override
    OpCode getOpCode() {
      return isTerminating() ? OpCode.TMAP : OpCode.MAP;
    }

    /**
     * For Mapping operations, output node order is defined by the lowest digit by which that
     * node can be reached. For example, if a map operation can reach three nodes {@code A},
     * {@code B} and {@code C} via inputs in the ranges {@code [1-38]}, {@code [4-6]} and
     * {@code [09]} respectively, then they will be ordered {@code (C, A, B)}.
     */
    @Override ImmutableList<DfaNode> getOuts() {
      return nodeMap.keySet().asList();
    }

    @Override
    void writeImpl(ByteArrayDataOutput out, Map<DfaNode, Integer> offsetMap, Statistics stats) {
      //  <------------ 4 bytes ------------> <-- 1 byte per offset --->
      // [ OPCODE |        CODED MAP         |  JUMP_1  | ... | JUMP_N  ]
      out.writeInt((getOpCode().ordinal() << 29) | asCodedMap(nodeMap));
      ImmutableList<Integer> offsets =
          getOuts().stream().map(offsetMap::get).collect(toImmutableList());
      writeJumpTable(out, offsets, stats);
    }

    @Override
    public String toString() {
      return format(nodeMap.asMap().values().stream()
          .map(Operation::asRangeString).collect(joining(", ")));
    }
  }

  String format(Object extra) {
    // Terminating operations are marked with a trailing "*" after the op-code name.
    String terminatingMarker = isTerminating() ? "*" : "";
    return String.format("%s%s : %s", getOpCode(), terminatingMarker, extra);
  }

  /**
   * Returns an integer with the lowest 10 bits set in accordance with the digits in the given set.
   */
  private static int asBitMask(ImmutableSet<Digit> digits) {
    // OR together one bit per digit, where digit N contributes bit N.
    return digits.stream()
        .mapToInt(digit -> 1 << digit.value())
        .reduce(0, (mask, bit) -> mask | bit);
  }

  /**
   * Returns an integer with the lowest 29 bits set to encode an arbitrary mapping from input digit
   * to an output index. The 29 bits are partitioned such that lower inputs require fewer bits to
   * encode (output indices are assigned as they are encountered, starting at the first input).
   * Each digit can then be quickly mapped to either its 1-indexed output node, or 0 if the input
   * was invalid.
   */
  private static int asCodedMap(ImmutableSetMultimap<DfaNode, Digit> nodeMap) {
    int coded = 0;
    // Coded indices are 1-to-10 (0 is the "invalid" node), assigned in key iteration order.
    int codedIndex = 1;
    for (DfaNode node : nodeMap.keySet()) {
      for (Digit digit : nodeMap.get(node)) {
        coded |= codedIndex << OpCode.getMapShift(digit.value());
      }
      codedIndex++;
    }
    return coded;
  }

  /**
   * Writes a sequence of offsets representing an unsigned byte-based jump table after either a
   * Mapping or Range instruction. This accounts correctly for the need to introduce a new
   * "trampoline" branch instruction after the jump table (when the desired offset is too large
   * to fit in a single unsigned byte).
   * <p>
   * Offsets are either:
   * <ul>
   * <li>The number of bytes to jump from the end of the current {@code Sequence} bytes to the
   *     start of the destination {@code Sequence} bytes.
   * <li>{@code -1} to indicate that a terminal node has been reached.
   * </ul>
   * <p>
   * Note that the offset written into the jump table itself must be relative to the beginning of
   * the jump table and so must be adjusted by the number of bytes in the jump table and any other
   * branch instructions that follow it. This is probably the most awkward logic in the entire
   * compiler.
   *
   * @param out the output to append the jump table (and any trampoline branches) to.
   * @param offsets one offset per jump table entry, in jump table order.
   * @param stats receives records of any terminators, double jumps and branches written.
   * @throws IllegalArgumentException if a negative offset is not {@code TERMINATION_OFFSET}.
   * @throws IllegalStateException if an adjusted jump table entry does not fit in a byte.
   */
  static void writeJumpTable(ByteArrayDataOutput out, List<Integer> offsets,
      Statistics stats) {
    int jumpTableSize = offsets.size();
    boolean needsExtraBranches = false;
    for (int n = 0; n < jumpTableSize && !needsExtraBranches; n++) {
      // Check whether the adjusted offset (ie, the one we would write) will fit in a byte.
      // It's no issue to have offsets of -1 as it can never trigger "needsExtraBranches".
      needsExtraBranches = (offsets.get(n) + jumpTableSize >= 0x100);
    }
    if (needsExtraBranches) {
      // We only get here if at least one offset (after adjustment by the original jump table size)
      // would not fit into a byte. Now we must calculate exactly how many extra branches we are
      // going to need. For this we must assume the worst case adjustment of "3 x jumpTableSize"
      // which is 1 byte for the jump table offset and 2 bytes for the extra branch for every entry.
      // This is pessimistic because there will now be cases where we write a trampoline jump for
      // an offset that could have fitted had we not assumed that we might need the extra space for
      // the branch. However these cases are rare enough that we choose to ignore them.
      int maxOffsetAdjust = ((1 + LONG_BRANCH_SIZE) * jumpTableSize);
      int extraBranchCount = 0;
      for (int n = 0; n < jumpTableSize; n++) {
        if (offsets.get(n) + maxOffsetAdjust >= 0x100) {
          extraBranchCount += 1;
        }
      }
      // Now we know a reasonable upper bound for how many extra branches are needed, use this to
      // adjust the actual offsets and write them. When a "trampoline" branch instruction is needed
      // we split the offset so the jump table jumps to the branch instruction and that jumps the
      // rest. Branch instructions are positioned, in order, immediately after the jump table.
      List<Integer> extraBranchOffsets = new ArrayList<>();
      int totalOffsetAdjust = jumpTableSize + (LONG_BRANCH_SIZE * extraBranchCount);
      for (int n = 0; n < jumpTableSize; n++) {
        int offset = offsets.get(n);
        if (offset >= 0) {
          int worstCaseOffset = offset + maxOffsetAdjust;
          // Get the actual total offset we want to jump by.
          offset += totalOffsetAdjust;
          // Use the worst case offset here so we repeat exactly the same decision as the loop
          // above (otherwise we might add fewer branches which would screw up our offsets).
          if (worstCaseOffset >= 0x100) {
            // Split the original offset, recording the jump to the trampoline branch as well as
            // the branch offset itself. Note that the offset adjustment changes as more trampoline
            // branches are encountered (but the overall offset jumped remains the same).
            int extraBranchIndex = extraBranchOffsets.size();
            // This offset will always be small (max jump table is 10 entries, so offset to the
            // last possible branch will be at most 28 bytes).
            int branchInstructionOffset = jumpTableSize + (LONG_BRANCH_SIZE * extraBranchIndex);
            // Subtract one additional branch instruction here because when we trampoline jump, we
            // jump to the start of the branch instruction, but jump away from the end of it.
            extraBranchOffsets.add((offset - branchInstructionOffset) - LONG_BRANCH_SIZE);
            offset = branchInstructionOffset;
          }
          // Write the total offset (offset must be < 0x100 here as worstCaseOffset was < 0x100).
          Preconditions.checkState(offset < 0x100, "jump too long: %s", offset);
          out.writeByte(offset);
        } else {
          // If the destination of this jump would just be a termination instruction, just write
          // the termination byte here directly (no point jumping to the termination byte).
          Preconditions.checkArgument(offset == TERMINATION_OFFSET, "bad offset: %s", offset);
          writeTerminator(out, stats);
        }
      }
      // Write out the trampoline jumps in the order they were found.
      for (int offset : extraBranchOffsets) {
        stats.record(Type.DOUBLE_JUMP);
        Operation.writeBranch(out, offset, stats);
      }
    } else {
      // In the simple case, there are no extra branches, so we just write the offsets we have.
      // This has the same effect as running the code above with (extraBranchCount == 0) but can be
      // reached more optimistically because we don't need to account for the worst case offset
      // adjustment when deciding if it's safe to just use the offsets we were given. It's a form
      // of hysteresis between the no-branch and extra-branch cases.
      for (int n = 0; n < jumpTableSize; n++) {
        int offset = offsets.get(n);
        if (offset >= 0) {
          offset += jumpTableSize;
          // Template form: the message is only built if the check fails.
          Preconditions.checkState(offset < 0x100, "jump too long: %s", offset);
          out.writeByte(offset);
        } else {
          // Validate the sentinel exactly as the trampoline path does, so that a corrupt negative
          // offset is reported rather than being silently written as a terminator.
          Preconditions.checkArgument(offset == TERMINATION_OFFSET, "bad offset: %s", offset);
          writeTerminator(out, stats);
        }
      }
    }
  }

  // Formats the given digits as a single bracketed range string (eg, "[014-7]").
  //
  // Digits are consumed in iteration order and grouped into runs in which each digit is exactly
  // one greater than the previous one. A run of one digit is written as that digit, a run of two
  // as both digits with no separator (eg, "01"), and a longer run as "first-last" (eg, "4-7").
  // An empty collection produces "[]".
  // NOTE(review): this presumably expects the digits to iterate in ascending order (as the sets
  // built by Digit.fromMask() do) - confirm for any new callers.
  private static String asRangeString(Collection<Digit> digits) {
    StringBuilder out = new StringBuilder();
    out.append("[");
    // "lhs" is the first digit of the current run (already written to "out") and "rhs" is the
    // last digit seen in that run (not yet written unless it is the same digit as "lhs").
    Digit lhs = null;
    Digit rhs = null;
    for (Digit digit : digits) {
      if (lhs != null) {
        if (digit.value() == rhs.value() + 1) {
          // The digit extends the current run, so just remember it as the new end of the run.
          rhs = digit;
          continue;
        }
        // The run has ended; write its end (if distinct from its start) before starting anew.
        if (rhs != lhs) {
          // Only runs of three or more digits get a dash between the first and last digit.
          if (rhs.value() > lhs.value() + 1) {
            out.append("-");
          }
          out.append(rhs.value());
        }
      }
      // Start a new run at this digit and write its first digit immediately.
      lhs = digit;
      rhs = digit;
      out.append(lhs.value());
    }
    // Flush the end of the final run, using the same rules as inside the loop.
    if (rhs != lhs) {
      if (rhs.value() > lhs.value() + 1) {
        out.append("-");
      }
      out.append(rhs.value());
    }
    out.append("]");
    return out.toString();
  }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Statistics.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/Statistics.java
@ -0,0 +1,44 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;


 /**
 * A sink for statistics produced during regular expression compilation. Implementations are told
 * about each counted item via {@link #record(Type)}, which makes it possible to quantify how
 * proposed changes to the byte-code definition will affect the size of any compiled matcher bytes.
 */
 public interface Statistics {

  /** A sink which ignores everything that is recorded to it. */
  Statistics NO_OP = type -> { };

  /** The type of things we are counting. */
  enum Type {
    SHORT_BRANCH,
    MEDIUM_BRANCH,
    LONG_BRANCH,
    DOUBLE_JUMP,
    CONTINUATION,
    TERMINATING,
    FINAL
  }

  /**
   * Records an operation of the specified type during bytecode compilation.
   *
   * @param type the kind of operation which was just emitted.
   */
  void record(Type type);
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ExamplesTableSchema.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/ExamplesTableSchema.java
@ -96,7 +96,7 @@ public final class ExamplesTableSchema {
    for (Cell<PhoneRegion, ValidNumberType, DigitSequence> c : table.cellSet()) {
      out.put(ExampleNumberKey.of(c.getRowKey(), c.getColumnKey()), NUMBER, c.getValue());
    }
    return CsvTable.from(SCHEMA, out.build());
    return CsvTable.from(SCHEMA, out.buildOrThrow());
  }

  /**
@ -110,7 +110,7 @@ public final class ExamplesTableSchema {
    for (ExampleNumberKey k : csv.getKeys()) {
      out.put(k.getRegion(), k.getType(), csv.getOrDefault(k, NUMBER));
    }
    return out.build();
    return out.buildOrThrow();
  }

  private static Stream<String> write(ExampleNumberKey key) {
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FileBasedCsvLoader.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FileBasedCsvLoader.java
@ -25,7 +25,7 @@ import java.nio.file.Path;

 /**
 * A CSV provider which reads files rooted in a given directory. The file layout should match that
 * in the CSV metadata directory ({@code googledata/third_party/i18n/phonenumbers/metadata}).
 * in the CSV metadata directory ({@code third_party/libphonenumber_metadata/metadata}).
 */
 public final class FileBasedCsvLoader implements CsvDataProvider {
  /** Returns a CSV loader which reads files from the given base directory. */
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatsTableSchema.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/FormatsTableSchema.java
@ -81,7 +81,7 @@ public final class FormatsTableSchema {
              formats.getOrDefault(id, NATIONAL_PREFIX_OPTIONAL),
              toComment(formats.getOrDefault(id, COMMENT))));
    }
    return specs.build();
    return specs.buildOrThrow();
  }

  private static Optional<String> toOptional(String s) {
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/RangesTableSchema.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/model/RangesTableSchema.java
@ -23,13 +23,13 @@ import static java.util.Comparator.comparing;
 import static java.util.function.Function.identity;
 import static java.util.stream.Collectors.joining;

 import com.google.common.base.Splitter;
 import com.google.common.collect.ContiguousSet;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableRangeSet;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.Range;
 import com.google.i18n.phonenumbers.metadata.LengthsParser;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.i18n.PhoneRegion;
 import com.google.i18n.phonenumbers.metadata.i18n.SimpleLanguageTag;
@ -49,18 +49,18 @@ import com.google.i18n.phonenumbers.metadata.table.RangeTable.OverwriteMode;
 import com.google.i18n.phonenumbers.metadata.table.Schema;
 import java.time.ZoneId;
 import java.util.List;
 import java.util.NavigableSet;
 import java.util.Optional;
 import java.util.TreeSet;
 import java.util.stream.Stream;

 /**
 * The schema of the standard "Ranges" table with rows keyed by {@link RangeKey} and columns:
 *
 * <ol>
 *   <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not
 *       the same a XmlNumberType or ValidNumberType). All ranges should be assigned a type.
 *   <li>{@link #TARIFF}: The expected cost of numbers in a range (combining TYPE and TARIFF
 *       can yield the internal ValidNumberType). All ranges should be assigned a tariff.
 *   <li>{@link #TYPE}: The semantic type of numbers in a range (note that this is not the same
 *       as XmlNumberType or ValidNumberType). All ranges should be assigned a type.
 *   <li>{@link #TARIFF}: The expected cost of numbers in a range (combining TYPE and TARIFF can
 *       yield the internal ValidNumberType). All ranges should be assigned a tariff.
 *   <li>{@link #AREA_CODE_LENGTH}: The length of an optional prefix which may be removed from
 *       numbers in a range for local dialling. Local only lengths are derived using this column.
 *   <li>{@link #NATIONAL_ONLY}: True if numbers in a range cannot be dialled from outside its
@ -72,8 +72,8 @@ import java.util.stream.Stream;
 *       applied).
 *   <li>{@link #TIMEZONE}: The timezone names for a range (or empty to imply the default
 *       timezones). Multiple timezones can be specified if separated by {@code '&'}.
 *   <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where ranges are
 *       set {@code true} that range is valid within the region {@code XX}.
 *   <li>{@link #REGIONS}: A group of boolean columns in the form "Region:XX", where ranges are set
 *       {@code true} that range is valid within the region {@code XX}.
 *   <li>{@link #GEOCODES}: A group of String columns in the form "Geocode:XXX" containing the
 *       geocode string for a range, where {@code XXX} is the language code of the string.
 *   <li>{@link #PROVENANCE}: Indicates the most important reason for a range to be valid.
@ -81,6 +81,7 @@ import java.util.stream.Stream;
 * </ol>
 *
 * <p>Rows keys are serialized via the marshaller and produce leading columns:
 *
 * <ol>
 *   <li>{@code Prefix}: The prefix (RangeSpecification) for the ranges in a row (e.g. "12[3-6]").
 *   <li>{@code Length}: A set of lengths for the ranges in a row (e.g. "9", "8,9" or "5,7-9").
@ -88,16 +89,16 @@ import java.util.stream.Stream;
 */
 public final class RangesTableSchema {
  /**
   * External number type enum. This is technically much better than ValidNumberType since it
   * splits type and cost properly. Unfortunately the internal logic of the phonenumber library
   * doesn't really cope with this, which is why we convert to {@code XmlRangesSchema} before
   * creating legacy data structures.
   * External number type enum. This is technically much better than ValidNumberType since it splits
   * type and cost properly. Unfortunately the internal logic of the phonenumber library doesn't
   * really cope with this, which is why we convert to {@code XmlRangesSchema} before creating
   * legacy data structures.
   *
   * <p>This enum can be modified as new types are requested from data providers, providing the
   * type mapping to ValidNumberType is updated appropriately. Note that until it's clear that
   * mapping types such as {@link #M2M} to {@link ValidNumberType#UNKNOWN} will work okay, we
   * should be very careful about using the additional types. Additional types need to be removed
   * before the generated table can be turned into a {@link NumberingScheme}.
   * <p>This enum can be modified as new types are requested from data providers, providing the type
   * mapping to ValidNumberType is updated appropriately. Note that until it's clear that mapping
   * types such as {@link #M2M} to {@link ValidNumberType#UNKNOWN} will work okay, we should be very
   * careful about using the additional types. Additional types need to be removed before the
   * generated table can be turned into a {@link NumberingScheme}.
   */
  public enum ExtType {
    /** Default value not permitted in real data. */
@ -125,14 +126,14 @@ public final class RangesTableSchema {

    private static final ImmutableMap<ExtType, ValidNumberType> TYPE_MAP =
        Stream.of(
            ExtType.FIXED_LINE,
            ExtType.MOBILE,
            ExtType.FIXED_LINE_OR_MOBILE,
            ExtType.PAGER,
            ExtType.PERSONAL_NUMBER,
            ExtType.UAN,
            ExtType.VOICEMAIL,
            ExtType.VOIP)
                ExtType.FIXED_LINE,
                ExtType.MOBILE,
                ExtType.FIXED_LINE_OR_MOBILE,
                ExtType.PAGER,
                ExtType.PERSONAL_NUMBER,
                ExtType.UAN,
                ExtType.VOICEMAIL,
                ExtType.VOIP)
            .collect(toImmutableMap(identity(), v -> ValidNumberType.valueOf(v.name())));

    public Optional<ValidNumberType> toValidNumberType() {
@ -185,9 +186,9 @@ public final class RangesTableSchema {
      Column.of(ExtTariff.class, "Tariff", ExtTariff.STANDARD_RATE);

  /**
   * The "Area Code Length" column in the range table, denoting the length of a prefix which can
   * be removed from all numbers in a range to obtain locally diallable numbers. If an
   * "area code" is not optional for dialling, then no value should be set here.
   * The "Area Code Length" column in the range table, denoting the length of a prefix which can be
   * removed from all numbers in a range to obtain locally diallable numbers. If an "area code" is
   * not optional for dialling, then no value should be set here.
   */
  public static final Column<Integer> AREA_CODE_LENGTH =
      Column.ofUnsignedInteger("Area Code Length");
@ -226,12 +227,13 @@ public final class RangesTableSchema {
  public static final Column<String> COMMENT = Column.ofString("Comment");

  /** Marshaller for constructing CsvTable from RangeTable. */
  private static final CsvKeyMarshaller<RangeKey> MARSHALLER = new CsvKeyMarshaller<>(
      RangesTableSchema::write,
      RangesTableSchema::read,
      Optional.of(RangeKey.ORDERING),
      "Prefix",
      "Length");
  private static final CsvKeyMarshaller<RangeKey> MARSHALLER =
      new CsvKeyMarshaller<>(
          RangesTableSchema::write,
          RangesTableSchema::read,
          Optional.of(RangeKey.ORDERING),
          "Prefix",
          "Length");

  /** The non-key columns of a range table. */
  public static final Schema TABLE_COLUMNS =
@ -251,10 +253,10 @@ public final class RangesTableSchema {
          .build();

  /**
   * The columns for the serialized CSV table. Note that the "REGIONS" column group is replaced
   * by the CSV regions multi-value. This allows region codes to be serialize in a single column
   * (which is far nicer when looking at data in a spreadsheet). In the range table, this is
   * normalized into the boolean column group (because that's far nicer to work with).
   * The columns for the serialized CSV table. Note that the "REGIONS" column group is replaced by
   * the CSV regions multi-value. This allows region codes to be serialize in a single column (which
   * is far nicer when looking at data in a spreadsheet). In the range table, this is normalized
   * into the boolean column group (because that's far nicer to work with).
   */
  private static final Schema CSV_COLUMNS =
      Schema.builder()
@ -289,17 +291,21 @@ public final class RangesTableSchema {
    for (Change c : table.toChanges()) {
      for (RangeKey k : RangeKey.decompose(c.getRanges())) {
        regions.clear();
        c.getAssignments().forEach(a -> {
          // We special case the regions column, converting a group of boolean columns into a
          // multi-value of region codes. If the column is in the group, it must hold Booleans.
          if (regionColumns.contains(a.column())) {
            if (a.value().map(((Column<Boolean>) a.column())::cast).orElse(Boolean.FALSE)) {
              regions.add(REGIONS.getKey(a.column()));
            }
          } else {
            csv.put(k, a);
          }
        });
        c.getAssignments()
            .forEach(
                a -> {
                  // We special case the regions column, converting a group of boolean columns into
                  // a
                  // multi-value of region codes. If the column is in the group, it must hold
                  // Booleans.
                  if (regionColumns.contains(a.column())) {
                    if (a.value().map(((Column<Boolean>) a.column())::cast).orElse(Boolean.FALSE)) {
                      regions.add(REGIONS.getKey(a.column()));
                    }
                  } else {
                    csv.put(k, a);
                  }
                });
        // We can do this out-of-sequence because the table will order its columns.
        if (!regions.isEmpty()) {
          csv.put(k, CSV_REGIONS, Regions.of(regions));
@ -311,22 +317,28 @@ public final class RangesTableSchema {

  /**
   * Converts a {@link RangeKey} based {@link CsvTable} to a {@link RangeTable}, preserving the
   * original table columns. The {@link CsvSchema} of the returned table is not guaranteed to be
   * the {@link #SCHEMA} instance if the given table had different columns.
   * original table columns. The {@link CsvSchema} of the returned table is not guaranteed to be the
   * {@link #SCHEMA} instance if the given table had different columns.
   */
  public static RangeTable toRangeTable(CsvTable<RangeKey> csv) {
    RangeTable.Builder out = RangeTable.builder(TABLE_COLUMNS);
    for (RangeKey k : csv.getKeys()) {
      Change.Builder change = Change.builder(k.asRangeTree());
      csv.getRow(k).forEach((c, v) -> {
        // We special case the regions column, converting a comma separated list of region codes
        // into a series of boolean column assignments.
        if (c.equals(CSV_REGIONS)) {
          CSV_REGIONS.cast(v).getValues().forEach(r -> change.assign(REGIONS.getColumn(r), true));
        } else {
          change.assign(c, v);
        }
      });
      csv.getRow(k)
          .forEach(
              (c, v) -> {
                // We special case the regions column, converting a comma separated list of region
                // codes
                // into a series of boolean column assignments.
                if (c.equals(CSV_REGIONS)) {
                  CSV_REGIONS
                      .cast(v)
                      .getValues()
                      .forEach(r -> change.assign(REGIONS.getColumn(r), true));
                } else {
                  change.assign(c, v);
                }
              });
      out.apply(change.build(), OverwriteMode.NEVER);
    }
    return out.build();
@ -339,7 +351,8 @@ public final class RangesTableSchema {

  // Shared by ShortcodeTableSchema
  public static RangeKey read(List<String> parts) {
    return RangeKey.create(RangeSpecification.parse(parts.get(0)), parseLengths(parts.get(1)));
    return RangeKey.create(
        RangeSpecification.parse(parts.get(0)), LengthsParser.parseLengths(parts.get(1)));
  }

  private static String formatLength(ImmutableSortedSet<Integer> lengthSet) {
@ -364,33 +377,5 @@ public final class RangesTableSchema {
    }
  }

  private static final Splitter COMMA_SPLITTER = Splitter.on(',').trimResults();
  private static final Splitter RANGE_SPLITTER = Splitter.on('-').trimResults().limit(2);

  // Parses a length specifier such as "4,7-9,11" into the set of lengths it denotes. Entries must
  // be given in strictly increasing order and a range "lo-hi" must satisfy lo < hi; anything else
  // is rejected with an IllegalArgumentException.
  private static NavigableSet<Integer> parseLengths(String s) {
    NavigableSet<Integer> result = new TreeSet<>();
    for (String token : COMMA_SPLITTER.split(s)) {
      if (!token.contains("-")) {
        // A single length, which must lie above everything seen so far.
        int single = parseInt(token);
        checkArgument(result.isEmpty() || single > result.last(), "Overlapping ranges: %s", s);
        result.add(single);
        continue;
      }
      // A closed range "lo-hi"; both bounds are parsed before either is validated.
      List<String> bounds = RANGE_SPLITTER.splitToList(token);
      int lo = parseInt(bounds.get(0));
      int hi = parseInt(bounds.get(1));
      checkArgument(lo < hi, "Invalid range: %s-%s", lo, hi);
      checkArgument(result.isEmpty() || lo > result.last(), "Overlapping ranges: %s", s);
      result.addAll(ContiguousSet.closed(lo, hi));
    }
    return result;
  }

  // Parses the given string as a base-10 unsigned integer by delegating to
  // Integer.parseUnsignedInt(), which throws NumberFormatException for input it cannot parse.
  private static int parseInt(String s) {
    return Integer.parseUnsignedInt(s, 10);
  }

  // Private constructor: this class cannot be instantiated from outside.
  private RangesTableSchema() {}
 }


--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/AnyPath.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/AnyPath.java
@ -0,0 +1,181 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import com.google.auto.value.AutoValue;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import java.util.Optional;

 /**
 * Represents an NFA graph which accepts sequences of inputs of any digit (also known as "any-digit
 * sequences"), possibly of variable length. For example, an {@code AnyPath} instance might accept
 * a single input of any digit (i.e. equivalent to the regular expression {@code "\d"}), or it might
 * accept sequences of any digits of length 4 or 6 (i.e. equivalent to the regular expression
 * {@code "\d{4}(\d{2})?"}).
 *
 * <p>As {@code AnyPath} instances are all restricted to only accepting any-digits sequences, the
 * only interesting thing about them is the set of sequence lengths they accept. That set is held
 * as a bit-mask in a positive {@code int}, so only lengths from 0 to 30 can be represented.
 */
@AutoValue
 abstract class AnyPath implements Comparable<AnyPath> {
  /**
   * The special empty path which matches zero length input. This is useful as an identity value
   * when constructing other paths but should never be a path in the graph.
   */
  public static final AnyPath EMPTY = new AutoValue_AnyPath(0x1);

  /** The path matching exactly one input of any digit. */
  public static final AnyPath SINGLE = of(0x2);

  /** The path matching one or zero inputs of any digit. */
  public static final AnyPath OPTIONAL = of(0x3);

  /**
   * Returns the path for the given length mask.
   *
   * @throws IllegalArgumentException if the mask is not greater than 1 (i.e. it is the empty path,
   *     accepts no lengths at all, or has overflowed into the sign bit).
   */
  @VisibleForTesting
  static AnyPath of(int mask) {
    Preconditions.checkArgument(mask > 1, "invalid path mask: %s", mask);
    return new AutoValue_AnyPath(mask);
  }

  /**
   * Returns a bit-mask representing the lengths of any-digit sequences accepted by this path.
   * If bit-N is set, then this path accepts an N-length sequence of any digits.
   */
  abstract int mask();

  /**
   * Returns whether this path accepts an any-digit sequence of length {@code n}.
   *
   * @throws IllegalArgumentException if {@code n} is negative or greater than 31.
   */
  public boolean acceptsLength(int n) {
    Preconditions.checkArgument(n >= 0 && n < 32, "invalid path length: %s", n);
    return (mask() & (1 << n)) != 0;
  }

  /** Returns the maximum length any-sequence that this path will accept. */
  public int maxLength() {
    return (31 - Integer.numberOfLeadingZeros(mask()));
  }

  /**
   * Returns whether this path is empty (i.e. accepts only zero length sequences). This is only
   * useful when constructing paths and empty paths should never appear in an NFA graph.
   */
  public boolean isEmpty() {
    return mask() == 0x1;
  }

  /**
   * Extends this path by one input, potentially setting all input as optional. For example (using
   * 'x' to represent a single "any digit" input):
   * <ul>
   *   <li>{@code "xx".extend(false) == "xxx"}
   *   <li>{@code "xx".extend(true) == "(xxx)?"}
   *   <li>{@code "xx(x)?".extend(false) == "xxx(x)?"}
   *   <li>{@code "xx(x)?".extend(true) == "(xxx(x)?)?"}
   * </ul>
   */
  public AnyPath extend(boolean allOptional) {
    return of((mask() << 1) | (allOptional ? 0x1 : 0x0));
  }

  /**
   * Joins the given path to this one, results in a new path which is equivalent to the
   * concatenation of the regular expressions they represent. For example (using
   * 'x' to represent a single "any digit" input):
   * <ul>
   *   <li>{@code "xx".join("xx") == "xxxx"}
   *   <li>{@code "xx".join("x?") == "xx(x)?"}
   * </ul>
   *
   * @throws IllegalArgumentException if the joined path would accept sequences longer than can be
   *     represented in the mask, or if both paths are empty.
   */
  public AnyPath join(AnyPath other) {
    // Without this check, lengths shifted beyond the top of the mask would be silently dropped.
    Preconditions.checkArgument(
        maxLength() + other.maxLength() < 31,
        "joined path too long: %s + %s",
        maxLength(),
        other.maxLength());
    return of(joinMasks(mask(), other.mask()));
  }

  /**
   * Returns the mask accepting every length which is the sum of a length accepted by the first
   * mask and a length accepted by the second. Note that this is a shift-and-OR operation and is
   * not the same as integer multiplication, since set bits which collide do not carry.
   */
  private static int joinMasks(int lhsMask, int rhsMask) {
    int joined = 0;
    for (int n = 0; (rhsMask >>> n) != 0; n++) {
      if ((rhsMask & (1 << n)) != 0) {
        joined |= lhsMask << n;
      }
    }
    return joined;
  }

  /**
   * Returns a new path which is equal to this path, except that it also accepts zero length
   * sequences.
   */
  public AnyPath makeOptional() {
    return of(mask() | 0x1);
  }

  /**
   * Attempts to "factor" this path by the given path to produce a path such that
   * {@code p.factor(q).join(q)} is equivalent to {@code p}. This is useful when trying to
   * determine longest common paths. Factorizing may not succeed in cases where no common path
   * exists (e.g. {@code "xx(xx)?".factor("x?")} fails because there is no way to join anything
   * to the path {@code "x?"} to make it accept exactly 2 or 4 length any-digit sequences).
   *
   * <p>The candidate factor is found by integer division of the masks, but it is only returned if
   * joining it back to {@code other} really reproduces this path. Integer multiplication alone is
   * not a sufficient check because it carries where joining does not (e.g. the masks for
   * {@code "(xxx)?"} and {@code "x?"} are 9 and 3, and while 3 * 3 == 9, joining {@code "x?"} to
   * itself gives {@code "(x(x)?)?"}). Factorization is conservative and may fail to find a factor
   * even when one exists.
   */
  public Optional<AnyPath> factor(AnyPath other) {
    int candidate = mask() / other.mask();
    if (candidate > 1 && joinMasks(candidate, other.mask()) == mask()) {
      return Optional.of(of(candidate));
    }
    return Optional.empty();
  }

  /** Orders paths by the numerical value of their length masks. */
  @Override
  public int compareTo(AnyPath other) {
    return Integer.compare(mask(), other.mask());
  }

  @Override
  public final String toString() {
    // A non-obvious algorithm for getting a reasonable toString() using x's.
    // Best understood via examples:
    //
    // 0001 is invalid as we cannot represent an optional zero-length sequence.
    //
    // Hi-bit-1 ==> 1 x
    // 0010 -> x, 0011 -> (x)?
    //
    // Hi-bit-2 ==> 2 x's
    // 0100 -> xx, 0101 -> (xx)?, 0110 -> x(x)?, 0111 -> (x(x)?)?
    //
    // Hi-bit-3 ==> 3 x's
    // 1000 -> xxx,    1001 -> (xxx)?,    1010 -> x(xx)?,    1011 -> (x(xx)?)?
    // 1100 -> xx(x)?, 1101 -> (xx(x)?)?, 1110 -> x(x(x)?)?, 1111 -> (x(x(x)?)?)?
    //
    // Rules:
    // * For hi-bit M, there are M x's in the string.
    // * For N < M; if bit-N is set, then a group starts after the Nth-x.
    if (mask() == 0x1) {
      return "<EMPTY>";
    }
    StringBuilder out = new StringBuilder();
    for (int n = 0; n < maxLength(); n++) {
      out.append('x');
    }
    // Loop high-to-low to prevent earlier insertions messing with the index.
    for (int n = maxLength() - 1; n >= 0; n--) {
      if (acceptsLength(n)) {
        out.insert(n, '(');
      }
    }
    // The number of opened groups was the number of set bits - 1.
    for (int n = Integer.bitCount(mask()) - 1; n > 0; n--) {
      out.append(")?");
    }
    return out.toString();
  }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Edge.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Edge.java
@ -0,0 +1,351 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;

 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSortedSet;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import java.util.Collection;
 import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;

 /**
 * Value type for edges in NFA graphs of phone number regular expressions. Outside this package,
 * this type is mainly used for examining NFA graphs which represent a regular expression,
 * generated via {@link RangeTreeConverter#toNfaGraph}.
 *
 * <p>Note that the ordering of edges is carefully designed to attempt to replicate as much of the
 * existing intuition about ordering in regular expressions as possible. This should result in any
 * generated expressions being as close to existing hand edited expressions as possible.
 */
 public abstract class Edge implements Comparable<Edge> {
  /**
   * API for visiting composite edges; see also {@link #accept(Visitor)}. Exactly which method is
   * invoked depends on the kind of edge being visited.
   */
  public interface Visitor {
    /**
     * Visits a leaf node simple edge.
     *
     * @param edge the simple edge being visited.
     */
    void visit(SimpleEdge edge);
    /**
     * Visits a composited sequence of edges. Note that sequences only ever contain disjunctions or
     * simple edges, but never other sequences. For edges "a", "b", "c", this represents the
     * concatenated edge "abc".
     *
     * @param edges the edges of the sequence, in the order they are concatenated.
     */
    void visitSequence(List<Edge> edges);
    /**
     * Visits a disjunction of parallel edges. Note that disjunctions only ever contain sequences
     * or simple edges, but never other disjunctions. For edges "a", "b", "c", this represents the
     * disjunctive group "(a|b|c)".
     *
     * @param edges the alternative edges in the group.
     * @param isOptional whether the group as a whole is optional (presumably this is the flag
     *     derived from the presence of an epsilon edge when the disjunction was created; confirm
     *     against the disjunction implementation).
     */
    void visitGroup(Set<Edge> edges, boolean isOptional);
  }

  // The singleton epsilon edge.
  private static final SimpleEdge EPSILON = new SimpleEdge();
  // The singleton edge matching any digit (i.e. 'x' or '\d').
  private static final SimpleEdge ANY = new SimpleEdge(ALL_DIGITS_MASK, false);
  // The singleton edge optionally matching any digit (i.e. 'x?' or '\d?').
  private static final SimpleEdge OPTIONAL_ANY = ANY.optional();

  /**
   * Returns an edge which accepts digits 0 to 9 according to the bits set in the given mask. The
   * shared "any digit" edge is returned if the mask contains every digit.
   */
  public static SimpleEdge fromMask(int digitMask) {
    if (digitMask == ALL_DIGITS_MASK) {
      return ANY;
    }
    return new SimpleEdge(digitMask, false);
  }

  /**
   * Returns the epsilon edge which accepts zero length input and transitions immediately. This
   * edge should only ever appear parallel to other edges, and not as the only transition between
   * two nodes. The same singleton instance is returned by every call.
   */
  public static SimpleEdge epsilon() {
    return EPSILON;
  }

  /**
   * Returns the edge which accepts any digit {@code [0-9]}. The same singleton instance is
   * returned by every call.
   */
  public static SimpleEdge any() {
    return ANY;
  }

  /**
   * Returns the edge which optionally accepts any digit {@code [0-9]}. The same singleton instance
   * is returned by every call.
   */
  public static SimpleEdge optionalAny() {
    return OPTIONAL_ANY;
  }

  /**
   * Returns the ordered concatenation of the given edges. An argument which is itself a
   * concatenation contributes its own edges rather than being nested, so that the resulting edge
   * contains only simple edges or disjunctions.
   *
   * @throws IllegalArgumentException if either edge is the epsilon edge.
   */
  public static Edge concatenation(Edge lhs, Edge rhs) {
    boolean hasEpsilon = lhs.equals(EPSILON) || rhs.equals(EPSILON);
    checkArgument(!hasEpsilon, "cannot concatenate epsilon edges");
    // Never nest concatenations; splice their edges in directly so that only singletons or
    // disjunctions remain. This is equivalent to writing "xyz" instead of "x(yz)".
    Stream<Edge> head =
        (lhs instanceof Concatenation) ? ((Concatenation) lhs).edges.stream() : Stream.of(lhs);
    Stream<Edge> tail =
        (rhs instanceof Concatenation) ? ((Concatenation) rhs).edges.stream() : Stream.of(rhs);
    return new Concatenation(Stream.concat(head, tail).collect(Collectors.toList()));
  }

  /**
   * Returns the disjunction of the given edges. Any edge which is already a disjunction is first
   * expanded, so that the resulting edge contains only simple edges or concatenations. The edges
   * are sorted and de-duplicated, and if the epsilon edge is present it is removed from the group
   * and the optional flag is set on the new disjunction instead.
   *
   * @throws IllegalArgumentException if there are no edges from which to make a disjunction.
   */
  public static Edge disjunction(Collection<Edge> edges) {
    // Don't make disjunctions of disjunctions; flatten them out so you only have singletons,
    // concatenations or epsilon. This is equivalent to writing "(x|y|z)" instead of "(x|(y|z))".
    List<Edge> allEdges = edges.stream()
        .flatMap(
            e -> (e instanceof Disjunction) ? ((Disjunction) e).edges.stream() : Stream.of(e))
        .sorted()
        .distinct()
        .collect(Collectors.toList());
    // Fail with a meaningful message rather than an index error from get(0) below.
    checkArgument(!allEdges.isEmpty(), "cannot create a disjunction with no edges");
    // There should only ever be one epsilon when we make a disjunction (disjunctions are made when
    // subgraphs collapse and each subgraph should only have one epsilon to make it optional).
    // Epsilons sort to-the-left of everything, so if there is an epsilon it must be the first edge.
    boolean isOptional = allEdges.get(0) == EPSILON;
    if (isOptional) {
      allEdges = allEdges.subList(1, allEdges.size());
    }
    Preconditions.checkState(!allEdges.contains(EPSILON));
    return new Disjunction(allEdges, isOptional);
  }

  /**
   * An edge optionally matching a single input token, or the epsilon transition. Two simple edges
   * are equal only if they accept the same digits and agree on whether they are optional, which
   * keeps {@link #equals(Object)} consistent with the ordering given by {@link #compareTo(Edge)}.
   */
  public static final class SimpleEdge extends Edge {
    private final int digitMask;
    private final boolean isOptional;

    // Constructor for singleton epsilon edge.
    private SimpleEdge() {
      this.digitMask = 0;
      // An optional epsilon makes no real sense.
      this.isOptional = false;
    }

    private SimpleEdge(int digitMask, boolean isOptional) {
      checkArgument(digitMask > 0 && digitMask < (1 << 10), "invalid bit mask %s", digitMask);
      this.digitMask = digitMask;
      this.isOptional = isOptional;
    }

    /** Returns the mask of digits accepted by this edge. */
    public int getDigitMask() {
      return digitMask;
    }

    /** Returns whether this edge is optional. */
    public boolean isOptional() {
      return isOptional;
    }

    /**
     * Returns an optional version of this, non-optional edge.
     *
     * @throws IllegalStateException if this is the epsilon edge or is already optional.
     */
    public SimpleEdge optional() {
      Preconditions.checkState(digitMask != 0, "cannot make epsilon optional");
      Preconditions.checkState(!isOptional, "edge already optional");
      return new SimpleEdge(digitMask, true);
    }

    @Override
    public void accept(Visitor visitor) {
      visitor.visit(this);
    }

    @Override
    public boolean equals(Object obj) {
      if (!(obj instanceof SimpleEdge)) {
        return false;
      }
      // Optionality is part of the edge's value ("x" and "x?" are different edges), and must be
      // tested here because compare() below never returns zero for edges which differ by it.
      SimpleEdge other = (SimpleEdge) obj;
      return digitMask == other.digitMask && isOptional == other.isOptional;
    }

    @Override
    public int hashCode() {
      return 31 * digitMask + Boolean.hashCode(isOptional);
    }

    @Override
    public int compareTo(Edge rhs) {
      if (rhs instanceof SimpleEdge) {
        return compare((SimpleEdge) rhs);
      } else {
        // Composite types know how to compare themselves to SimpleEdges, so delegate to them but
        // remember to invert the result since we are reversing the comparison order.
        return -rhs.compareTo(this);
      }
    }

    private int compare(SimpleEdge rhs) {
      if (isOptional != rhs.isOptional) {
        // Optional edges sort to-the-right of non-optional things.
        return isOptional ? 1 : -1;
      }
      if (digitMask == rhs.digitMask) {
        return 0;
      }
      if (digitMask == 0 || rhs.digitMask == 0) {
        // Epsilon sorts to-the-left of everything.
        return digitMask == 0 ? -1 : 1;
      }
      // Unlike many other places where range specifications are used, we cannot guarantee the
      // ranges are disjoint here, so we sort on the reversed bitmask to favour the lowest set bit.
      // This sorts 'x' ([0-9]) to the left of every other digit mask (epsilon never gets this far
      // as it was handled above). I.e. "x" < "0", "0" < "1", "[0-3]" < "[0-2]".
      //
      // Remember to logical-shift back down to avoid negative values.
      int reverseLhsMask = (Integer.reverse(digitMask) >>> 22);
      int reverseRhsMask = (Integer.reverse(rhs.digitMask) >>> 22);
      // Compare in the opposite order, so the largest reversed value is ordered "to the left".
      return Integer.compare(reverseRhsMask, reverseLhsMask);
    }
  }

  /** An ordered sequence of edges, each of which is either a disjunction or a simple edge. */
  private static final class Concatenation extends Edge {
    private final ImmutableList<Edge> edges;

    private Concatenation(Collection<Edge> edges) {
      this.edges = ImmutableList.copyOf(edges);
    }

    @Override
    public void accept(Visitor visitor) {
      visitor.visitSequence(edges);
    }

    @Override
    public boolean equals(Object obj) {
      if (!(obj instanceof Concatenation)) {
        return false;
      }
      Concatenation other = (Concatenation) obj;
      return edges.equals(other.edges);
    }

    @Override
    public int hashCode() {
      return edges.hashCode();
    }

    @Override
    public int compareTo(Edge rhs) {
      if (rhs instanceof Concatenation) {
        // Two sequences are ordered element by element.
        return compareEdges(edges, ((Concatenation) rhs).edges);
      }
      // Order against a non-concatenation by looking at our leading edge only (the operands are
      // swapped in the call, so the result is negated).
      int byFirstEdge = -rhs.compareTo(edges.get(0));
      if (byFirstEdge != 0) {
        return byFirstEdge;
      }
      // A tie must never be reported as equality between a concatenation and something else, so
      // place the concatenation after simple edges but before disjunctions.
      return (rhs instanceof SimpleEdge) ? 1 : -1;
    }
  }

  /**
   * A non-empty group of alternative edges (sequences or simple edges), held in sorted order, with
   * a flag saying whether the group as a whole is optional.
   *
   * <p>NOTE(review): {@code isOptional} does not take part in {@code equals()},
   * {@code hashCode()} or {@code compareTo()}, so an optional and a non-optional group with the
   * same edges are treated as equal. Confirm that this is intended.
   */
  private static final class Disjunction extends Edge {
    private final ImmutableSortedSet<Edge> edges;
    private final boolean isOptional;

    private Disjunction(Collection<Edge> edges, boolean isOptional) {
      checkArgument(!edges.isEmpty());
      this.isOptional = isOptional;
      this.edges = ImmutableSortedSet.copyOf(edges);
    }

    @Override
    public void accept(Visitor visitor) {
      visitor.visitGroup(edges, isOptional);
    }

    @Override
    public boolean equals(Object obj) {
      if (!(obj instanceof Disjunction)) {
        return false;
      }
      Disjunction other = (Disjunction) obj;
      return edges.equals(other.edges);
    }

    @Override
    public int hashCode() {
      // Flip every bit so that a disjunction hashes differently from a concatenation holding the
      // same edges.
      return ~edges.hashCode();
    }

    @Override
    public int compareTo(Edge rhs) {
      if (rhs instanceof Disjunction) {
        // Two groups are ordered element by element over their sorted edges.
        return compareEdges(edges.asList(), ((Disjunction) rhs).edges.asList());
      }
      // Order against a non-disjunction by looking at our lowest edge only (the operands are
      // swapped in the call, so the result is negated).
      Edge lowestEdge = edges.first();
      int byLowestEdge = -rhs.compareTo(lowestEdge);
      if (byLowestEdge != 0) {
        return byLowestEdge;
      }
      // A tie must never be reported as equality between a disjunction and something else, so the
      // disjunction is placed to the right of the other edge.
      return 1;
    }
  }

  /**
   * Accepts a visitor on this edge, visiting any sub-edges from which it is composed. This is a
   * double-dispatch visitor to avoid anyone processing edges needing to know about specific types.
   * Only the immediate edge is visited and the visitor is then responsible for visiting child
   * edges (typically by calling {@code accept()} on each child it is given).
   *
   * @param visitor the visitor whose callback matching this edge's type is invoked.
   */
  public abstract void accept(Visitor visitor);

  // Lexicographical ordering of two edge lists: the first pair of elements that differ decides the
  // result, and if one list is a prefix of the other then the shorter list sorts first.
  private static int compareEdges(ImmutableList<Edge> lhs, ImmutableList<Edge> rhs) {
    int lhsSize = lhs.size();
    int rhsSize = rhs.size();
    for (int i = 0; i < lhsSize && i < rhsSize; i++) {
      int result = lhs.get(i).compareTo(rhs.get(i));
      if (result != 0) {
        return result;
      }
    }
    return Integer.compare(lhsSize, rhsSize);
  }

  /**
   * Returns a compact, human readable form of this edge: {@code "e"} for epsilon, {@code "x"} for
   * any digit, the range specification for other simple edges, sequences written back to back,
   * and groups written as {@code "(a|b)"} with a trailing {@code '?'} when optional.
   */
  @Override
  public String toString() {
    StringBuilder buffer = new StringBuilder();
    Visitor printer = new Visitor() {
      @Override
      public void visit(SimpleEdge e) {
        if (e.equals(Edge.epsilon())) {
          // Epsilon is never optional, so there is no marker to consider.
          buffer.append("e");
          return;
        }
        int mask = e.getDigitMask();
        if (mask == ALL_DIGITS_MASK) {
          buffer.append("x");
        } else {
          buffer.append(RangeSpecification.toString(mask));
        }
        if (e.isOptional()) {
          buffer.append('?');
        }
      }

      @Override
      public void visitSequence(List<Edge> edges) {
        for (Edge e : edges) {
          e.accept(this);
        }
      }

      @Override
      public void visitGroup(Set<Edge> edges, boolean isOptional) {
        buffer.append("(");
        for (Edge e : edges) {
          e.accept(this);
          buffer.append("|");
        }
        // Drop the separator that was written after the final alternative.
        buffer.setLength(buffer.length() - 1);
        buffer.append(isOptional ? ")?" : ")");
      }
    };
    accept(printer);
    return buffer.toString();
  }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriter.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriter.java
@ -0,0 +1,343 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkState;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;

 import com.google.common.collect.Iterables;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.Visitor;
 import java.util.List;
 import java.util.Optional;
 import java.util.Set;
 import javax.annotation.Nullable;

 /** Writes an NFA graph edge instance as a regular expression. */
 final class EdgeWriter implements Visitor {
  // Regex constant strings pulled out for some degree of readability.
  private static final String DOT_MATCH = ".";
  private static final String DIGIT_MATCH = "\\d";
  private static final String OPTIONAL_MARKER = "?";
  private static final String GROUP_START = "(?:";
  private static final String GROUP_DISJUNCTION = "|";
  private static final String GROUP_END = ")";
  private static final String OPTIONAL_GROUP_END = GROUP_END + OPTIONAL_MARKER;

  /**
   * Returns a regular expression corresponding to the structure of the given edge. This method
   * does not apply any specific optimizations to the edge it is given and any optimizations which
   * affect the output must have already been applied to the graph from which the input edge was
   * derived.
   *
   * @param edge A collapsed edge typically derived from serializing an NFA graph.
   * @param useDotMatch true if {@code '.'} should be used to "match any digit" (instead of
   *     {@code '\\d'}) which results in shorter output.
   * @return the regular expression text accumulated while visiting the edge.
   */
  public static String toRegex(Edge edge, boolean useDotMatch) {
    // A fresh writer is used for every call since it accumulates output and top-level state.
    EdgeWriter writer = new EdgeWriter(useDotMatch);
    edge.accept(writer);
    return writer.out.toString();
  }

  // The token to match any input digit (e.g. "\\d" or ".").
  private final String anyToken;
  // Accumulated regular expression appended to during visitation.
  private final StringBuilder out = new StringBuilder();
  // Flag to determine when the top-level edge visited is a group, because if it is we can often
  // omit the explicit grouping tokens and save some space.
  private boolean isTopLevelGroup = true;

  // Selects the "any digit" token once up front: "." when dot matching is requested, otherwise
  // "\\d".
  private EdgeWriter(boolean useDotMatch) {
    this.anyToken = useDotMatch ? DOT_MATCH : DIGIT_MATCH;
  }

  /**
   * Writes a single simple edge, either as an "any digit" token or as its range representation.
   *
   * @throws IllegalArgumentException if the edge is the bare epsilon edge.
   */
  @Override
  public void visit(SimpleEdge e) {
    checkArgument(!e.equals(Edge.epsilon()), "unexpected bare epsilon");
    isTopLevelGroup = false;
    // Re-use the "any" path extraction, which must already cope with simple edges nested inside
    // composite edges. The extracted path carries the edge's optionality and appendRegex() emits
    // it, so no separate handling is required for that case.
    Optional<AnyPath> anyPath = AnyPathVisitor.extractAnyPath(e);
    if (!anyPath.isPresent()) {
      // An ordinary edge is written as its range (e.g. "6" or "[014-9]") plus optional marker.
      out.append(RangeSpecification.toString(e.getDigitMask()));
      if (e.isOptional()) {
        out.append(OPTIONAL_MARKER);
      }
      return;
    }
    appendRegex(out, anyPath.get().mask());
  }

  /**
   * Writes a sequence of edges in order, merging each run of consecutive "any" edges into a
   * single token.
   *
   * @throws IllegalArgumentException if the sequence is empty.
   */
  @Override
  public void visitSequence(List<Edge> edges) {
    checkArgument(!edges.isEmpty(), "sequences must have at least one edge");
    isTopLevelGroup = false;
    // A sequence can interleave normal and "any" edges (e.g. "123xxxx"). Consecutive "any" edges
    // are accumulated here and only written once a normal edge (or the end) is reached.
    AnyPath pending = AnyPath.EMPTY;
    for (Edge edge : edges) {
      Optional<AnyPath> anyPath = AnyPathVisitor.extractAnyPath(edge);
      if (!anyPath.isPresent()) {
        // A normal edge: flush whatever "any" path has been collected before writing it.
        if (!pending.isEmpty()) {
          appendRegex(out, pending.mask());
          pending = AnyPath.EMPTY;
        }
        // Recursion is only needed for edges which are not "any" edges themselves (they may still
        // be composites containing other "any" edges).
        edge.accept(this);
      } else {
        pending = pending.join(anyPath.get());
      }
    }
    // A trailing run of "any" edges has not been written yet.
    if (!pending.isEmpty()) {
      appendRegex(out, pending.mask());
    }
  }

  /**
   * Writes a group of alternatives separated by {@code '|'}, wrapped in a non-capturing group
   * unless it is the non-optional top-level group.
   *
   * @throws IllegalArgumentException if the group is empty.
   */
  @Override
  public void visitGroup(Set<Edge> edges, boolean isOptional) {
    checkArgument(!edges.isEmpty(), "groups must have at least one edge");
    // Only the very first edge visited can drop its parentheses, and only when it is not optional
    // (i.e. "(?:a|b|c)" may be written as just "a|b|c").
    boolean needsParens = isOptional || !isTopLevelGroup;
    // Must be cleared before any recursion happens.
    isTopLevelGroup = false;

    // The single case in which a group is written as an "any" edge is an optional group holding
    // one "any" path which is not part of an enclosing sequence (e.g. "(xx)?").
    if (isOptional && edges.size() == 1) {
      Optional<AnyPath> anyPath = AnyPathVisitor.extractAnyPath(Iterables.getOnlyElement(edges));
      if (anyPath.isPresent()) {
        // The outer group's optionality has to be folded into the path.
        appendRegex(out, anyPath.get().makeOptional().mask());
        return;
      }
    }

    if (needsParens) {
      out.append(GROUP_START);
    }
    boolean isFirst = true;
    for (Edge edge : edges) {
      if (!isFirst) {
        out.append(GROUP_DISJUNCTION);
      }
      isFirst = false;
      edge.accept(this);
    }
    if (needsParens) {
      out.append(isOptional ? OPTIONAL_GROUP_END : GROUP_END);
    }
  }

  /**
   * Recursive visitor to extract "any" sequences from edges (simple or composite). A sequence of
   * edges is an "any path" if all edges accept any digit. Composite edges already enforce the
   * requirement that epsilon edges don't exist directly (they are represented via optionality).
   */
  private static final class AnyPathVisitor implements Visitor {
    /**
     * Returns the longest "any" sequence represented by the given edge (if the edge represents an
     * any sequence). If present, the result is non-empty.
     *
     * <p>The returned {@code Optional} itself is never null; absence is signalled by an empty
     * {@code Optional}.
     */
    public static Optional<AnyPath> extractAnyPath(Edge e) {
      AnyPathVisitor visitor = new AnyPathVisitor();
      e.accept(visitor);
      return Optional.ofNullable(visitor.path);
    }

    // Accumulate value during visitation and set to null to abort.
    @Nullable
    private AnyPath path = AnyPath.EMPTY;

    @Override
    public void visit(SimpleEdge edge) {
      checkState(path != null, "path should never be null at start of recursion");
      if (edge.getDigitMask() == ALL_DIGITS_MASK) {
        path = path.join(edge.isOptional() ? AnyPath.OPTIONAL : AnyPath.SINGLE);
      } else {
        // Anything other than "all digits" means this edge is not part of an "any" path.
        path = null;
      }
    }

    @Override
    public void visitSequence(List<Edge> edges) {
      checkState(path != null, "path should never be null at start of recursion");
      // Looking for a complete sequence of "any edges" (partial sequences in a concatenation are
      // taken care of by the caller).
      for (Edge e : edges) {
        // Each element is examined with its own visitor so that a failure there cannot leave this
        // visitor's accumulated path in a half-updated state.
        Optional<AnyPath> next = AnyPathVisitor.extractAnyPath(e);
        if (next.isPresent()) {
          path = path.join(next.get());
        } else {
          path = null;
          break;
        }
      }
    }

    @Override
    public void visitGroup(Set<Edge> edges, boolean isOptional) {
      checkState(path != null, "path should never be null at start of recursion");
      // Looking for a group like (xxx(xx)?)? which contains one edge only. We just recurse into
      // that edge and then make the result optional (a disjunction with only one edge must be
      // optional or else it should have been a concatenation).
      if (edges.size() > 1) {
        path = null;
        return;
      }
      checkState(isOptional, "single edge disjunctions should be optional");
      Edge e = Iterables.getOnlyElement(edges);
      e.accept(this);
      if (path != null) {
        path = path.makeOptional();
      }
    }
  }

  // The code below here is really a bit squiffy and relies on a whole bunch of bit fiddling to
  // do what it does. The good news is that it's easy to unit-test the heck out of, so that's
  // what I've done. Don't look too hard at what's going on unless you're a bit of a masochist.

  /**
   * Appends the regular expression corresponding to the given AnyPath mask value. This is a
   * bit-mask where the Nth bit corresponds to accepting an any digit sequence of length N (so
   * bit-0 being set means the whole expression is optional).
   *
   * <p>For example:
   * <ul>
   *   <li> {@code 00000010} accepts only length 1 (e.g. "\d")
   *   <li> {@code 00000011} accepts lengths 0 or 1 (e.g. "\d?")
   *   <li> {@code 00001000} accepts only length 3 (e.g. "\d{3}")
   *   <li> {@code 00011100} accepts lengths 2-4 (e.g. "\d{2,4}")
   *   <li> {@code 11101100} accepts lengths 2,3,5,6,7 (e.g. "\d\d(?:\d(?:\d{2,4})?)?")
   *   <li> {@code 11101101} accepts lengths 0,2,3,5,6,7 (e.g. "(?:\d\d(?:\d(?:\d{2,4})?)?)?")
   * </ul>
   *
   * @param out the buffer to append to.
   * @param mask the length mask; must be greater than 1 (i.e. accept at least one non-zero
   *     length).
   * @throws IllegalArgumentException if {@code mask} is not greater than 1.
   */
  private void appendRegex(StringBuilder out, int mask) {
    checkArgument(mask > 1, "unexpected mask value %s", mask);
    // Deal with optionality separately.
    boolean allOptional = (mask & 0x1) != 0;
    mask &= ~0x1;
    // We are looking for bit patterns like '1111000' for contiguous ranges (e.g. {3,7}).
    // Find the lo/hi size of the next contiguous range (inclusive).
    int lo = Integer.numberOfTrailingZeros(mask);
    int hi = Integer.numberOfTrailingZeros(~(mask >>> lo)) + (lo - 1);

    // If all the bits are accounted for (nothing above the "hi" bit) then this was the last
    // contiguous range and we don't need to recurse (so no more groups need to be opened).
    if (mask < (1 << (hi + 1))) {
      // Writes a contiguous range as a single token with optionality (e.g. "\d", "(?:\d{2,4})?").
      appendAnyRange(out, lo, hi, allOptional);
      return;
    }
    // This is about the entire group, not the subgroup we are about to recurse into.
    if (allOptional) {
      out.append(GROUP_START);
    }
    // IMPORTANT: If we are recursing, we must not attempt to emit the entire group here, only the
    // shortest matching length.
    //
    // Mask "11101100" does NOT represent "\d{2,3}(?:\d{2,4})?" as that can match 4-digits too.
    // Instead it should generate "\d\d(?:\d(?:\d{2,4})?)?", where the 3 digit match is part of an
    // optional group.
    appendRequiredAnyRange(out, lo);
    // Recurse using the mask that's had the match we just emitted "factored out". This is always
    // optional because bit-0 is what was the lowest set bit in our mask.
    appendRegex(out, mask >>> lo);
    if (allOptional) {
      out.append(OPTIONAL_GROUP_END);
    }
  }

  /**
   * Appends the tokens which match exactly {@code n} digits of any value.
   *
   * <p>The output depends on the length:
   * <ol>
   *   <li>{@code n=1}: {@code "\d"}
   *   <li>{@code n=2}: {@code "\d\d"} (this could be extended if using '.')
   *   <li>{@code otherwise}: {@code "\d{n}"}
   * </ol>
   *
   * @throws IllegalArgumentException if {@code n} is less than 1.
   */
  private void appendRequiredAnyRange(StringBuilder out, int n) {
    checkArgument(n >= 1, "bad any length %s", n);
    switch (n) {
      case 1:
        out.append(anyToken);
        break;
      case 2:
        // Repeating the token is only valid for a required match ("\d\d?" != "(?:\d{2})?").
        out.append(anyToken).append(anyToken);
        break;
      default:
        out.append(anyToken).append('{').append(n).append('}');
        break;
    }
  }

  /**
   * Appends the tokens which match any digits for every length in the inclusive range
   * {@code [lo, hi]}, optionally also allowing the empty match.
   *
   * <p>For example:
   * <ol>
   *   <li>{@code lo=1, hi=1, optional=false}: {@code "\d"}
   *   <li>{@code lo=1, hi=1, optional=true}: {@code "\d?"}
   *   <li>{@code lo=2, hi=2, optional=true}: {@code "(?:\d{2})?"}
   *   <li>{@code lo=3, hi=6, optional=false}: {@code "\d{3,6}"}
   *   <li>{@code lo=3, hi=6, optional=true}: {@code "(?:\d{3,6})?"}
   *   <li>{@code lo=1, hi=4, optional=true}: {@code "\d{0,4}"} (not {@code (?:\d{1,4})?})
   *   <li>{@code lo=2, hi=2, optional=false}: {@code "\d\d"} (special case for size)
   *   <li>{@code lo=1, hi=2, optional=false}: {@code "\d\d?"} (special case for size)
   * </ol>
   *
   * @throws IllegalArgumentException unless {@code 1 <= lo <= hi}.
   */
  private void appendAnyRange(StringBuilder out, int lo, int hi, boolean optional) {
    checkArgument(lo >= 1 && hi >= lo, "bad range arguments %s, %s", lo, hi);
    if (lo == hi) {
      if (!optional) {
        // Exactly one permitted length which must be present.
        appendRequiredAnyRange(out, lo);
        return;
      }
      if (lo == 1) {
        // A single optional digit needs no grouping.
        out.append(anyToken).append(OPTIONAL_MARKER);
        return;
      }
      // Exactly one permitted length (of at least two digits) which may be absent.
      out.append(GROUP_START).append(anyToken);
      out.append('{').append(lo).append('}');
      out.append(OPTIONAL_GROUP_END);
      return;
    }
    if (lo == 1) {
      if (optional) {
        // "\d{0,N}" is written in place of the longer "(?:\d{1,N})?".
        out.append(anyToken).append("{0,").append(hi).append('}');
        return;
      }
      if (hi == 2) {
        // "\d\d?" is shorter than "\d{1,2}" (and shorter still with '.'). The '?' written here
        // applies to the second digit only and is unrelated to the whole range being optional
        // (which would be "(?:\d{1,2})?", i.e. "\d{0,2}").
        out.append(anyToken).append(anyToken).append(OPTIONAL_MARKER);
        return;
      }
    }
    // Everything else is a counted range, wrapped in an optional group where necessary.
    if (optional) {
      out.append(GROUP_START);
    }
    out.append(anyToken).append('{').append(lo).append(',').append(hi).append('}');
    if (optional) {
      out.append(OPTIONAL_GROUP_END);
    }
  }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattener.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattener.java
@ -0,0 +1,195 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import com.google.auto.value.AutoValue;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterables;
 import com.google.common.graph.ValueGraph;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.PriorityQueue;
 import java.util.function.Function;

 /**
 * Flattens an NFA graph of simple edges into a composite edge which represents all the same
 * transitions in a strict tree structure (i.e. nestable sub-groups). This can entail some
 * duplication of edges, but this should be kept to a minimum and favours duplicating trailing
 * paths to avoid introducing additional non-determinism.
 */
 final class NfaFlattener {
  /**
   * Flattens the given NFA graph into a single composite edge composed of concatenation and
   * disjunction. The resulting edge can be visited using the {@code Edge.Visitor} class.
   *
   * @param graph the NFA graph of simple edges to flatten.
   * @return a composite edge covering the path(s) from the initial node to the terminal node.
   */
  public static Edge flatten(ValueGraph<Node, SimpleEdge> graph) {
    return new NfaFlattener(graph).flatten();
  }

  /**
   * A simple pair of edge value and target node which represents the current state along any path
   * in the NFA graph. Path followers may be joined (if they point at the same node) but can only
   * be split by recursion into the new subgraph.
   */
  @AutoValue
  abstract static class PathFollower {
    // Factory for the AutoValue generated implementation.
    private static PathFollower of(Node node, Edge edge) {
      return new AutoValue_NfaFlattener_PathFollower(node, edge);
    }

    /** The target node that this follower points to. */
    abstract Node node();
    /** A composite edge representing everything up to the target node in the current sub-graph. */
    abstract Edge edge();
  }

  // The graph being flattened.
  private final ValueGraph<Node, SimpleEdge> graph;
  // An ordering for the work queue which ensures that followers with the same node are adjacent.
  private final Comparator<PathFollower> queueOrder;

  // Captures the graph and builds the work queue ordering: followers are ordered first by their
  // target node (using the graph's node ordering) and then by their edge to break ties.
  private NfaFlattener(ValueGraph<Node, SimpleEdge> graph) {
    this.graph = graph;
    this.queueOrder = Comparator
        .comparing(PathFollower::node, nodeOrdering(graph))
        .thenComparing(PathFollower::edge);
  }

  /**
   * Flattens the whole graph by collapsing the sub-graph which starts at the initial node and
   * then repeatedly collapsing and concatenating the sub-graph at the node reached so far, until
   * the terminal node is reached.
   *
   * @return the composite edge covering everything from the initial to the terminal node.
   */
  private Edge flatten() {
    // Sub-graph visitation only works for graphs which branch from and collapse to a single node.
    // An NFA graph could be multiple sequential edges or a sequence of edges and sub-graphs.
    // Handle that in this outer loop rather than complicate the visitor (already quite complex).
    PathFollower out = visitSubgraph(Node.INITIAL);
    // Nodes are value types, so test for the terminal node by value (as is done when joining
    // followers in this class) rather than by reference. This way termination does not depend on
    // the terminal node always being the one canonical instance.
    while (!out.node().equals(Node.TERMINAL)) {
      PathFollower subgraph = visitSubgraph(out.node());
      out = PathFollower.of(subgraph.node(), Edge.concatenation(out.edge(), subgraph.edge()));
    }
    return out.edge();
  }

  /**
   * Visits the sub-graph rooted at the given node, following all out-edges until they eventually
   * re-join. Because the given graph has only one terminal node and no cycles, all sub-graphs must
   * eventually rejoin at some point. If during visitation of a sub-graph, a node with multiple
   * out-edges is reached, then the sub-graph it starts is recursively visited. Note that as "inner"
   * sub-graphs must terminate at or before their parent graph, nesting is assured.
   *
   * <p>The key to the implementation of this algorithm is that visitation occurs in breadth-first
   * order defined according to the reachability of the nodes in the graph. This ensures that when
   * an edge follower which reaches a node at which other edges join together is processed (i.e.
   * when it gets to the head of the queue) all the other followers that can also reach that node
   * must also be present in a contiguous sequence at the front of the queue.
   *
   * @param node the node at which the sub-graph starts; it must have at least one out-edge.
   * @return a follower holding the node at which the sub-graph collapsed and the composite edge
   *     representing everything between {@code node} and that node.
   * @throws IllegalArgumentException if {@code node} has no out-edges.
   */
  private PathFollower visitSubgraph(Node node) {
    Preconditions.checkArgument(graph.outDegree(node) > 0, "cannot recurse from the terminal node");
    if (graph.outDegree(node) == 1) {
      // Visit the trivial "subgraph" that's really just a single edge. Note that this code could
      // loop and concatenate all sequential single edges, but it also works fine to rely on the
      // recursion of the caller (the advantage of doing it this, simpler, way means that this code
      // doesn't have to know about termination due to reaching the terminal node).
      Node target = Iterables.getOnlyElement(graph.successors(node));
      return PathFollower.of(target, graph.edgeValue(node, target).get());
    }
    // A work-queue of the path followers, ordered primarily by the node they target. This results
    // in the followers at any "point of collapse" being adjacent in the queue.
    PriorityQueue<PathFollower> followerQueue = new PriorityQueue<>(queueOrder);
    for (Node t : graph.successors(node)) {
      followerQueue.add(PathFollower.of(t, graph.edgeValue(node, t).get()));
    }
    while (true) {
      // Get the set of followers that share the same target node at the head of the queue. The
      // ordering in the queue ensures that followers for the same target are always adjacent.
      PathFollower follower = followerQueue.remove();
      Node target = follower.node();
      // A null result means no other follower currently targets the same node.
      List<Edge> joiningEdges = collectJoiningEdges(followerQueue, target);
      if (joiningEdges != null) {
        // Replace any joined followers with their disjunction (they all have the same target).
        joiningEdges.add(follower.edge());
        follower = PathFollower.of(target, Edge.disjunction(joiningEdges));
      }
      if (followerQueue.isEmpty()) {
        // If we just processed the last "joining" paths then this sub-graph has been collapsed
        // into a single edge and we just return the current follower. Note that we can join edges
        // without ending recursion (when 3 followers become 2) but we can only end recursion after
        // joining at least 2 edges at the terminal sub-graph node.
        return follower;
      }
      // Recurse into the next sub-graph (possibly just a single edge) which is just concatenated
      // onto the current follower.
      PathFollower subgraph = visitSubgraph(target);
      followerQueue.add(
          PathFollower.of(subgraph.node(), Edge.concatenation(follower.edge(), subgraph.edge())));
    }
  }

  // Removes every follower at the head of the queue which points at the given target node and
  // returns their edges in removal order. Returns null (rather than an empty list) when the head
  // of the queue does not point at the target, i.e. nothing joins there.
  private static List<Edge> collectJoiningEdges(PriorityQueue<PathFollower> queue, Node target) {
    if (!nextFollowerJoinsTarget(queue, target)) {
      // By far the most common case, so no list is allocated for it.
      return null;
    }
    List<Edge> joined = new ArrayList<>();
    joined.add(queue.remove().edge());
    while (nextFollowerJoinsTarget(queue, target)) {
      joined.add(queue.remove().edge());
    }
    return joined;
  }

  // Returns true if the queue is non-empty and the follower at its head points at the given node.
  private static boolean nextFollowerJoinsTarget(PriorityQueue<PathFollower> queue, Node target) {
    // peek() yields null for an empty queue (a PriorityQueue never holds null elements).
    PathFollower head = queue.peek();
    return head != null && head.node().equals(target);
  }

  /**
   * Returns a total ordering of nodes in this graph based on the maximum path length from the
   * initial node. If path lengths are equal for two nodes, then the node ID is used to tie break.
   *
   * <p>The property of this ordering that is critical to the node flattening algorithm is that if
   * {@code a < b}, then no path exists in the graph where {@code b} precedes {@code a}. This
   * ensures that path followers are processed consistently with the "node reachability" and if
   * several path followers target the same node, then they are adjacent in the follower queue.
   *
   * <p>Using the node ID as a tie-break is safe, because while node IDs are assigned arbitrarily,
   * they only apply between nodes in the same path length "bucket", so it cannot violate the total
   * ordering requirement, since any order within a "bucket" is equally good.
   *
   * <p>NOTE(review): path lengths are only recorded for nodes reachable from
   * {@code Node.INITIAL}; any other node has no entry in the underlying map, so comparing it
   * would presumably fail. Confirm callers never do this.
   *
   * @param graph the acyclic graph whose nodes are to be ordered.
   * @return a comparator ordering nodes by maximum path length from the initial node, then by ID.
   */
  // Note: If there are graph cycles this will not terminate, but that implies bad bugs elsewhere.
  @VisibleForTesting
  static Comparator<Node> nodeOrdering(ValueGraph<Node, ?> graph) {
    Map<Node, Integer> map = new HashMap<>();
    recursivelySetMaxPathLength(Node.INITIAL, 0, graph, map);
    // We have to cast the "get" method since it accepts "Object", not "Node" on a map.
    return Comparator.comparing((Function<Node, Integer>) map::get).thenComparing(Node::id);
  }

  // Records the given path length for the node if it exceeds the longest one recorded so far, and
  // if so propagates the (one longer) length to each of the node's successors.
  private static void recursivelySetMaxPathLength(
      Node node, int length, ValueGraph<Node, ?> graph, Map<Node, Integer> map) {
    int longestSoFar = map.getOrDefault(node, -1);
    if (length <= longestSoFar) {
      // Nothing reachable from here can be lengthened by this path, so stop.
      return;
    }
    map.put(node, length);
    int nextLength = length + 1;
    for (Node successor : graph.successors(node)) {
      recursivelySetMaxPathLength(successor, nextLength, graph, map);
    }
  }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Node.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/Node.java
@ -0,0 +1,51 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import com.google.auto.value.AutoValue;

 /**
 * Value type for nodes in NFA graphs of phone number regular expressions. This is basically a
 * trivial wrapper for an {@code int}, but it makes a lot of other pieces of code type safe.
 * Outside this package, this type is mainly used for examining NFA graphs which represent a
 * regular expression, generated via {@link RangeTreeConverter#toNfaGraph}.
 */
@AutoValue
 public abstract class Node implements Comparable<Node> {
  /** The unique initial node in an NFA graph with in-degree zero (its ID is 0). */
  public static final Node INITIAL = new AutoValue_Node(0);
  /** The unique terminal node in an NFA graph with out-degree zero (its ID is 1). */
  public static final Node TERMINAL = new AutoValue_Node(1);

  /**
   * Returns a node whose ID is one greater than this node. For the initial node (ID 0) this is
   * the shared {@link #TERMINAL} instance; for every other node a new instance is created.
   */
  public Node createNext() {
    return (id() == 0) ? TERMINAL : new AutoValue_Node(id() + 1);
  }

  /** Returns the numeric ID of this node, which must be unique within an NFA graph. */
  abstract int id();

  /** Orders nodes by their numeric ID. */
  @Override
  public int compareTo(Node o) {
    return Integer.compare(id(), o.id());
  }

  /** Returns the decimal representation of this node's ID. */
  @Override
  public final String toString() {
    return Integer.toString(id());
  }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverter.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverter.java
@ -0,0 +1,123 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.base.Preconditions.checkState;

 import com.google.common.graph.ElementOrder;
 import com.google.common.graph.MutableValueGraph;
 import com.google.common.graph.ValueGraph;
 import com.google.common.graph.ValueGraphBuilder;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import java.util.HashMap;
 import java.util.Map;

 /**
 * Builds NFA {@link ValueGraph}s out of DFA {@link RangeTree}s. Nodes and edges of the result
 * mirror the original DFA almost one-to-one; the differences are:
 * <ol>
 *   <li>A node at which matching may optionally stop gains an 'epsilon' edge to the terminal
 *   node.
 *   <li>When such a node is also directly connected to the terminal node, a special "optional
 *   edge" is used instead (a {@link ValueGraph} holds only one value per edge, so an epsilon
 *   edge cannot coexist with another edge between the same source and target).
 * </ol>
 */
 public final class RangeTreeConverter {
   /**
    * Converts a {@link RangeTree} into its directed NFA graph. The result is not a DFA and may
    * contain epsilon transitions. The initial and terminal nodes are always present in the graph;
    * every other node is numbered in the order in which it is first visited.
    */
   public static ValueGraph<Node, SimpleEdge> toNfaGraph(RangeTree ranges) {
     NfaVisitor nfaBuilder = new NfaVisitor(ranges.getInitial());
     ranges.accept(nfaBuilder);
     return nfaBuilder.graph;
   }

   /** DFA visitor which incrementally assembles the NFA graph as edges are visited. */
   private static class NfaVisitor implements DfaVisitor {
     // Natural node ordering keeps anything generated from the graph (regex, graph files) stable.
     private final MutableValueGraph<Node, SimpleEdge> graph = ValueGraphBuilder
         .directed()
         .allowsSelfLoops(false)
         .nodeOrder(ElementOrder.<Node>natural())
         .build();
     // DFA node --> NFA node, for every node allocated so far.
     private final Map<DfaNode, Node> nodeMap = new HashMap<>();
     // Most recently allocated node; the next new node is created from it.
     private Node lastAdded;

     private NfaVisitor(DfaNode initial) {
       // There is always exactly one initial and one terminal node, so add them up front.
       graph.addNode(Node.INITIAL);
       graph.addNode(Node.TERMINAL);
       // Visitation only inspects target nodes when adding epsilon edges, so the case of a DFA
       // which matches the empty input has to be dealt with here.
       if (initial.canTerminate()) {
         graph.putEdgeValue(Node.INITIAL, Node.TERMINAL, Edge.epsilon());
       }
       nodeMap.put(initial, Node.INITIAL);
       nodeMap.put(RangeTree.getTerminal(), Node.TERMINAL);
       lastAdded = Node.TERMINAL;
     }

     @Override
     public void visit(DfaNode dfaSource, DfaEdge dfaEdge, DfaNode dfaTarget) {
       SimpleEdge edge = Edge.fromMask(dfaEdge.getDigitMask());
       Node from = nodeMap.get(dfaSource);
       Node to = getTarget(dfaTarget);
       boolean firstVisit = graph.addNode(to);
       // An edge can only already exist here if it is the epsilon added just before this edge
       // was visited, and that in turn is only possible when the target is the terminal node.
       SimpleEdge previous = graph.putEdgeValue(from, to, edge);
       if (previous != null) {
         checkState(to.equals(Node.TERMINAL) && previous.equals(Edge.epsilon()),
             "unexpected edge during visitation: %s -- %s --> %s", from, previous, to);
         // Replace the edge with its optional form, which carries the meaning of the epsilon.
         graph.putEdgeValue(from, to, edge.optional());
       }
       // Nodes already present in the graph have been (or are being) expanded elsewhere.
       if (!firstVisit) {
         return;
       }
       // The terminal node was added in the constructor, so the target is not the terminal node
       // here and no loop can be introduced. The epsilon may later be swapped for an optional
       // edge while visiting dfaTarget, which is fine.
       if (dfaTarget.canTerminate()) {
         graph.putEdgeValue(to, Node.TERMINAL, Edge.epsilon());
       }
       dfaTarget.accept(this);
     }

     // Looks up the NFA node for the given DFA node, allocating (and recording in the node map) a
     // new one if necessary. The node is not added to the graph here.
     private Node getTarget(DfaNode gnode) {
       Node mapped = nodeMap.get(gnode);
       if (mapped == null) {
         lastAdded = lastAdded.createNext();
         nodeMap.put(gnode, lastAdded);
         mapped = lastAdded;
       }
       return mapped;
     }
   }

   private RangeTreeConverter() {}
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatter.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatter.java
@ -0,0 +1,118 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import com.google.common.base.CharMatcher;
 import com.google.common.base.Preconditions;

 /**
 * Simple indenting formatter for regular expressions and other similar nested syntax. Obviously
 * the results are not the same from a match perspective as the new string contains whitespace.
 *
 * <p>Only the characters {@code '('}, {@code ')'} and {@code '|'} are treated as structure; no
 * attempt is made to recognize escaped forms of them. Groups are expected to be balanced:
 * formatting stops at an unmatched {@code ')'} encountered at the top level.
 */
 public final class RegexFormatter {
   /** Option for how to handle formatting of groups. */
   public enum FormatOption {
     /** Leave the {@code "?:"} prefix of each group exactly as found in the input. */
     PRESERVE_CAPTURING_GROUPS,
     /** Emit every group with a {@code "?:"} prefix, adding it where it is missing. */
     FORCE_NON_CAPTURING_GROUPS,
     /** Emit every group without a {@code "?:"} prefix, dropping it where it is present. */
     FORCE_CAPTURING_GROUPS,
   }

   // We only care about 3 specific tokens, so this code can be used to print strings which look
   // similar (nested, disjunctive groups) such as the toString() of the Edge class.
   private static final CharMatcher TOKENS = CharMatcher.anyOf("()|");

   /**
    * Formats a regular expression (or similar nested group syntax) using the following rules:
    * <ol>
    * <li>Newline after opening '(?:' and increase indent.
    * <li>Newline after '|'
    * <li>Decrease indent and add newline before closing ')'
    * </ol>
    *
    * @param regex the expression to format; any whitespace it contains is removed first
    * @param formatOption how the {@code "?:"} prefix of groups is handled; must not be null
    * @return the formatted, multi-line expression (indented by two spaces per nesting level)
    */
   public static String format(String regex, FormatOption formatOption) {
     return new RegexFormatter(regex, formatOption).format();
   }

   private final StringBuilder out = new StringBuilder();
   private final String regex;
   private final FormatOption formatOption;

   private RegexFormatter(String regex, FormatOption formatOption) {
     this.regex = CharMatcher.whitespace().removeFrom(regex);
     this.formatOption = Preconditions.checkNotNull(formatOption);
   }

   private String format() {
     recurse(0, 0);
     return out.toString();
   }

   /**
    * Writes the content of one nesting level, starting at {@code pos} and assuming the output is
    * currently at the start of a line.
    *
    * @param pos index in {@code regex} at which to start
    * @param level nesting depth, used as the indent of every line written at this level
    * @return the index at which the caller should resume: either the index of the {@code ')'}
    *     which closes this level (left for the caller to write) or the length of {@code regex}
    */
   private int recurse(int pos, int level) {
     while (pos < regex.length()) {
       indent(level);
       // Optionally printing closing group from previous recursion.
       if (regex.charAt(pos) == ')') {
         out.append(')');
         pos++;
       }
       int nextToken = TOKENS.indexIn(regex, pos);
       if (nextToken == -1) {
         // No more structure; copy the remainder verbatim (ranged append avoids a substring).
         out.append(regex, pos, regex.length());
         return regex.length();
       }
       out.append(regex, pos, nextToken);
       pos = nextToken;
       switch (regex.charAt(pos)) {
         case '(':
           out.append("(");
           pos++;
           // startsWith() only inspects the two characters at 'pos', whereas searching with
           // indexOf() would scan the rest of the string for every group lacking the prefix.
           if (regex.startsWith("?:", pos)) {
             if (formatOption != FormatOption.FORCE_CAPTURING_GROUPS) {
               out.append("?:");
             }
             pos += 2;
           } else if (formatOption == FormatOption.FORCE_NON_CAPTURING_GROUPS) {
             out.append("?:");
           }
           out.append('\n');
           pos = recurse(pos, level + 1);
           break;

         case '|':
           out.append("|\n");
           pos++;
           break;

         case ')':
           // Just exit recursion and let the parent write the ')', so don't update our position.
           out.append("\n");
           return pos;

         default:
           // Unreachable: TOKENS only matches the three characters handled above.
           throw new AssertionError();
       }
     }
     return pos;
   }

   /** Appends two spaces to the output for each nesting level. */
   private void indent(int level) {
     while (level-- > 0) {
       out.append("  ");
     }
   }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexGenerator.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/RegexGenerator.java
@ -0,0 +1,171 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.ALLOW_EDGE_SPLITTING;
 import static com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy.REQUIRE_EQUAL_EDGES;
 import static java.util.stream.Collectors.joining;

 import com.google.common.base.Preconditions;
 import com.google.common.graph.ValueGraph;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer;
 import com.google.i18n.phonenumbers.metadata.RangeTreeFactorizer.MergeStrategy;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import java.util.Optional;

 /**
 * Generator of partially optimized regular expressions for {@code RangeTree}s. Instances are
 * immutable; each {@code withXxx()} method returns a new generator with one more option enabled.
 */
 public final class RegexGenerator {
   private static final RegexGenerator BASIC = new RegexGenerator(false, false, false, false);

   // NOTE: Tail optimization should remain disabled since it seems to undo some of the benefits of
   // subgroup optimization. At some point the code can probably just be removed.
   private static final RegexGenerator DEFAULT_XML =
       BASIC.withDfaFactorization().withSubgroupOptimization();

   /**
    * Returns the generator which has none of the optional optimizations enabled. Its regular
    * expressions have a simpler structure than those of other generators, but are almost always
    * longer.
    */
   public static RegexGenerator basic() {
     return BASIC;
   }

   /**
    * Returns the default generator for XML data, to be used by any tool which needs the same
    * regular expressions as the legacy XML data. Which optimizations this generator enables is
    * deliberately left unspecified.
    */
   public static RegexGenerator defaultXmlGenerator() {
     return DEFAULT_XML;
   }

   /**
    * Returns a copy of this generator which matches any digit with the {@code '.'} token instead
    * of {@code '\d'}. Output gets shorter, possibly at the cost of performance on certain
    * platforms (and a degree of readability).
    *
    * @throws IllegalStateException if dot-matching is already enabled
    */
   public RegexGenerator withDotMatch() {
     Preconditions.checkState(!useDotMatch, "Dot-matching already enabled");
     return new RegexGenerator(true, factorizeDfa, optimizeSubgroups, optimizeTail);
   }

   /**
    * Returns a copy of this generator which applies a length-based factorization of the DFA graph
    * in an attempt to reduce the number of problematic terminating states. The resulting regular
    * expressions have additional non-determinism, but can be greatly reduced in size.
    *
    * @throws IllegalStateException if this factorization is already enabled
    */
   public RegexGenerator withDfaFactorization() {
     Preconditions.checkState(!factorizeDfa, "Length based factorizing already enabled");
     return new RegexGenerator(useDotMatch, true, optimizeSubgroups, optimizeTail);
   }

   /**
    * Returns a copy of this generator which applies experimental factorization of the DFA graph
    * in an attempt to identify and handle subgroups which would cause repetition. The resulting
    * regular expressions have additional non-determinism, but can be greatly reduced in size.
    *
    * @throws IllegalStateException if subgroup optimization is already enabled
    */
   public RegexGenerator withSubgroupOptimization() {
     Preconditions.checkState(!optimizeSubgroups, "Subgroup optimization already enabled");
     return new RegexGenerator(useDotMatch, factorizeDfa, true, optimizeTail);
   }

   /**
    * Returns a copy of this generator which applies tail optimization to the intermediate NFA
    * graph to factor out common trailing paths. This gives a small size improvement in many cases
    * and does not adversely affect readability.
    *
    * @throws IllegalStateException if tail optimization is already enabled
    */
   public RegexGenerator withTailOptimization() {
     Preconditions.checkState(!optimizeTail, "Tail optimization already enabled");
     return new RegexGenerator(useDotMatch, factorizeDfa, optimizeSubgroups, true);
   }

   private final boolean useDotMatch;
   private final boolean factorizeDfa;
   private final boolean optimizeSubgroups;
   private final boolean optimizeTail;

   private RegexGenerator(
       boolean useDotMatch, boolean factorizeDfa, boolean optimizeSubgroups, boolean optimizeTail) {
     this.useDotMatch = useDotMatch;
     this.factorizeDfa = factorizeDfa;
     this.optimizeSubgroups = optimizeSubgroups;
     this.optimizeTail = optimizeTail;
   }

   /**
    * Generates the regular expression for a range tree according to the options configured for
    * this generator.
    *
    * @throws IllegalArgumentException if the ranges are empty, or contain only the empty digit
    *     sequence
    */
   public String toRegex(RangeTree ranges) {
     // An empty range would need "a regex that matches nothing", which is meaningless.
     checkArgument(!ranges.isEmpty(),
         "cannot generate regular expression from empty ranges");
     // Without any explicit state transition in the graph there is nothing to write (we can
     // generate "(?:<re>)?" but only if "<re>" is non-empty). All we would get is "the regex that
     // always immediately terminates after no input", which is meaningless too.
     checkArgument(!ranges.getInitial().equals(RangeTree.getTerminal()),
         "range tree must not contain only the empty digit sequence: %s", ranges);

     String factorized = generateFactorizedRegex(ranges);
     return optimizeSubgroups ? recursivelyOptimizeSubgroups(ranges, factorized) : factorized;
   }

   // Repeatedly splits off the subgraph reported by the SubgroupOptimizer and keeps the
   // "<leftover>|<subgraph>" form only when it is strictly shorter than the given regex.
   private String recursivelyOptimizeSubgroups(RangeTree ranges, String regex) {
     Optional<RangeTree> repeating = SubgroupOptimizer.extractRepeatingSubgraph(ranges);
     if (!repeating.isPresent()) {
       return regex;
     }
     RangeTree subgraph = repeating.get();
     RangeTree remainder = ranges.subtract(subgraph);
     String remainderRegex =
         recursivelyOptimizeSubgroups(remainder, generateFactorizedRegex(remainder));
     String candidate = remainderRegex + "|" + generateFactorizedRegex(subgraph);
     return candidate.length() < regex.length() ? candidate : regex;
   }

   // Returns the plain regex of the ranges or, when DFA factorization is enabled, the shortest of
   // the plain regex and the regexes obtained with each merge strategy (tried in a fixed order).
   private String generateFactorizedRegex(RangeTree ranges) {
     String best = regexOf(ranges);
     if (!factorizeDfa) {
       return best;
     }
     best = generateFactorizedRegex(ranges, best, REQUIRE_EQUAL_EDGES);
     return generateFactorizedRegex(ranges, best, ALLOW_EDGE_SPLITTING);
   }

   // Factors the DFA with the given strategy and joins the regexes of the factors with '|'. The
   // result replaces the best regex so far only if it is strictly shorter.
   private String generateFactorizedRegex(RangeTree dfa, String bestRegex, MergeStrategy strategy) {
     String candidate = RangeTreeFactorizer.factor(dfa, strategy).stream()
         .map(this::regexOf)
         .collect(joining("|"));
     if (candidate.length() < bestRegex.length()) {
       return candidate;
     }
     return bestRegex;
   }

   // Converts the ranges to an NFA graph (tail-optimized if configured), flattens it and writes
   // it out as a regular expression.
   private String regexOf(RangeTree ranges) {
     ValueGraph<Node, SimpleEdge> converted = RangeTreeConverter.toNfaGraph(ranges);
     ValueGraph<Node, SimpleEdge> nfa =
         optimizeTail ? TrailingPathOptimizer.optimize(converted) : converted;
     return EdgeWriter.toRegex(NfaFlattener.flatten(nfa), useDotMatch);
   }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/SubgroupOptimizer.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/SubgroupOptimizer.java
@ -0,0 +1,190 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.collect.ImmutableList.toImmutableList;

 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.LinkedHashMultiset;
 import com.google.common.collect.Multiset;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
 import java.util.stream.IntStream;
 import javax.annotation.Nullable;

 /**
 * An optimization for RangeTree DFAs which tries to isolate and extract those subgraphs which
 * would otherwise cause a lot of repetition in the generated regular expression.
 */
 public final class SubgroupOptimizer {
   /**
    * Returns the subgraph which is likely to cause the most repetition in the regular expression
    * of the given DFA, if there is one. Subtracting the result from the original range tree and
    * generating two distinct regular expressions is likely to give something shorter than the
    * regular expression of the original range.
    */
   public static Optional<RangeTree> extractRepeatingSubgraph(RangeTree ranges) {
     Optional<DfaNode> bridge = LinkNodeVisitor.findBridgingNode(ranges);
     return bridge.flatMap(node -> SubgraphExtractionVisitor.extractSubgraph(ranges, node));
   }

   /**
    * A visitor which attaches two kinds of weight to the nodes of a DFA:
    * <ul>
    *   <li>A count of the edges coming into the node.
    *   <li>The accumulated weight of all edges in the subgraph rooted at the node.
    * </ul>
    * The two are combined by the cost function:
    * <pre>cost(n) = subgraph-weight(n) * (in-order(n) - 1)</pre>
    * to get a proxy for the cost of the additional duplicates likely to be created by the node.
    */
   static class LinkNodeVisitor implements DfaVisitor {
     // The length of the range specification of an edge is a reasonable approximation of the
     // cost of that edge in a subgraph (it doesn't work so well for repeated edges like
     // 'xxxxxxxx' --> "\d{8}", but it helps to break ties in the cost function).
     private static final ImmutableList<Integer> EDGE_WEIGHTS =
         IntStream.rangeClosed(1, 0x3FF)
             .mapToObj(m -> RangeSpecification.toString(m).length())
             .collect(toImmutableList());

     // These must be "linked" multisets (at least the one which gets iterated over), as regular
     // expression generation would otherwise be non-deterministic.
     private final Multiset<DfaNode> inOrder = LinkedHashMultiset.create();
     private final Multiset<DfaNode> subgraphWeight = LinkedHashMultiset.create();

     /**
      * Returns the interior node whose subgraph is likely to cause the most repetition in the
      * regular expression of the given DFA, or empty if no node has a cost above zero.
      *
      * @throws IllegalArgumentException if the ranges are empty
      */
     static Optional<DfaNode> findBridgingNode(RangeTree ranges) {
       checkArgument(!ranges.isEmpty(), "cannot visit empty ranges");
       LinkNodeVisitor weigher = new LinkNodeVisitor();
       ranges.accept(weigher);
       return Optional.ofNullable(weigher.getHighestCostNode());
     }

     private static int getEdgeWeight(DfaEdge edge) {
       // Weights are stored for masks 1..0x3FF (a zero edge mask is not legal), so the weight of
       // mask m lives at index (m - 1).
       return EDGE_WEIGHTS.get(edge.getDigitMask() - 1);
     }

     @VisibleForTesting
     int getSubgraphWeight(DfaNode n) {
       return subgraphWeight.count(n);
     }

     @VisibleForTesting
     int getInOrder(DfaNode n) {
       return inOrder.count(n);
     }

     // Returns null if no node has a cost greater than zero. Since the cost function uses
     // (in-order(n) - 1) this is trivially the case for any graph in which every interior node
     // has just one in-edge (the terminal node can have more than one in-edge, but its weight is
     // zero, and the initial node is never considered a candidate). If several nodes share the
     // highest cost, the first one encountered wins.
     @VisibleForTesting
     @Nullable
     DfaNode getHighestCostNode() {
       DfaNode best = null;
       int bestCost = 0;
       for (DfaNode candidate : inOrder.elementSet()) {
         int cost = getSubgraphWeight(candidate) * (getInOrder(candidate) - 1);
         if (cost > bestCost) {
           best = candidate;
           bestCost = cost;
         }
       }
       return best;
     }

     @Override
     public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
       // A weight of zero means the target was not visited before (or is the terminal node), in
       // which case its subgraph must be weighed before its weight can be used.
       boolean unweighed = subgraphWeight.count(target) == 0;
       if (unweighed && !target.equals(RangeTree.getTerminal())) {
         target.accept(this);
       }
       // The source gains the weight of the target's subgraph plus that of the current edge, and
       // the target gains one more in-edge.
       subgraphWeight.add(source, subgraphWeight.count(target) + getEdgeWeight(edge));
       inOrder.add(target);
     }
   }

   /**
    * A visitor which extracts the subgraph of a DFA passing through a given interior "bridging"
    * node.
    */
   private static class SubgraphExtractionVisitor implements DfaVisitor {
     private final DfaNode bridgingNode;
     // Every path found so far which passes through the bridging node.
     private final List<RangeSpecification> paths = new ArrayList<>();
     // The path from the initial node to the node currently being visited.
     private RangeSpecification path = RangeSpecification.empty();
     // True while visiting below the bridging node.
     private boolean sawBridgingNode = false;
     // True once a terminating path which is not below the bridging node has been seen.
     private boolean splitHappens = false;

     /**
      * Returns the subgraph passing through the given node, or empty if that subgraph would not
      * be a proper subgraph of the given ranges.
      */
     static Optional<RangeTree> extractSubgraph(RangeTree ranges, DfaNode node) {
       SubgraphExtractionVisitor extractor = new SubgraphExtractionVisitor(node);
       ranges.accept(extractor);
       if (!extractor.splitHappens) {
         // Everything passes through the node, so there is nothing to split off.
         return Optional.empty();
       }
       return Optional.of(RangeTree.from(extractor.paths));
     }

     private SubgraphExtractionVisitor(DfaNode bridgingNode) {
       this.bridgingNode = checkNotNull(bridgingNode);
     }

     @Override
     public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
       RangeSpecification parentPath = path;
       path = parentPath.extendByMask(edge.getDigitMask());
       // Any terminating node (not just the end of the graph) may give rise to a path, since the
       // whole subgraph _after_ the bridging node is wanted, including terminating nodes.
       if (target.canTerminate()) {
         if (!sawBridgingNode) {
           // A path outside the subgroup exists, so the extracted DFA will be a proper subgraph
           // of the original graph.
           splitHappens = true;
         } else {
           // We are "below" the bridging node, so this path belongs to the subgraph.
           paths.add(path);
         }
       }
       // Paths are emitted only while below the bridging node (note that the bridging node cannot
       // be the terminal node); everywhere else recursion proceeds with the flag left as it is.
       boolean enteringSubgraph = target.equals(bridgingNode);
       if (enteringSubgraph) {
         sawBridgingNode = true;
       }
       target.accept(this);
       if (enteringSubgraph) {
         sawBridgingNode = false;
       }
       path = parentPath;
     }
   }
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizer.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizer.java
@ -0,0 +1,206 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
 import static java.util.Comparator.naturalOrder;
 import static java.util.stream.Collectors.toList;

 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.graph.Graphs;
 import com.google.common.graph.MutableValueGraph;
 import com.google.common.graph.ValueGraph;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Optional;

 /**
 * Optimizer for NFA graphs which attempts to restructure the trailing paths to maximize sharing
 * and hopefully minimize the amount of duplication in the resulting regular expression.
 */
 public final class TrailingPathOptimizer {
   /**
    * Optimizes an NFA graph to make trailing "any digit" sequences common where possible. In many
    * cases this will result in no change to the structure of the NFA (common trailing paths are
    * not a feature of every NFA), but in some cases a substantial reduction in duplication can
    * occur.
    *
    * <p>This is equivalent to recognizing that {@code "12\d{2}\d{2}?|34\d{2}|56\d{3}"} can be
    * written as {@code "(?:12\d{2}?|34|56\d)\d{2}"}.
    *
    * @param graph the NFA graph to optimize; it is never modified (all work is done on a copy)
    * @return the given graph itself if no trailing "any digit" sequences lead to the terminal
    *     node, otherwise a restructured copy of it
    */
   public static ValueGraph<Node, SimpleEdge> optimize(ValueGraph<Node, SimpleEdge> graph) {
     MutableValueGraph<Node, SimpleEdge> out = Graphs.copyOf(graph);

     // Build a map of trailing "any digit" sequences (key is the node it starts from).
     Map<Node, AnyPath> anyPaths = new HashMap<>();
     recursivelyDetachTrailingPaths(Node.TERMINAL, AnyPath.EMPTY, out, anyPaths);

     // If the terminal node has no "any digit" sequences leading to it, there's nothing we can do
     // (well not in this simplistic algorithm anyway). This should almost never happen for phone
     // number matching graphs as it implies a match expression that can terminate at a precise
     // digit, rather than any digit. The only time this might occur is for short-codes, but due to
     // their size it's likely to be fine if we don't try to aggressively optimize them.
     if (anyPaths.size() == 1 && anyPaths.containsKey(Node.TERMINAL)) {
       return graph;
     }
     // This is just a way to find a node from which we can start generating new nodes.
     Node lastAddedNode = out.nodes().stream().max(naturalOrder()).get();

     // Process paths from short to long (since some paths are sub-paths of longer ones).
     List<Node> shortestPathsFirst = anyPaths.entrySet().stream()
         .sorted(Comparator.comparing(Entry::getValue))
         .map(Entry::getKey)
         .collect(toList());
     Node pathEnd = Node.TERMINAL;
     while (true) {
       // Start with the next path that might be a factor of all the remaining paths.
       Node shortestPathNode = shortestPathsFirst.get(0);
       AnyPath shortestPath = anyPaths.get(shortestPathNode);
       int pathsToFactor = shortestPathsFirst.size() - 1;
       if (pathsToFactor == 0) {
         // If all paths are factored, we're done.
         break;
       }
       // Factor all the remaining paths by the shortest path (where a missing result means it
       // cannot be factored).
       ImmutableList<AnyPath> factored = shortestPathsFirst.stream()
           .skip(1)
           .map(n -> anyPaths.get(n).factor(shortestPath))
           .filter(Optional::isPresent)
           .map(Optional::get)
           .collect(toImmutableList());
       // If not all the remaining paths have the shortest path as a common factor, we're done (in
       // this simplistic algorithm we don't consider cases where an AnyPath is the factor of some,
       // but not all, other paths; we could but it's far less likely to reduce regex size).
       if (factored.size() < pathsToFactor) {
         break;
       }
       // Shortest path is a factor of all remaining paths, so add a new path to the graph for it.
       lastAddedNode = addPath(shortestPathNode, pathEnd, shortestPath, lastAddedNode, out);
       // We're done with this path, but might still be able to find more factors of remaining paths.
       anyPaths.remove(shortestPathNode);
       shortestPathsFirst.remove(0);  // index, not value.
       // The newly factored edges now replace the original factors in the map. Every remaining
       // path was factored (checked above), so the two lists line up index by index.
       for (int n = 0; n < factored.size(); n++) {
         Preconditions.checkState(anyPaths.containsKey(shortestPathsFirst.get(n)));
         anyPaths.put(shortestPathsFirst.get(n), factored.get(n));
       }
       // We now connect any new factored edges to the node we just added (not the terminal node).
       pathEnd = shortestPathNode;
     }
     // If we exit, we must still reconnect any remaining, unfactored, paths to the graph.
     for (Map.Entry<Node, AnyPath> e : anyPaths.entrySet()) {
       lastAddedNode = addPath(e.getKey(), pathEnd, e.getValue(), lastAddedNode, out);
     }
     return out;
   }

   /**
    * Recursively build up a map of trailing "any digit" sequences (AnyPath), starting from some
    * current node (initially the terminal node) and working backwards. The key in the map is the
    * node at which the AnyPath value starts from. Edges and nodes are removed from the graph,
    * leaving "ragged" paths which will need to be reconnected later (the keys in the map are the
    * set of nodes that need to be reconnected).
    *
    * @param node the node currently being examined
    * @param path the "any digit" sequence leading from {@code node} to where recursion began
    * @param g the graph to detach trailing paths from (modified in place)
    * @param anyPaths the map to which each detected AnyPath is added, keyed by its start node
    * @return whether the given node is the start of an AnyPath (i.e. if it immediately follows any
    *     edges which are not "any digit" sequences).
    */
   private static boolean recursivelyDetachTrailingPaths(
       Node node, AnyPath path, MutableValueGraph<Node, SimpleEdge> g, Map<Node, AnyPath> anyPaths) {
     if (beginsAnAnyPath(node, g)) {
       anyPaths.put(node, path);
       return true;
     }
     // All incoming edges accept all digits, so we can recurse (but don't traverse epsilons).
     // The sources are collected into a list first since the graph is modified in the loop below.
     List<Node> sources = g.predecessors(node).stream()
         .filter(s -> !g.edgeValue(s, node).get().equals(Edge.epsilon()))
         .collect(toList());
     for (Node source : sources) {
       AnyPath newPath = path.extend(canTerminate(source, g));
       // Recurse to remove trailing paths higher in the tree and keep this source node only if
       // recursion stopped here.
       boolean keepSourceNode = recursivelyDetachTrailingPaths(source, newPath, g, anyPaths);
       g.removeEdge(source, node);
       // This removes the epsilon if it exists (and does nothing otherwise). This is safe since we
       // know the other out-edge of this node accepts all digits, so the only remaining type of
       // edge that could exist is an epsilon. After removing both we expect not to find any others.
       g.removeEdge(source, Node.TERMINAL);
       Preconditions.checkState(g.outDegree(source) == 0, "unexpected out edges in trailing graph");
       // If we were able to recurse past this node, it can be removed.
       if (!keepSourceNode) {
         g.removeNode(source);
       }
     }
     return false;
   }

   /**
    * Returns whether the given node has incoming edges that do not just accept "any digit". This is
    * the point at which recursion must stop since AnyPath can only represent "any digit" sequences.
    * The initial node always counts as such a point. Incoming epsilon edges are ignored.
    */
   private static boolean beginsAnAnyPath(Node target, ValueGraph<Node, SimpleEdge> g) {
     // Obviously we cannot recurse past the initial node. Node is a value type, so compare it with
     // equals() rather than relying on the identity of the INITIAL instance.
     if (target.equals(Node.INITIAL)) {
       return true;
     }
     return g.predecessors(target).stream()
         .map(s -> g.edgeValue(s, target).get())
         .filter(e -> !e.equals(Edge.epsilon()))
         .anyMatch(e -> e.getDigitMask() != ALL_DIGITS_MASK);
   }

   /**
    * Returns whether this node can terminate. This logic relies on the input graph not having had
    * its epsilon edges moved (i.e. if an epsilon edge exists it must point to the terminal node).
    * This also looks for special "optional" edges which exist when a non-epsilon edge already
    * exists from this node to the terminal node.
    */
   private static boolean canTerminate(Node node, ValueGraph<Node, SimpleEdge> g) {
     return g.successors(node).stream()
         .map(t -> g.edgeValue(node, t).get())
         .anyMatch(e -> e.isOptional() || e.equals(Edge.epsilon()));
   }

   /**
    * Adds the given "AnyPath" into the graph, generating new nodes and edges as necessary.
    *
    * @param node the node at which the path starts
    * @param end the node at which the path ends
    * @param path the "any digit" sequence to add between the two nodes
    * @param lastAdded the node from which the next new node is generated
    * @param out the graph to add the path to (modified in place)
    * @return the last node generated for the path, or {@code lastAdded} if none were generated
    */
   private static Node addPath(
       Node node, Node end, AnyPath path, Node lastAdded, MutableValueGraph<Node, SimpleEdge> out) {
     // Path length is always at least 1 for an AnyPath.
     int pathLength = path.maxLength();
     for (int n = 0; n < pathLength - 1; n++) {
       // If the path may stop after n digits, allow skipping from here straight to the end.
       if (path.acceptsLength(n)) {
         out.putEdgeValue(node, end, Edge.epsilon());
       }
       lastAdded = lastAdded.createNext();
       out.addNode(lastAdded);
       out.putEdgeValue(node, lastAdded, Edge.any());
       node = lastAdded;
     }
     // For the last edge we cannot add a parallel epsilon path if we need to skip to the end,
     // so add the special "optional any" edge instead.
     out.putEdgeValue(
         node, end, path.acceptsLength(pathLength - 1) ? Edge.optionalAny() : Edge.any());
     return lastAdded;
   }

   private TrailingPathOptimizer() {}
 }
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvParser.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvParser.java
@ -73,19 +73,23 @@ public final class CsvParser {
          } else {
            ImmutableMap.Builder<String, String> map = ImmutableMap.builder();
            // Not a pure lambda due to the need to index columns.
            row.forEach(new Consumer<String>() {
              private int i = 0;

              @Override
              public void accept(String v) {
                checkArgument(i < header.size(),
                    "too many columns (expected %s): %s", header.size(), map);
                if (!v.isEmpty()) {
                  map.put(header.get(i++), v);
                }
              }
            });
            handler.accept(map.build());
            row.forEach(
                new Consumer<String>() {
                  private int i = 0;

                  @Override
                  public void accept(String v) {
                    checkArgument(
                        i < header.size(),
                        "too many columns (expected %s): %s",
                        header.size(),
                        map);
                    if (!v.isEmpty()) {
                      map.put(header.get(i++), v);
                    }
                  }
                });
            handler.accept(map.buildOrThrow());
          }
        }
      };
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTable.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/CsvTable.java
@ -582,7 +582,7 @@ public abstract class CsvTable<K> {
          .put('r', '\r')
          .put('t', '\t')
          .put('\\', '\\')
          .build();
          .buildOrThrow();

  // Visible for AutoValue only.
  CsvTable() {}
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeTable.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/RangeTable.java
@ -628,7 +628,7 @@ public final class RangeTable {
      RangeTree include = getRanges(column, value);
      map.put(value, PrefixTree.minimal(include, allRanges.subtract(include), minPrefixLength));
    }
    return map.build();
    return map.buildOrThrow();
  }

  // Constants for the simplification routine below.
--- a/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Schema.java
+++ b/metadata/src/main/java/com/google/i18n/phonenumbers/metadata/table/Schema.java
@ -55,7 +55,7 @@ public abstract class Schema {
    }

    public Schema build() {
      return new AutoValue_Schema(names.build(), columns.build(), groups.build());
      return new AutoValue_Schema(names.build(), columns.buildOrThrow(), groups.buildOrThrow());
    }
  }

--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/LengthsParserTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/LengthsParserTest.java
@ -0,0 +1,76 @@
 /*
 * Copyright (C) 2022 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata;

 import static com.google.common.truth.Truth.assertThat;
 import static org.junit.Assert.assertThrows;

 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

/**
 * Tests for {@code LengthsParser.parseLengths}, covering both rejected inputs and the sets of
 * lengths produced for well formed specifiers such as "4,7-9,11".
 */
@RunWith(JUnit4.class)
 public final class LengthsParserTest {

  @Test
  public void shouldThrowIfStringContainsForbiddenCharacters() {
    assertRejected("a-6,7");
    assertRejected("8, B, C");
    assertRejected("8, ,10");
    assertRejected("4, +7-9, +11");
  }

  @Test
  public void shouldThrowIfNumbersAreOutOfOrder() {
    assertRejected("9-7");
    assertRejected("8,12-11");
    assertRejected("5,4,7-8");
    assertRejected("6-8, 7-9");
  }

  @Test
  public void shouldThrowIfFormatIsWrong() {
    assertRejected("4-6-8");
    assertRejected("7-");
    assertRejected("3, -7");
    assertRejected("1 2-3 4, 5 6");
  }

  @Test
  public void testParseSingletons() {
    assertThat(parse("8")).containsExactly(8);
    assertThat(parse("14")).containsExactly(14);
  }

  @Test
  public void testParseCommaSeparatedNumbers() {
    assertThat(parse("6,8,9")).containsExactly(6, 8, 9);
    assertThat(parse("13, 14")).containsExactly(13, 14);
  }

  @Test
  public void testParseRanges() {
    assertThat(parse("6-8")).containsExactly(6, 7, 8);
    assertThat(parse("13 - 14")).containsExactly(13, 14);
  }

  @Test
  public void testParseComplex() {
    assertThat(parse("4,7,9-12")).containsExactly(4, 7, 9, 10, 11, 12);
    assertThat(parse("4-6, 8, 10-12")).containsExactly(4, 5, 6, 8, 10, 11, 12);
  }

  /** Asserts that parsing the given specifier fails with an IllegalArgumentException. */
  private static void assertRejected(String spec) {
    assertThrows(IllegalArgumentException.class, () -> LengthsParser.parseLengths(spec));
  }

  /** Parses the given specifier and returns the resulting lengths. */
  private static Iterable<Integer> parse(String spec) {
    return LengthsParser.parseLengths(spec);
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeSpecificationTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/RangeSpecificationTest.java
@ -20,8 +20,8 @@ import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.DigitSequence.domain;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.parse;
 import static java.util.Arrays.asList;
 import static com.google.i18n.phonenumbers.metadata.testing.AssertUtil.assertThrows;
 import static java.util.Arrays.asList;

 import com.google.common.collect.ImmutableRangeSet;
 import com.google.common.collect.Range;
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcherTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/DigitSequenceMatcherTest.java
@ -0,0 +1,210 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result.INVALID;
 import static com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result.MATCHED;
 import static com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result.TOO_LONG;
 import static com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result.TOO_SHORT;

 import com.google.common.base.CharMatcher;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.DigitSequence;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler;
 import com.google.i18n.phonenumbers.metadata.regex.RegexGenerator;
 import java.util.Arrays;
 import java.util.regex.Pattern;
 import org.junit.Assert;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

/**
 * Tests for {@link DigitSequenceMatcher}. Each case compiles a {@link RangeTree} into matcher
 * bytes via {@link MatcherCompiler}, runs the resulting matcher over sample digit strings, and
 * cross-checks the expectation against the regular expression generated for the same ranges.
 */
@RunWith(JUnit4.class)
 public class DigitSequenceMatcherTest {

  @Test public void testStringDigits() {
    DigitSequence digits = DigitSequenceMatcher.digitsFromString("1234");

    Assert.assertTrue(digits.hasNext());
    Assert.assertEquals(1, digits.next());
    Assert.assertTrue(digits.hasNext());
    Assert.assertEquals(2, digits.next());
    Assert.assertTrue(digits.hasNext());
    Assert.assertEquals(3, digits.next());
    Assert.assertTrue(digits.hasNext());
    Assert.assertEquals(4, digits.next());
    Assert.assertFalse(digits.hasNext());
  }

  @Test public void testSingleDigitMatching() {
    assertNotMatches(ranges("0"), INVALID, "1", "9");
    assertNotMatches(ranges("0"), TOO_LONG, "00");

    assertMatches(ranges("x"), "0", "5", "9");
    assertNotMatches(ranges("x"), TOO_SHORT, "");
    assertNotMatches(ranges("x"), TOO_LONG, "00");

    assertMatches(ranges("[2-6]"), "2", "3", "4", "5", "6");
    assertNotMatches(ranges("[2-6]"), INVALID, "0", "1", "7", "8", "9");
    assertNotMatches(ranges("[2-6]"), TOO_LONG, "26");
  }

  @Test public void testOptional() {
    RangeTree dfa = ranges("12", "123");
    // Use the same tree for the positive and the negative assertions.
    assertMatches(dfa, "12", "123");
    assertNotMatches(dfa, TOO_SHORT, "1");
    assertNotMatches(dfa, INVALID, "13");
    assertNotMatches(dfa, TOO_LONG, "1233");
  }

  @Test public void testRepetition() {
    assertMatches(ranges("12xx", "12xxx", "12xxxx"), "1234", "12345", "123456");
  }

  @Test public void testOr() {
    RangeTree dfa = ranges("01", "23");
    assertMatches(dfa, "01", "23");
    assertNotMatches(dfa, INVALID, "03", "12");
    assertNotMatches(dfa, TOO_SHORT, "0", "2");
    assertNotMatches(dfa, TOO_LONG, "011", "233");

    assertMatches(ranges("01", "23", "45", "6789"), "01", "23", "45", "6789");
  }

  @Test public void testRealRegexShort() {
    RangeTree dfa = ranges(
        "11[2-7]xxxxxxx",
        "2[02][2-7]xxxxxxx",
        "33[2-7]xxxxxxx",
        "4[04][2-7]xxxxxxx",
        "79[2-7]xxxxxxx",
        "80[2-467]xxxxxxx");

    assertMatches(dfa, "112 1234567", "797 1234567", "807 1234567");
    assertNotMatches(dfa, TOO_SHORT, "112 123", "797 12345", "807 123456");
    assertNotMatches(dfa, TOO_LONG, "112 12345678", "797 123456789");
    assertNotMatches(dfa, INVALID, "122 1234567", "799 1234567", "805 1234567");
  }

  @Test public void testRealRegexLong() {
    RangeTree dfa = ranges(
        "12[0-249][2-7]xxxxxx",
        "13[0-25][2-7]xxxxxx",
        "14[145][2-7]xxxxxx",
        "1[59][14][2-7]xxxxxx",
        "16[014][2-7]xxxxxx",
        "17[1257][2-7]xxxxxx",
        "18[01346][2-7]xxxxxx",
        "21[257][2-7]xxxxxx",
        "23[013][2-7]xxxxxx",
        "24[01][2-7]xxxxxx",
        "25[0137][2-7]xxxxxx",
        "26[0158][2-7]xxxxxx",
        "278[2-7]xxxxxx",
        "28[1568][2-7]xxxxxx",
        "29[14][2-7]xxxxxx",
        "326[2-7]xxxxxx",
        "34[1-3][2-7]xxxxxx",
        "35[34][2-7]xxxxxx",
        "36[01489][2-7]xxxxxx",
        "37[02-46][2-7]xxxxxx",
        "38[159][2-7]xxxxxx",
        "41[36][2-7]xxxxxx",
        "42[1-47][2-7]xxxxxx",
        "43[15][2-7]xxxxxx",
        "45[12][2-7]xxxxxx",
        "46[126-9][2-7]xxxxxx",
        "47[0-24-9][2-7]xxxxxx",
        "48[013-57][2-7]xxxxxx",
        "49[014-7][2-7]xxxxxx",
        "5[136][25][2-7]xxxxxx",
        "522[2-7]xxxxxx",
        "54[28][2-7]xxxxxx",
        "55[12][2-7]xxxxxx",
        "5[78]1[2-7]xxxxxx",
        "59[15][2-7]xxxxxx",
        "612[2-7]xxxxxx",
        "6[2-4]1[2-7]xxxxxx",
        "65[17][2-7]xxxxxx",
        "66[13][2-7]xxxxxx",
        "67[14][2-7]xxxxxx",
        "680[2-7]xxxxxx",
        "712[2-7]xxxxxx",
        "72[14][2-7]xxxxxx",
        "73[134][2-7]xxxxxx",
        "74[47][2-7]xxxxxx",
        "75[15][2-7]xxxxxx",
        "7[67]1[2-7]xxxxxx",
        "788[2-7]xxxxxx",
        "816[2-7]xxxxxx",
        "82[014][2-7]xxxxxx",
        "83[126][2-7]xxxxxx",
        "86[136][2-7]xxxxxx",
        "87[078][2-7]xxxxxx",
        "88[34][2-7]xxxxxx",
        "891[2-7]xxxxxx");

    assertMatches(dfa, "364 2 123456", "674 4 123456", "883 7 123456");
    assertNotMatches(dfa, TOO_SHORT, "364 2 123", "674 4 1234", "883 7 12345");
    assertNotMatches(dfa, TOO_LONG, "364 2 1234567", "674 4 12345678");
    assertNotMatches(dfa, INVALID,
        "365 2 123456", "364 8 123456", "670 4 123456", "670 5 123456", "892 2 123456");
  }

  /** Builds a RangeTree from the given range specification strings. */
  private static RangeTree ranges(String... lines) {
    return RangeTree.from(Arrays.stream(lines).map(RangeSpecification::parse));
  }

  /** Compiles the given ranges into matcher bytes and creates a matcher from them. */
  private static DigitSequenceMatcher compileMatcher(RangeTree dfa) {
    return DigitSequenceMatcher.create(MatcherCompiler.compile(dfa));
  }

  /**
   * Asserts that every given number (whitespace is ignored) is matched by the matcher compiled
   * from the given ranges, after checking that the generated regex agrees with the expectation.
   */
  private static void assertMatches(RangeTree dfa, String... numbers) {
    checkRegex(dfa, true, numbers);
    assertMatcher(compileMatcher(dfa), MATCHED, numbers);
  }

  /**
   * Asserts that every given number (whitespace is ignored) is rejected with the given result by
   * the matcher compiled from the given ranges, after checking that the generated regex also
   * rejects it.
   *
   * @throws IllegalArgumentException if {@code error} is {@code MATCHED}
   */
  private static void assertNotMatches(RangeTree dfa, Result error, String... numbers) {
    checkArgument(error != MATCHED);
    checkRegex(dfa, false, numbers);
    assertMatcher(compileMatcher(dfa), error, numbers);
  }

  /**
   * Sanity checks the test inputs against the regular expression generated for the ranges.
   *
   * @throws IllegalArgumentException if an input matches when it should not, or vice versa
   */
  private static void checkRegex(RangeTree dfa, boolean expectMatch, String... numbers) {
    Pattern pattern = Pattern.compile(RegexGenerator.basic().toRegex(dfa));
    // Describe the actual failure: a missing match and an unexpected match are different things.
    String template =
        expectMatch
            ? "regex for %s could not match input %s"
            : "regex for %s unexpectedly matched input %s";
    for (String number : numbers) {
      checkArgument(expectMatch == pattern.matcher(noSpace(number)).matches(),
          template, dfa.asRangeSpecifications(), number);
    }
  }

  /** Asserts that the matcher returns the expected result for every given number. */
  private static void assertMatcher(
      DigitSequenceMatcher matcher, Result expected, String... numbers) {
    for (String number : numbers) {
      Result actual = matcher.match(DigitSequenceMatcher.digitsFromString(noSpace(number)));
      // Name the input so that a failure identifies which number was at fault.
      Assert.assertEquals("unexpected result for input \"" + number + "\"", expected, actual);
    }
  }

  /** Removes all whitespace from the input (test numbers are spaced out for readability). */
  private static String noSpace(String input) {
    return CharMatcher.whitespace().removeFrom(input);
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/CompilerRegressionTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/CompilerRegressionTest.java
@ -0,0 +1,317 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;

 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.common.truth.Truth.assertWithMessage;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
 import static java.lang.Integer.bitCount;
 import static java.lang.Integer.lowestOneBit;
 import static java.lang.Integer.numberOfTrailingZeros;

 import com.google.common.collect.Multimap;
 import com.google.common.collect.MultimapBuilder;
 import com.google.common.collect.SetMultimap;
 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto;
 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto.TestCase;
 import com.google.i18n.phonenumbers.internal.finitestatematcher.compiler.RegressionTestProto.Tests;
 import com.google.i18n.phonenumbers.metadata.DigitSequence;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaEdge;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaVisitor;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.DigitSequenceMatcher.Result;
 import com.google.protobuf.ByteString;
 import com.google.protobuf.TextFormat;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.nio.charset.StandardCharsets;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

/**
 * Regression tests which check the output of {@link MatcherCompiler}, and the behaviour of
 * {@link DigitSequenceMatcher} on the expected output, against the test cases stored in the
 * {@code regression_test_data.textpb} resource.
 */
@RunWith(JUnit4.class)
 public class CompilerRegressionTest {
  // Resource (resolved relative to this class) holding the text-format regression test cases.
  private static final String TEST_DATA_RESOURCE = "regression_test_data.textpb";

  // Number of bytes shown per diff snippet and per line of golden output.
  private static final int BYTES_PER_CHUNK = 20;

  // Tests that the compiler produces the expected output, byte-for-byte.
  @Test
  public void testCompiledBytesEqualExpectedMatcherBytes() throws IOException {
    StringWriter buffer = new StringWriter();
    PrintWriter errors = new PrintWriter(buffer);
    for (TestCase tc : loadTestCases()) {
      byte[] actual = MatcherCompiler.compile(ranges(tc.getRangeList()));
      byte[] expected = combine(tc.getExpectedList());
      int diffIndex = indexOfDiff(actual, expected);
      if (!tc.getShouldFail()) {
        if (diffIndex != -1) {
          errors.format("FAILED [%s]: First difference at index %d\n", tc.getName(), diffIndex);
          errors.format("Actual  : %s\n", formatPbSnippet(actual, diffIndex, BYTES_PER_CHUNK));
          errors.format("Expected: %s\n", formatPbSnippet(expected, diffIndex, BYTES_PER_CHUNK));
          writeGoldenPbOutput(actual, errors);
        }
      } else if (diffIndex == -1) {
        errors.format("FAILED [%s]: Expected difference, but got none\n", tc.getName());
      }
    }
    errors.flush();
    // All failures are collected first so that a single run reports every broken test case.
    String errorMessage = buffer.toString();
    if (!errorMessage.isEmpty()) {
      assertWithMessage(errorMessage).fail();
    }
  }

  // Test that the matcher behaves correctly with respect to the input ranges using the expected
  // byte sequences. If this test fails, then the matcher implementation is doing something wrong,
  // or the expected bytes were generated incorrectly (either by hand or from the compiler).
  //
  // IMPORTANT: This test tests that the expected bytes (rather than the compiled bytes) match the
  // numbers in the ranges. This avoids the risk of any bugs in both the matcher and compiler
  // somehow cancelling each other out. However this also means that this test depends on the
  // equality test above for validity (i.e. this test can pass even if the matcher compiler is
  // broken, so it should not be run in isolation when debugging).
  @Test
  public void testExpectedMatcherBytesMatchRanges() throws IOException {
    for (TestCase tc : loadTestCases()) {
      RangeTree rangeTree = ranges(tc.getRangeList());
      // If we compiled the ranges here, we could risk a situation where the compiled bytes were
      // broken but the compiler had a corresponding bug that cancelled it out. This test only
      // tests the matcher behaviour, whereas the test above only tests the compiler behaviour.
      DigitSequenceMatcher matcher = DigitSequenceMatcher.create(combine(tc.getExpectedList()));
      Multimap<Result, DigitSequence> numbers = buildTestNumbers(rangeTree);
      if (!tc.getShouldFail()) {
        testExpectedMatch(tc.getName(), matcher, numbers);
      } else {
        testExpectedFailure(tc.getName(), matcher, numbers);
      }
    }
  }

  // Loads all regression test cases from the test data resource. Fails with a descriptive
  // assertion (rather than an opaque NullPointerException) if the resource cannot be found.
  private static List<TestCase> loadTestCases() throws IOException {
    try (InputStream data =
        CompilerRegressionTest.class.getResourceAsStream(TEST_DATA_RESOURCE)) {
      assertWithMessage("missing test resource: %s", TEST_DATA_RESOURCE).that(data).isNotNull();
      Tests.Builder tests = RegressionTestProto.Tests.newBuilder();
      TextFormat.merge(new InputStreamReader(data, StandardCharsets.UTF_8), tests);
      return tests.getTestCaseList();
    }
  }

  // Asserts that the matcher returns the expected result for every generated test number.
  private static void testExpectedMatch(String testName, DigitSequenceMatcher matcher,
      Multimap<Result, DigitSequence> numbers) {
    for (Result expectedResult : Result.values()) {
      for (DigitSequence s : numbers.get(expectedResult)) {
        Result result = matcher.match(new Sequence(s));
        assertWithMessage("FAILED [%s]: Sequence %s", testName, s)
            .that(result).isEqualTo(expectedResult);
      }
    }
  }

  // Asserts that the matcher returns an unexpected result for at least one generated test number.
  private static void testExpectedFailure(String testName, DigitSequenceMatcher matcher,
      Multimap<Result, DigitSequence> numbers) {
    for (Result expectedResult : Result.values()) {
      for (DigitSequence s : numbers.get(expectedResult)) {
        Result result = matcher.match(new Sequence(s));
        if (result != expectedResult) {
          return;
        }
      }
    }
    assertWithMessage("FAILED [%s]: Expected at least one failure", testName).fail();
  }

  // Magic number: DigitSequences cannot be longer than 18 digits at the moment, so a check is
  // needed to prevent us trying to make longer-than-allowed sequences in tests. This only
  // happens in the case of a terminal node, since non-terminal paths must be < 17 digits long.
  // If the allowed number of digits increases, this value can be modified or left as-is.
  private static final int MAX_SEQUENCE_LENGTH = 18;

  // Trivial adapter from the metadata DigitSequence to the matcher's lightweight sequence.
  private static final class Sequence implements DigitSequenceMatcher.DigitSequence {
    private final DigitSequence seq;
    private int index = 0;

    Sequence(DigitSequence seq) {
      this.seq = seq;
    }

    @Override
    public boolean hasNext() {
      return index < seq.length();
    }

    @Override
    public int next() {
      return seq.getDigit(index++);
    }
  }

  // Returns a RangeTree for the list of RangeSpecification strings.
  static RangeTree ranges(List<String> specs) {
    return RangeTree.from(specs.stream().map(RangeSpecification::parse).collect(toImmutableList()));
  }

  // Builds a map of numbers for the given RangeTree to test every branching point in the DFA.
  // All path combinations are generated exactly once to give coverage. This does use pseudo
  // random numbers to pick random digits from masks, but it should not be flaky. If it _ever_
  // fails then it implies a serious problem with the matcher compiler or matcher implementation.
  private static Multimap<Result, DigitSequence> buildTestNumbers(RangeTree ranges) {
    SetMultimap<Result, DigitSequence> numbers =
        MultimapBuilder.enumKeys(Result.class).treeSetValues().build();
    Set<DfaNode> visited = new HashSet<>();
    ranges.accept(new Visitor(RangeSpecification.empty(), numbers, visited));
    return numbers;
  }

  /**
   * Visitor to generate a targeted set of test numbers from a range tree DFA, which should
   * exercise every instruction in the corresponding matcher data. These numbers should ensure
   * that every "branch" (including early terminations) is taken at least once. Where digits
   * should be equivalent (i.e. both x & y have the same effect) they are chosen randomly, since
   * otherwise you would need to generate billions of numbers to cover every possible combination.
   */
  private static final class Visitor implements DfaVisitor {
    private final RangeSpecification sourcePath;
    private final SetMultimap<Result, DigitSequence> numbers;
    private final Set<DfaNode> visited;
    private int outEdgesMask = 0;

    Visitor(RangeSpecification sourcePath,
        SetMultimap<Result, DigitSequence> numbers,
        Set<DfaNode> visited) {
      this.sourcePath = sourcePath;
      this.numbers = numbers;
      this.visited = visited;
    }

    @Override
    public void visit(DfaNode source, DfaEdge edge, DfaNode target) {
      // Record the current outgoing edge mask.
      int mask = edge.getDigitMask();
      outEdgesMask |= mask;
      // Get the current path and add a test number for it.
      RangeSpecification path = sourcePath.extendByMask(mask);
      numbers.put(target.canTerminate() ? Result.MATCHED : Result.TOO_SHORT, sequenceIn(path));
      // Avoid recursing into nodes we've already visited. This avoids generating many (hundreds)
      // of test numbers for nodes which are reachable in many ways (via many path prefixes). This
      // is an optional check and could be removed, but for testing larger ranges it seems to make
      // a difference in test time. DFA node/instruction coverage should be unaffected by this.
      // Note that Set.add() returns false if the node was already present.
      if (!visited.add(target)) {
        return;
      }
      // Recurse into the next level with a new visitor starting from our path (it's okay to visit
      // the terminal node here since it does nothing and leaves the out edges mask zero).
      Visitor childVisitor = new Visitor(path, numbers, visited);
      target.accept(childVisitor);
      // After recursion, find out which of our target's out-edges cannot be reached.
      int unreachableMask = ~childVisitor.outEdgesMask & ALL_DIGITS_MASK;
      if (unreachableMask != 0 && path.length() < MAX_SEQUENCE_LENGTH) {
        // Create a path which cannot be reached directly from our target node. If this is the
        // terminal node then we create a path that's too long, otherwise it's just invalid.
        Result expected = target.equals(RangeTree.getTerminal()) ? Result.TOO_LONG : Result.INVALID;
        numbers.put(expected, sequenceIn(path.extendByMask(unreachableMask)));
      }
    }
  }

  // Returns a pseudo randomly chosen sequence from the given path.
  private static DigitSequence sequenceIn(RangeSpecification path) {
    DigitSequence seq = DigitSequence.empty();
    for (int n = 0; n < path.length(); n++) {
      int mask = path.getBitmask(n);
      // A random number M in [0..BitCount), not the bit itself.
      // E.g. mask = 0011010011 ==> (0 <= maskBit < 5) (allowed digits are {0,1,4,6,7})
      int maskBit = (int) (bitCount(mask) * Math.random());
      // Mask out the M lower bits which come before the randomly selected one.
      // E.g. maskBit = 3 ==> mask = 0011000000 (3 lower bits cleared)
      while (maskBit > 0) {
        mask &= ~lowestOneBit(mask);
        maskBit--;
      }
      // Extend the sequence by the digit value of the randomly selected bit.
      // E.g. mask = 0011000000 ==> digit = 6 (randomly chosen from the allowed digits).
      seq = seq.extendBy(numberOfTrailingZeros(mask));
    }
    return seq;
  }

  // Combines multiple ByteStrings into a single byte[] (we allow splitting in the regression test
  // file for readability).
  private static byte[] combine(List<ByteString> bytes) {
    int size = bytes.stream().mapToInt(ByteString::size).sum();
    byte[] out = new byte[size];
    int offset = 0;
    for (ByteString b : bytes) {
      b.copyTo(out, offset);
      offset += b.size();
    }
    return out;
  }

  // Returns the index of the first difference, or -1 if the byte arrays are the same. If one array
  // is a strict prefix of the other, the length of the shorter array is returned.
  private static int indexOfDiff(byte[] a, byte[] b) {
    int length = Math.min(a.length, b.length);
    for (int n = 0; n < length; n++) {
      if (a[n] != b[n]) {
        return n;
      }
    }
    return (a.length == length && b.length == length) ? -1 : length;
  }

  // Formats a subset of the bytes as a human readable snippet using C-style hex escaping (which
  // is compatible with the regression test data). An ellipsis marks bytes omitted at either end.
  private static String formatPbSnippet(byte[] bytes, int start, int length) {
    StringBuilder out = new StringBuilder();
    if (start > 0) {
      out.append("...");
    }
    appendBytes(out, bytes, start, length);
    if (start + length < bytes.length) {
      out.append("...");
    }
    return out.toString();
  }

  // Writes bytes such that they can be cut & pasted into a regression test file as new golden data.
  private static void writeGoldenPbOutput(byte[] bytes, PrintWriter errors) {
    errors.println("Golden Data:");
    StringBuilder out = new StringBuilder();
    for (int start = 0; start < bytes.length; start += BYTES_PER_CHUNK) {
      errors.format("  expected: \"%s\"\n", appendBytes(out, bytes, start, BYTES_PER_CHUNK));
      // Reuse the same builder for the next line of output.
      out.setLength(0);
    }
  }

  // Appends a set of bytes in C-style hex format (e.g. \xHH). At most "length" bytes are appended,
  // stopping early at the end of the array.
  private static StringBuilder appendBytes(StringBuilder out, byte[] bytes, int start, int length) {
    int end = Math.min(start + length, bytes.length);
    for (int n = start; n < end; n++) {
      out.append(String.format("\\x%02x", bytes[n] & 0xFF));
    }
    return out;
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompilerTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/MatcherCompilerTest.java
@ -0,0 +1,144 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.primitives.Bytes.asList;
 import static com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler.MatcherCompiler.compile;

 import com.google.common.truth.Truth;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.finitestatematcher.OpCode;
 import java.util.Arrays;
 import java.util.List;
 import org.junit.Assert;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class MatcherCompilerTest {

  // The zero byte which the expected sequences below use as their final instruction.
  private static final Byte TERMINATOR = (byte) 0;

  @Test public void testSingleOperation() {
    byte zero = single(0);
    byte five = single(5);
    byte nine = single(9);
    assertCompile(ranges("0"), zero, TERMINATOR);
    assertCompile(ranges("5"), five, TERMINATOR);
    assertCompile(ranges("9"), nine, TERMINATOR);
    assertCompile(ranges("0559"), zero, five, five, nine, TERMINATOR);

    // When the shorter range "05" is also accepted, the instruction following it is expected to
    // carry bit 4 (the "terminating" variant of the instruction).
    byte fiveTerminating = (byte) (five | 0x10);
    assertCompile(ranges("05", "0559"), zero, five, fiveTerminating, nine, TERMINATOR);
  }

  @Test public void testAnyOperation() {
    byte anyOnce = any(1);
    byte anySixteenTimes = any(16);
    assertCompile(ranges("x"), anyOnce, TERMINATOR);
    assertCompile(ranges("xxxx_xxxx_xxxx_xxxx"), anySixteenTimes, TERMINATOR);
    // A 17th digit does not fit in the same instruction, so a second one is expected.
    assertCompile(ranges("xxxx_xxxx_xxxx_xxxx_x"), anySixteenTimes, anyOnce, TERMINATOR);

    byte anyOnceTerminating = (byte) (anyOnce | 0x10);
    assertCompile(ranges("x", "xx"), anyOnce, anyOnceTerminating, TERMINATOR);
    assertCompile(
        ranges("xxxx_xxxx_xxxx_xxxx", "xxxx_xxxx_xxxx_xxxx_x"),
        anySixteenTimes, anyOnceTerminating, TERMINATOR);
  }

  @Test public void testRangeOperation() {
    int zeroOrNine = range(0, 9);
    int oneToThree = range(1, 2, 3);
    int sevenToNine = range(7, 8, 9);

    // Range instructions are two bytes long and are written high byte first.
    assertCompile(ranges("[09]"), hi(zeroOrNine), lo(zeroOrNine), TERMINATOR);
    assertCompile(
        ranges("[123][789]"),
        hi(oneToThree), lo(oneToThree), hi(sevenToNine), lo(sevenToNine), TERMINATOR);
  }

  @Test public void testMapOperation() {
    // Force all 10 possible branches to be taken.
    byte[] data = compile(ranges("00", "11", "22", "33", "44", "55", "66", "77", "88", "99"));
    List<Byte> code = asList(data);
    // Check only the first 4 bytes for exact values.
    Assert.assertEquals(
        asList((byte) 0x95, (byte) 0x31, (byte) 0xF5, (byte) 0x9D), code.subList(0, 4));
    // Each branch should jump to a 2 byte sequence between 10 and 28 bytes away (inclusive).
    List<Byte> jumpTable = code.subList(4, 14);
    List<Byte> remainder = code.subList(14, data.length);
    // TODO: Now that ordering should be consistent, tighten up this test to ensure
    // consistency and remove the shorter consistency test below.
    for (int offset = 0xA; offset <= 0x1C; offset += 2) {
      Byte jump = (byte) offset;
      Assert.assertTrue(jumpTable.contains(jump));
      // The position of the jump within the table is the digit which selects that branch.
      int digit = jumpTable.indexOf(jump);
      // Subtract the length of the jump table to get relative offset in remaining code.
      int target = offset - 10;
      // Each jump should end in 2 single-byte instructions (match corresponding digit, terminate).
      Assert.assertEquals(single(digit), remainder.get(target));
      Assert.assertEquals(TERMINATOR, remainder.get(target + 1));
    }
  }

  @Test public void testConsistentSorting() {
    // The MatcherCompiler output must be the same on every run, otherwise the build becomes
    // non-deterministic because the generated file changes with each execution.
    byte[] expected = {-128, 0, 0, 29, 3, 5, 7, 32, 0, 33, 0, 34, 0};
    assertCompile(ranges("00", "11", "22"), expected);
  }

  /** Returns the 1-byte instruction which matches the given digit (0 to 9) exactly once. */
  private static Byte single(int value) {
    checkArgument(0 <= value && value < 10);
    int opcodeBits = OpCode.SINGLE.ordinal() << 5;
    return (byte) (opcodeBits | value);
  }

  /** Returns the 1-byte instruction which matches any digit {@code count} (1 to 16) times. */
  private static Byte any(int count) {
    checkArgument(1 <= count && count <= 16);
    int opcodeBits = OpCode.ANY.ordinal() << 5;
    // The count is stored minus one, so 1 to 16 is encoded as 0 to 15.
    return (byte) (opcodeBits | (count - 1));
  }

  /**
   * Returns the 2-byte instruction (as an int) which matches any one of the given digits. Each
   * digit sets the bit of the same index in the mask below the opcode bits.
   */
  private static int range(int... digits) {
    int mask = 0;
    for (int i = 0; i < digits.length; i++) {
      int digit = digits[i];
      checkArgument(digit >= 0 && digit <= 9);
      mask |= 1 << digit;
    }
    return (OpCode.RANGE.ordinal() << 13) | mask;
  }

  /** Returns the high byte (bits 8 to 15) of a 2-byte instruction. */
  private static Byte hi(int shortInstruction) {
    int shifted = shortInstruction >> 8;
    return (byte) shifted;
  }

  /** Returns the low byte (bits 0 to 7) of a 2-byte instruction. */
  private static Byte lo(int shortInstruction) {
    // The narrowing cast keeps exactly the lowest 8 bits.
    return (byte) shortInstruction;
  }

  /** Asserts that compiling the given ranges produces exactly the expected bytes. */
  private void assertCompile(RangeTree dfa, byte... expected) {
    byte[] actual = compile(dfa);
    Truth.assertThat(actual).isEqualTo(expected);
  }

  /** Builds a range tree from the given range specification strings. */
  private static RangeTree ranges(String... lines) {
    return RangeTree.from(Arrays.stream(lines).map(RangeSpecification::parse));
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/OperationTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/OperationTest.java
@ -0,0 +1,60 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.finitestatematcher.compiler;

 import static com.google.common.primitives.Bytes.asList;

 import com.google.common.collect.ImmutableList;
 import com.google.common.io.ByteArrayDataOutput;
 import com.google.common.io.ByteStreams;
 import junit.framework.Assert;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class OperationTest {

  @Test public void testWriteJumpTableNoExtraBranches() {
    ByteArrayDataOutput out = ByteStreams.newDataOutput();
    Operation.writeJumpTable(out, ImmutableList.of(0x10, 0x80, 0xFC), Statistics.NO_OP);
    // The jump table size (3 bytes here) is added to each of the offsets.
    byte[] expected = {(byte) 0x13, (byte) 0x83, (byte) 0xFF};
    Assert.assertEquals(asList(expected), asList(out.toByteArray()));
  }

  // An easy way to reason about what the offsets for the branches should be is to consider
  // that the last branch must always have the original offset (it jumps from the very end of
  // the jump table, which is exactly what the original offset specified). The branch before it
  // is the same except that it must jump over the final branch (ie, +2 bytes) and so on.
  // Direct offsets are relative to the start of the jump table however and must be adjusted.
  @Test public void testWriteJumpTableExtraBranches() {
    ByteArrayDataOutput out = ByteStreams.newDataOutput();
    // Two extra branches needed (0x200 and 0xF7). Worst case adjustment is 9 bytes.
    // Total adjustment is 7 bytes (jump table size + 2 * branch)
    Operation.writeJumpTable(out, ImmutableList.of(0xF7, 0xF6, 0x200), Statistics.NO_OP);
    byte[] expected = {
        // Jump table: (offset-to-branch, direct-adjusted-offset, offset-to-branch)
        (byte) 0x03, (byte) 0xFD, (byte) 0x05,
        // Extra branch: offset = 0xF7 + 2 (jumps over last branch)
        (byte) 0x10, (byte) 0xF9,
        // Extra branch: offset = 0x200 (last branch always has original offset)
        (byte) 0x12, (byte) 0x00};
    Assert.assertEquals(asList(expected), asList(out.toByteArray()));
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/regression_test_data.textpb
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/finitestatematcher/compiler/regression_test_data.textpb
@ -0,0 +1,295 @@
 # Copyright (C) 2017 The Libphonenumber Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 # ---- Manually crafted "unit" tests ----

 test_case {
  name: "Simple Range"
  range: "1234xxx"
  # 4 single byte, single value instructions: 0x20 + value
  # 1 single byte, "ANY" instruction: 0x40 + (count-1)
  expected: "\x21\x22\x23\x24\x42\x00"
 }
 test_case {
  # NOTE: When the ANY instruction is marked as terminating, it applies when the instruction is
  # reached, not after it's executed (i.e. \x50... is "(\d...)?", and not "\d(...)?").
  # Match 3 x ANY (0x42), then "terminate or ANY" (0x50), then 2 x ANY
  name: "Variable Any Match #1"
  range: "1xxx"
  range: "1xxxxxx"
  expected: "\x21\x42\x50\x41\x00"
 }
 test_case {
  name: "Variable Any Match #2"
  range: "1xxx"
  range: "1xxxx"
  range: "1xxxxx"
  range: "1xxxxxx"
  # A repeated terminating ANY match applies on every repeat, not just the first time.
  # Match 3 x ANY (0x42 = \d{3}), then 3 x "terminate or ANY" (0x52 = \d{0,3}).
  expected: "\x21\x42\x52\x00"
 }
 test_case {
  name: "Overflow Any Match"
  range: "xxxxxxxxxxxxxxxxxx"
  # 18 'any' digits can't fit in one instruction, so write 2 separate opcodes to match 16 (0x4F)
  # and then 2 (0x41). This will almost never occur since DigitSequence is limited to 18 digits.
  expected: "\x4F\x41\x00"
 }
 test_case {
  name: "Range Matching"
  range: "[0-4]12"
  # First 2 bytes are a "branch" operation (opcode = 0x60 plus mask), but there are no offsets
  # after it (since one "branch" is just to continue matching, while the other is failure).
  expected: "\x60\x1F\x21\x22\x00"
 }
 test_case {
  name: "Range Matching"
  # Requires a 2-way branch in the DFA where both paths cover all input digits [0-9].
  range: "[0-4]12"
  range: "[5-9]34"
  # First 2 bytes are a 2-way branch operation (opcode = 0x68 plus mask), then 2 jump offsets
  # from the end of the branch instruction.
  expected: "\x68\x1F\x02\x05\x21\x22\x00\x23\x24\x00"
 }

 # ---- Deliberate failure cases ----

 test_case {
  name: "Modified Single Match Bytecode"
  should_fail: true
  range: "123xxxx"
  range: "123xxxxx"
  range: "123xxxxxx"
  # Expected bytes have been tweaked to accept 4 (\x24), rather than 3 (\x23).
  expected: "\x21\x22\x24\x43\x51\x00"
 }
 test_case {
  name: "Modified Range Bytecode"
  should_fail: true
  range: "1[2-5]xxxx"
  # Expected bytes have been tweaked to accept [7-9] (\x63\x80), rather than [2-5] (\x60\x3C)
  expected: "\x21\x63\x80\x43\x00"
 }
 test_case {
  name: "Modified Any Match Bytecode"
  should_fail: true
  range: "1xxxx"
  # Expected bytes have been tweaked to accept xxx (\x42), rather than xxxx (\x43)
  expected: "\x21\x42\x00"
 }

 # ---- Auto-generated "stress tests" ----

 test_case {
  name: "GB Mobile"
  range: "7[1-3]xxxxxxxx"
  range: "74[0-46-9]xxxxxxx"
  range: "745[0-689]xxxxxx"
  range: "7457[0-57-9]xxxxx"
  range: "750[0-8]xxxxxx"
  range: "75[13-9]xxxxxxx"
  range: "752[0-35-9]xxxxxx"
  range: "7700[01]xxxxx"
  range: "770[1-9]xxxxxx"
  range: "77[1-7]xxxxxxx"
  range: "778[02-9]xxxxxx"
  range: "779[0-689]xxxxxx"
  range: "78[014-9]xxxxxxx"
  range: "78[23][0-8]xxxxxx"
  range: "79[024-9]xxxxxxx"
  range: "791[02-9]xxxxxx"
  range: "7911[028]xxxxx"
  range: "793[0-689]xxxxxx"
  # Not much insight here - other than it starts by matching a '7' and terminates in one place
  # after matching "any digit" 5 times (which is the shortest trailing match in the ranges).
  expected: "\x27\x8c\xa8\x1a\x2a\x06\x09\x0d\x14\x1c\x20\x40\x10\x1e\x6b\xdf\x1c\x1f\x84\x44"
  expected: "\x92\x5d\x1d\x16\x21\x88\x64\x92\x55\x1d\x0f\x21\x24\x6b\xf3\x09\x10\x82\x22\x49"
  expected: "\x6d\x03\x1b\x18\x40\x10\x19\x6b\x7f\x17\x19\x61\xff\x10\x11\x63\xef\x0e\x68\x01"
  expected: "\x11\x0c\x63\xfd\x07\x63\x7f\x04\x6b\xfd\x02\x0a\x40\x08\x63\xbf\x05\x60\x03\x02"
  expected: "\x61\x05\x44\x00"
 }
 test_case {
  name: "India Fixed Line"
  range: "11[2-7]xxxxxxx"
  range: "12[0-249][2-7]xxxxxx"
  range: "12[35-8]x[2-7]xxxxx"
  range: "13[0-25][2-7]xxxxxx"
  range: "13[346-9]x[2-7]xxxxx"
  range: "14[145][2-7]xxxxxx"
  range: "14[236-9]x[2-7]xxxxx"
  range: "1[59][0235-9]x[2-7]xxxxx"
  range: "1[59][14][2-7]xxxxxx"
  range: "16[014][2-7]xxxxxx"
  range: "16[235-9]x[2-7]xxxxx"
  range: "17[1257][2-7]xxxxxx"
  range: "17[34689]x[2-7]xxxxx"
  range: "18[01346][2-7]xxxxxx"
  range: "18[257-9]x[2-7]xxxxx"
  range: "2[02][2-7]xxxxxxx"
  range: "21[134689]x[2-7]xxxxx"
  range: "21[257][2-7]xxxxxx"
  range: "23[013][2-7]xxxxxx"
  range: "23[24-8]x[2-7]xxxxx"
  range: "24[01][2-7]xxxxxx"
  range: "24[2-8]x[2-7]xxxxx"
  range: "25[0137][2-7]xxxxxx"
  range: "25[25689]x[2-7]xxxxx"
  range: "26[0158][2-7]xxxxxx"
  range: "26[2-4679]x[2-7]xxxxx"
  range: "27[13-79]x[2-7]xxxxx"
  range: "278[2-7]xxxxxx"
  range: "28[1568][2-7]xxxxxx"
  range: "28[2-479]x[2-7]xxxxx"
  range: "29[14][2-7]xxxxxx"
  range: "29[235-9]x[2-7]xxxxx"
  range: "301x[2-7]xxxxx"
  range: "31[79]x[2-7]xxxxx"
  range: "32[1-5]x[2-7]xxxxx"
  range: "326[2-7]xxxxxx"
  range: "33[2-7]xxxxxxx"
  range: "34[13][2-7]xxxxxx"
  range: "342[0189][2-7]xxxxx"
  range: "342[2-7]xxxxxx"
  range: "34[5-8]x[2-7]xxxxx"
  range: "35[125689]x[2-7]xxxxx"
  range: "35[34][2-7]xxxxxx"
  range: "36[01489][2-7]xxxxxx"
  range: "36[235-7]x[2-7]xxxxx"
  range: "37[02-46][2-7]xxxxxx"
  range: "37[157-9]x[2-7]xxxxx"
  range: "38[159][2-7]xxxxxx"
  range: "38[2-467]x[2-7]xxxxx"
  range: "4[04][2-7]xxxxxxx"
  range: "41[14578]x[2-7]xxxxx"
  range: "41[36][2-7]xxxxxx"
  range: "42[1-47][2-7]xxxxxx"
  range: "42[5689]x[2-7]xxxxx"
  range: "43[15][2-7]xxxxxx"
  range: "43[2-467]x[2-7]xxxxx"
  range: "45[12][2-7]xxxxxx"
  range: "45[4-7]x[2-7]xxxxx"
  range: "46[0-26-9][2-7]xxxxxx"
  range: "46[35]x[2-7]xxxxx"
  range: "47[0-24-9][2-7]xxxxxx"
  range: "473x[2-7]xxxxx"
  range: "48[013-57][2-7]xxxxxx"
  range: "48[2689]x[2-7]xxxxx"
  range: "49[014-7][2-7]xxxxxx"
  range: "49[2389]x[2-7]xxxxx"
  range: "51[025][2-7]xxxxxx"
  range: "51[146-9]x[2-7]xxxxx"
  range: "52[14-8]x[2-7]xxxxx"
  range: "522[2-7]xxxxxx"
  range: "53[1346]x[2-7]xxxxx"
  range: "53[25][2-7]xxxxxx"
  range: "54[14-69]x[2-7]xxxxx"
  range: "54[28][2-7]xxxxxx"
  range: "55[12][2-7]xxxxxx"
  range: "55[46]x[2-7]xxxxx"
  range: "56[146-9]x[2-7]xxxxx"
  range: "56[25][2-7]xxxxxx"
  range: "571[2-7]xxxxxx"
  range: "57[2-4]x[2-7]xxxxx"
  range: "581[2-7]xxxxxx"
  range: "58[2-8]x[2-7]xxxxx"
  range: "59[15][2-7]xxxxxx"
  range: "59[246]x[2-7]xxxxx"
  range: "61[1358]x[2-7]xxxxx"
  range: "612[2-7]xxxxxx"
  range: "621[2-7]xxxxxx"
  range: "62[2457]x[2-7]xxxxx"
  range: "631[2-7]xxxxxx"
  range: "63[2-4]x[2-7]xxxxx"
  range: "641[2-7]xxxxxx"
  range: "64[235-7]x[2-7]xxxxx"
  range: "65[17][2-7]xxxxxx"
  range: "65[2-689]x[2-7]xxxxx"
  range: "66[13][2-7]xxxxxx"
  range: "66[24578]x[2-7]xxxxx"
  range: "671[2-7]xxxxxx"
  range: "67[235689]x[2-7]xxxxx"
  range: "674[0189][2-7]xxxxx"
  range: "674[2-7]xxxxxx"
  range: "680[2-7]xxxxxx"
  range: "68[1-6]x[2-7]xxxxx"
  range: "71[013-9]x[2-7]xxxxx"
  range: "712[2-7]xxxxxx"
  range: "72[0235-9]x[2-7]xxxxx"
  range: "72[14][2-7]xxxxxx"
  range: "73[134][2-7]xxxxxx"
  range: "73[2679]x[2-7]xxxxx"
  range: "74[1-35689]x[2-7]xxxxx"
  range: "74[47][2-7]xxxxxx"
  range: "75[15][2-7]xxxxxx"
  range: "75[2-46-9]x[2-7]xxxxx"
  range: "7[67][02-9]x[2-7]xxxxx"
  range: "7[67]1[2-7]xxxxxx"
  range: "78[013-7]x[2-7]xxxxx"
  range: "782[0-6][2-7]xxxxx"
  range: "788[0189][2-7]xxxxx"
  range: "788[2-7]xxxxxx"
  range: "79[0189]x[2-7]xxxxx"
  range: "79[2-7]xxxxxxx"
  range: "80[2-467]xxxxxxx"
  range: "81[1357-9]x[2-7]xxxxx"
  range: "816[2-7]xxxxxx"
  range: "82[014][2-7]xxxxxx"
  range: "82[235-8]x[2-7]xxxxx"
  range: "83[03-57-9]x[2-7]xxxxx"
  range: "83[126][2-7]xxxxxx"
  range: "84[0-24-9]x[2-7]xxxxx"
  range: "85xx[2-7]xxxxx"
  range: "86[136][2-7]xxxxxx"
  range: "86[2457-9]x[2-7]xxxxx"
  range: "87[078][2-7]xxxxxx"
  range: "87[1-6]x[2-7]xxxxx"
  range: "88[1256]x[2-7]xxxxx"
  range: "88[34][2-7]xxxxxx"
  range: "891[2-7]xxxxxx"
  range: "89[2-4]x[2-7]xxxxx"
  expected: "\x81\x0f\xac\x72\x08\x1e\x3b\x58\xad\xcc\x75\x8d\x8b\x0f\xac\x72\xdc\xec\xf4\x08"
  expected: "\x0a\x0c\x0e\x10\x10\xf2\x10\xfa\x11\x00\x11\x06\x11\x0e\x93\x0f\xac\x6d\xc6\x09"
  expected: "\x0b\x0d\x0f\x11\x13\x15\x17\x11\x07\x11\x0f\x11\x17\x11\x1f\x11\x27\x11\x2d\x11"
  expected: "\x35\x11\x3d\x81\x31\xf5\x9d\x09\x0b\x0d\xa9\x0f\x11\x13\x15\x17\x12\x27\x12\x28"
  expected: "\x11\x34\x11\x38\x11\x3d\x11\x41\x11\x43\x11\x45\x93\x0f\xa9\x9d\x8c\x09\x0b\x0d"
  expected: "\x0f\x11\x13\x15\x17\x11\x3c\x11\x40\x11\x44\x11\x48\x11\x4c\x11\x50\x11\x52\x11"
  expected: "\x54\x90\xed\xac\x72\x08\x99\x0a\x0c\x0e\x10\x12\x73\x11\xab\x11\xad\x11\xb1\x11"
  expected: "\xb5\x11\xb9\x11\xdd\x95\x31\xf5\x9d\x63\x0a\x0c\x0e\x10\x12\x14\x16\x18\x1a\x11"
  expected: "\xab\x11\xaf\x11\xb3\x11\xd4\x11\xd5\x11\xb1\x11\xb5\x11\xb9\x11\x44\x93\x0f\xac"
  expected: "\x72\x09\x0b\x0d\x0f\x11\x13\x15\x17\x19\x11\x11\x11\x15\x11\x19\x11\x1d\x11\x21"
  expected: "\x11\x25\x11\x29\x11\x2d\x11\x31\x81\x0f\xac\x72\x08\x0a\x0c\x0e\x10\x12\x14\x16"
  expected: "\x11\x29\x11\x2d\x11\x13\x11\x2f\x11\x33\x11\x37\x11\x3b\x11\x40\x60\xfc\x11\x90"
  expected: "\x6b\x03\x02\x04\x11\x93\x11\x88\x60\xdc\x11\x84\x6a\x17\x02\x04\x11\x80\x11\x85"
  expected: "\x68\x27\x02\x04\x11\x78\x11\x7d\x84\x44\x89\x52\x02\x04\x11\x6e\x11\x73\x6b\xed"
  expected: "\x02\x04\x11\x6d\x11\x64\x68\x13\x02\x04\x11\x5e\x11\x63\x84\x42\x8a\x4a\x02\x04"
  expected: "\x11\x54\x11\x59\x68\x5b\x02\x04\x11\x4c\x11\x51\x82\x24\x51\x32\x02\x04\x11\x49"
  expected: "\x11\x40\x80\x44\x92\x33\x02\x04\x11\x38\x11\x3d\x80\x44\x92\x53\x02\x04\x11\x2e"
  expected: "\x11\x33\x84\x42\x90\x33\x02\x04\x11\x24\x11\x29\x69\x23\x02\x04\x11\x1c\x11\x21"
  expected: "\x82\x42\x49\x22\x02\x04\x11\x19\x11\x10\x84\x24\x4a\x52\x02\x04\x11\x08\x11\x0d"
  expected: "\x84\x44\x91\x52\x02\x04\x10\xfe\x11\x03\x80\x00\x89\x2a\xff\xf8\x80\x66\xd8\x32"
  expected: "\xf2\xf5\xf9\x82\x20\x4a\x4a\xf2\xeb\x6b\x13\xe7\xee\x68\x5d\xe3\xea\x82\x04\x8a"
  expected: "\x52\xdd\xe4\x80\x22\x89\x42\xde\xd7\x84\x42\x91\x2a\xd1\xd8\x80\x04\x8a\x52\xcb"
  expected: "\xd2\x80\x04\x92\x0a\xc5\xcc\x82\x22\x50\x4b\xbf\xc6\x6b\xf7\xbb\xc2\x68\xbb\xb7"
  expected: "\xbe\x68\xf3\xb3\xba\x84\x44\x8a\x0d\xad\xb4\x80\x22\x49\x12\xae\xa7\x80\x00\x51"
  expected: "\x32\xa8\xa1\x82\x40\x49\x12\xa2\x9b\x80\x00\x82\x0a\x95\x9c\x82\x22\x51\x12\x96"
  expected: "\x8f\x80\x00\x02\x52\x89\x90\x80\x44\x92\x52\x83\x8a\x80\x00\x8a\x12\x7d\x84\x80"
  expected: "\x20\x08\x32\x7e\x77\x80\x04\x12\x12\x71\x78\x80\x04\x90\x52\x6b\x72\x84\x42\x92"
  expected: "\x52\x65\x6c\x80\x44\x12\x32\x5f\x66\x84\x40\x93\x52\x59\x60\x5c\x80\x00\x92\x55"
  expected: "\x52\x59\x6b\xfb\x55\x4e\x84\x04\x81\x32\x48\x4f\x82\x24\x4a\x2a\x49\x42\x84\x44"
  expected: "\x8a\x52\x3c\x43\x6b\xfd\x3f\x38\x82\x22\x88\x22\x39\x32\x80\x44\x91\x53\x2c\x33"
  expected: "\x6b\xb9\x2f\x28\x84\x44\x52\x32\x22\x29\x80\x22\x92\x55\x1c\x23\x80\x00\x4a\x4a"
  expected: "\x1d\x16\x80\x62\x49\x33\x17\x19\x13\x21\x10\x11\x62\x80\x0e\x63\xf7\x0b\x40\x09"
  expected: "\x40\x0c\x60\xfc\x09\x6b\x03\x09\x07\x40\x05\x60\x7f\x02\x40\x02\x60\xfc\x44\x00"
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/AnyPathTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/AnyPathTest.java
@ -0,0 +1,106 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.regex.AnyPath.EMPTY;
 import static com.google.i18n.phonenumbers.metadata.regex.AnyPath.OPTIONAL;
 import static com.google.i18n.phonenumbers.metadata.regex.AnyPath.SINGLE;

 import com.google.common.collect.ImmutableSortedSet;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class AnyPathTest {
  // In the comments below, masks are written in binary, where a set bit N means that the path
  // accepts a sequence of exactly N digits (bit 0 is the rightmost bit).

  @Test
  public void testConstants() {
    assertPath(EMPTY, 0);
    assertPath(SINGLE, 1);
    assertPath(OPTIONAL, 0, 1);
  }

  @Test
  public void testExtend() {
    assertThat(EMPTY.extend(false)).isEqualTo(SINGLE);
    assertThat(EMPTY.extend(true)).isEqualTo(OPTIONAL);
    // Non-optional extension is the same as joining with SINGLE.
    assertPath(SINGLE.extend(false), 2);
    // Optional extension is NOT the same as SINGLE.join(OPTIONAL), which accepts 1 or 2.
    assertPath(SINGLE.extend(true), 0, 2);

    // 100 extends to 1000 or 1001 (if optional).
    AnyPath twoDigits = AnyPath.of(0x4);
    assertPath(twoDigits.extend(false), 3);
    assertPath(twoDigits.extend(true), 0, 3);
  }

  @Test
  public void testJoin() {
    assertThat(EMPTY.join(SINGLE)).isEqualTo(SINGLE);
    assertThat(EMPTY.join(OPTIONAL)).isEqualTo(OPTIONAL);
    assertPath(SINGLE.join(SINGLE), 2);
    assertPath(SINGLE.join(OPTIONAL), 1, 2);
    assertPath(OPTIONAL.join(OPTIONAL), 0, 1, 2);

    // "(x(x)?)?" == 111 and matches 0 to 2.
    // "(x(x)?)?".join("(x(x)?)?") == "(x(x(x(x)?)?)?)?" == 11111 and matches 0 to 4.
    AnyPath zeroToTwo = AnyPath.of(0x7);
    assertThat(zeroToTwo.join(zeroToTwo)).isEqualTo(AnyPath.of(0x1F));

    // "xx(x)?" == 1100 and matches 2 or 3.
    // "(xx)?" == 0101 and matches 0 or 2.
    // "xx(x)?".join("(xx)?") == "xx(x(x(x)?)?)?" == 111100 and matches 2 to 5.
    AnyPath twoOrThree = AnyPath.of(0xC);
    AnyPath zeroOrTwo = AnyPath.of(0x5);
    assertThat(twoOrThree.join(zeroOrTwo)).isEqualTo(AnyPath.of(0x3C));
  }

  @Test
  public void testMakeOptional() {
    // Making an already optional path optional changes nothing.
    assertThat(OPTIONAL.makeOptional()).isEqualTo(OPTIONAL);
    assertThat(SINGLE.makeOptional()).isEqualTo(OPTIONAL);
    // 100 (exactly 2 digits) becomes 101 (0 or 2 digits).
    assertPath(AnyPath.of(0x4).makeOptional(), 0, 2);
  }

  @Test
  public void testToString() {
    assertThat(SINGLE.toString()).isEqualTo("x");
    assertThat(OPTIONAL.toString()).isEqualTo("(x)?");
    // 1000 = 3 digits
    assertThat(AnyPath.of(0x8).toString()).isEqualTo("xxx");
    // 1010 = 1 or 3 digits
    assertThat(AnyPath.of(0xA).toString()).isEqualTo("x(xx)?");
    // 1111 = 0 to 3 digits
    assertThat(AnyPath.of(0xF).toString()).isEqualTo("(x(x(x)?)?)?");
  }

  // Ordering is important as we need to find the shortest path at certain times.
  @Test
  public void testOrdering() {
    assertThat(SINGLE).isGreaterThan(EMPTY);
    assertThat(OPTIONAL).isGreaterThan(SINGLE);

    assertThat(AnyPath.of(0x8)).isGreaterThan(AnyPath.of(0x4));
    // Same length, but the 2nd highest length match is taken into account as a tie break.
    // This strategy turns out to match numeric comparison perfectly since set-bits are lengths.
    assertThat(AnyPath.of(0xA)).isGreaterThan(AnyPath.of(0x9));
  }

  // Asserts that the path's maximum length is the largest of the given lengths, and that for
  // every length up to that maximum the path accepts it if, and only if, it was given. At least
  // one length must be supplied.
  private static void assertPath(AnyPath p, Integer... n) {
    ImmutableSortedSet<Integer> expected = ImmutableSortedSet.copyOf(n);
    int longest = expected.last();
    assertThat(p.maxLength()).isEqualTo(longest);
    for (int length = 0; length <= longest; length++) {
      assertThat(p.acceptsLength(length)).isEqualTo(expected.contains(length));
    }
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeTest.java
@ -0,0 +1,224 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.RangeSpecification.ALL_DIGITS_MASK;
 import static org.junit.Assert.fail;

 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableSet;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.Visitor;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Set;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class EdgeTest {
  @Test
  public void testSimple() {
    assertThat(Edge.fromMask(0x6).getDigitMask()).isEqualTo(0x6);
    assertThat(Edge.fromMask(0x6).isOptional()).isFalse();

    assertThat(Edge.fromMask(0x3).toString()).isEqualTo("[01]");  // 0000000011
    assertThat(Edge.fromMask(0x300).toString()).isEqualTo("[89]");  // 1100000000
    assertThat(Edge.fromMask(0x1FE).toString()).isEqualTo("[1-8]");  // 0111111110
    assertThat(Edge.fromMask(ALL_DIGITS_MASK).toString()).isEqualTo("x");  // any digit
  }

  @Test
  public void testAny() {
    assertThat(Edge.fromMask(ALL_DIGITS_MASK)).isEqualTo(Edge.any());
    assertThat(Edge.any().optional()).isEqualTo(Edge.optionalAny());

    assertThat(Edge.any().toString()).isEqualTo("x");
    // Unlike AnyPath, simple edges are not sequences, so don't need parens for optional.
    assertThat(Edge.optionalAny().toString()).isEqualTo("x?");
  }

  @Test
  public void testEpsilon() {
    // Epsilon isn't optional, it represents a path that non-optionally accepts no input.
    assertThat(Edge.epsilon().isOptional()).isFalse();
    assertThat(Edge.epsilon().toString()).isEqualTo("e");
  }

  @Test
  public void testConcatenation() {
    Edge concatenated = Edge.concatenation(Edge.fromMask(0x3), Edge.any());
    assertThat(concatenated.toString()).isEqualTo("[01]x");
    TestingVisitor v = new TestingVisitor() {
      @Override
      public void visitSequence(List<Edge> edges) {
        assertThat(edges).containsExactly(Edge.fromMask(0x3), Edge.any()).inOrder();
        wasTested = true;
      }
    };
    concatenated.accept(v);
    assertThat(v.wasTested).isTrue();
  }

  @Test
  public void testGroup() {
    Edge group = Edge.disjunction(ImmutableSet.of(Edge.fromMask(0x3), Edge.any()));
    TestingVisitor v = new TestingVisitor() {
      @Override
      public void visitGroup(Set<Edge> edges, boolean isOptional) {
        assertThat(edges).containsExactly(Edge.any(), Edge.fromMask(0x3)).inOrder();
        assertThat(isOptional).isFalse();
        wasTested = true;
      }
    };
    group.accept(v);
    assertThat(group.toString()).isEqualTo("(x|[01])");
    assertThat(v.wasTested).isTrue();
  }

  @Test
  public void testOptionalGroup() {
    Edge group = Edge.disjunction(ImmutableSet.of(Edge.fromMask(0x3), Edge.epsilon(), Edge.any()));
    TestingVisitor v = new TestingVisitor() {
      @Override
      public void visitGroup(Set<Edge> edges, boolean isOptional) {
        // Reordered and epsilon removed.
        assertThat(edges).containsExactly(Edge.any(), Edge.fromMask(0x3)).inOrder();
        assertThat(isOptional).isTrue();
        wasTested = true;
      }
    };
    group.accept(v);
    assertThat(group.toString()).isEqualTo("(x|[01])?");
    assertThat(v.wasTested).isTrue();
  }

  @Test
  public void testOrdering() {
    // Testing ordering is important because when generating regular expressions, the edge order
    // defines a lot about the visual order of the final regular expression. This order should be
    // as close to "what a person would consider reasonable" as possible. In fact some of the cases
    // tested here will never occur in real situations (e.g. sequences compared with groups)
    // because of the way composite edges are created. However it seems sensible to test the
    // behaviour nevertheless.

    // Simple Edges

    assertSameOrder(e("0"), e("0"));
    // "0" < "1" - lowest bit set wins
    assertOrdered(e("0"), e("1"));
    // "[01]" < "1" - lowest bit set wins
    assertOrdered(e("[01]"), e("1"));
    // "x" < "9" - lowest bit set wins
    assertOrdered(X, e("9"));

    // Sequences

    // ("0x" < "1") and ("0" < "1x") - first edge in sequence is compared to single edge.
    assertOrdered(seq(e("0"), X), e("1"));
    assertOrdered(e("0"), seq(e("1"), X));
    // "[01]" < "[01]x" - single edges are "smaller" than sequences of edges if all else is equal.
    assertOrdered(e("[01]"), seq(e("[01]"), X));

    // "[01]x" == "[01]x"
    assertSameOrder(seq(e("[01]"), X), seq(e("[01]"), X));
    // "x1" < "x2" - comparing 2 sequences compares all edges.
    assertOrdered(seq(X, e("1")), seq(X, e("2")));

    // "[01]x" < "[01]xx" - shortest sequence wins in tie break (similar to how "[01]" < "[01]x")
    assertOrdered(seq(e("[01]"), X), seq(e("[01]"), X, X));

    // Disjunctions

    // "(1|2)" == "(2|1)" - edges are sorted when creating disjunctions
    assertSameOrder(or(e("1"), e("2")), or(e("2"), e("1")));
    // "(1|2|3)" < "(1|2|4)" - comparing 2 disjunctions compares all edges.
    assertOrdered(or(e("1"), e("2"), e("3")), or(e("1"), e("2"), e("4")));
    // "(1|2)" < "(1|2|3)" - shortest sequence wins in tie break
    assertOrdered(or(e("1"), e("2")), or(e("1"), e("2"), e("3")));

    // Miscellaneous

    // "1" < "(1|2)" - if first edge matches, single edges sort before groups.
    assertOrdered(e("1"), or(e("1"), e("2")));

    // "(1|x)" < "1x" - because "(1|x)" is actually "(x|1)" and "x" < "1".
    assertOrdered(or(e("1"), X), seq(e("1"), X));
  }

  private static void assertSameOrder(Edge lhs, Edge rhs) {
    assertThat(lhs).isEquivalentAccordingToCompareTo(rhs);
    assertThat(lhs).isEqualTo(rhs);
  }

  private static void assertOrdered(Edge lhs, Edge rhs) {
    assertThat(lhs).isNotEqualTo(rhs);
    assertThat(lhs).isLessThan(rhs);
    assertThat(rhs).isGreaterThan(lhs);
  }

  // A bit like a mock, but not really "mocking" existing behaviour.
  private static class TestingVisitor implements Visitor {
    // Set this in overridden method(s).
    protected boolean wasTested = false;

    @Override
    public void visit(SimpleEdge edge) {
      fail("unexpected call");
    }

    @Override
    public void visitSequence(List<Edge> edges) {
      fail("unexpected call");
    }

    @Override
    public void visitGroup(Set<Edge> edges, boolean isOptional) {
      fail("unexpected call");
    }
  }

  // The 'any digit' edge.
  private static final Edge X = e("x");

  // Creates a simple edge from a range specification string for testing.
  private static SimpleEdge e(String s) {
    RangeSpecification spec = RangeSpecification.parse(s);
    Preconditions.checkArgument(spec.length() == 1, "only specify single digit ranges");
    return SimpleEdge.fromMask(spec.getBitmask(0));
  }

  // Creates sequence of edges (wrapping for convenience).
  private static Edge seq(Edge first, Edge second, Edge... rest) {
    // This already rejects epsilon edges.
    Edge edge = Edge.concatenation(first, second);
    for (Edge e : rest) {
      edge = Edge.concatenation(edge, e);
    }
    return edge;
  }

  // Creates a non-optional disjunction of edges.
  private static Edge or(Edge... edges) {
    List<Edge> e = Arrays.asList(edges);
    Preconditions.checkArgument(!e.contains(Edge.epsilon()), "use 'opt()' for optional groups");
    return Edge.disjunction(e);
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriterTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/EdgeWriterTest.java
@ -0,0 +1,154 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.truth.Truth.assertThat;

 import com.google.common.base.Preconditions;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class EdgeWriterTest {

  // Note that this code is tested very thoroughly by any "round-tripping" of regular expressions
  // in the metadata (i.e. generating regular expressions from DFAs and then re-parsing them to
  // ensure that the same DFA is produced). This is part of any acceptance test for generating
  // regular expressions and serves as a far more comprehensive stress test on the code. These
  // tests are thus limited to simpler cases and highlighting interesting behaviour.

  // The 'any digit' edge, built by e() from the single-character range specification "x".
  private static final Edge X = e("x");

  @Test
  public void testSimple() {
    // A single digit and a partial range are written out literally.
    String singleDigit = regex(e("0"));
    assertThat(singleDigit).isEqualTo("0");
    String partialRange = regex(e("[0-7]"));
    assertThat(partialRange).isEqualTo("[0-7]");
    // The full range and the 'any digit' edge are both written as "\d".
    String fullRange = regex(e("[0-9]"));
    assertThat(fullRange).isEqualTo("\\d");
    String anyDigit = regex(X);
    assertThat(anyDigit).isEqualTo("\\d");
  }

  @Test
  public void testSequences() {
    // A plain sequence of single digit edges is written as the digits in order.
    Edge digits = seq(e("0"), e("1"), e("2"));
    assertThat(regex(digits)).isEqualTo("012");
  }

  @Test
  public void testGroups() {
    // Non-optional groups spanning the top level don't need parentheses.
    Edge topLevelGroup = or(e("0"), e("1"), e("2"));
    assertThat(regex(topLevelGroup)).isEqualTo("0|1|2");
    // Optional groups always need parentheses.
    Edge optionalGroup = opt(e("0"), e("1"), e("2"));
    assertThat(regex(optionalGroup)).isEqualTo("(?:0|1|2)?");
    // Once a group has prefix or suffix, parentheses are needed.
    Edge groupWithSuffix = seq(or(e("0"), e("1")), e("2"));
    assertThat(regex(groupWithSuffix)).isEqualTo("(?:0|1)2");
  }

  @Test
  public void testNesting() {
    // Basic nesting is handled by a very straightforward edge visitor, so one non-trivial test
    // will cover all the basic cases ("any digit" sequences are a different matter however).
    Edge nested =
        seq(
            e("0"),
            or(e("1"), seq(e("2"), opt(e("3"), e("4")))),
            e("5"),
            e("6"));
    String written = regex(nested);
    assertThat(written).isEqualTo("0(?:1|2(?:3|4)?)56");
  }

  @Test
  public void testAnyDigitSequences() {
    // This is the complex part of efficient regular expression generation.
    Edge literalsThenAny = seq(e("0"), e("1"), X);
    assertThat(regex(literalsThenAny)).isEqualTo("01\\d");
    // "\d\d" is shorter than "\d{2}", while three digits are written in the counted form.
    Edge twoAny = seq(X, X);
    assertThat(regex(twoAny)).isEqualTo("\\d\\d");
    Edge threeAny = seq(X, X, X);
    assertThat(regex(threeAny)).isEqualTo("\\d{3}");
    // Top level optional groups are supported.
    Edge optionalPair = opt(seq(X, X));
    assertThat(regex(optionalPair)).isEqualTo("(?:\\d{2})?");
    // Optional parts go at the end.
    Edge optionalPairFirst = seq(opt(seq(X, X)), X, X);
    assertThat(regex(optionalPairFirst)).isEqualTo("\\d\\d(?:\\d{2})?");
    // "(x(x(x)?)?)?"
    Edge anyGrp = opt(seq(X, opt(seq(X, opt(X)))));
    // The two cases of a group on its own or as part of a sequence are handled separately, so
    // must be tested separately.
    String groupAlone = regex(anyGrp);
    assertThat(groupAlone).isEqualTo("\\d{0,3}");
    String groupInSequence = regex(seq(e("1"), e("2"), anyGrp));
    assertThat(groupInSequence).isEqualTo("12\\d{0,3}");
    // "xx(x(x(x)?)?)?"
    String groupAfterAnyDigits = regex(seq(X, X, anyGrp));
    assertThat(groupAfterAnyDigits).isEqualTo("\\d{2,5}");
    // Combining "any digit" groups produces minimal representation
    String combinedGroups = regex(seq(anyGrp, anyGrp));
    assertThat(combinedGroups).isEqualTo("\\d{0,6}");
  }

  // Writes the edge with the standard regex generator, i.e. with 'dot' matching switched off.
  private String regex(Edge e) {
    boolean useDotMatch = false;
    return EdgeWriter.toRegex(e, useDotMatch);
  }

  // Builds the simple edge for a single-digit range specification such as "0", "[0-7]" or "x".
  // A specification of any other length is rejected with an IllegalArgumentException.
  private static SimpleEdge e(String s) {
    RangeSpecification digitSpec = RangeSpecification.parse(s);
    boolean coversOneDigit = digitSpec.length() == 1;
    Preconditions.checkArgument(coversOneDigit, "only specify single digit ranges");
    return SimpleEdge.fromMask(digitSpec.getBitmask(0));
  }

  // Chains the given edges, in order, into one sequence edge (two or more edges are required).
  private static Edge seq(Edge first, Edge second, Edge... rest) {
    // Edge.concatenation() already rejects epsilon edges.
    Edge sequence = Edge.concatenation(first, second);
    int index = 0;
    while (index < rest.length) {
      sequence = Edge.concatenation(sequence, rest[index]);
      index++;
    }
    return sequence;
  }

  // Builds a group in which exactly one of the given edges must match. Epsilon is not accepted
  // here (IllegalArgumentException); optional groups are created with opt().
  private static Edge or(Edge... edges) {
    List<Edge> choices = Arrays.asList(edges);
    boolean containsEpsilon = choices.contains(Edge.epsilon());
    Preconditions.checkArgument(!containsEpsilon, "use 'opt()' for optional groups");
    return Edge.disjunction(choices);
  }

  // Builds an optional group: the disjunction of the given edges plus the epsilon edge, which
  // this helper appends itself. Callers must not pass epsilon (IllegalArgumentException).
  private static Edge opt(Edge... edges) {
    List<Edge> choices = new ArrayList<>(Arrays.asList(edges));
    boolean containsEpsilon = choices.contains(Edge.epsilon());
    Preconditions.checkArgument(!containsEpsilon, "don't pass epsilon directly");
    choices.add(Edge.epsilon());
    return Edge.disjunction(choices);
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaBuilder.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaBuilder.java
@ -0,0 +1,98 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;

 import com.google.common.graph.MutableValueGraph;
 import com.google.common.graph.ValueGraph;
 import com.google.common.graph.ValueGraphBuilder;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;

 /** Simple fluent API for constructing graphs for testing. */
 final class NfaBuilder {
  // The graph under construction: directed, with self-loops disallowed.
  private final MutableValueGraph<Node, SimpleEdge> graph =
      ValueGraphBuilder.directed().allowsSelfLoops(false).build();
  // The last node added to the graph; each new node is created from it via createNext().
  private Node lastNode;

  /**
   * Creates a builder whose graph initially holds only the {@code INITIAL} and {@code TERMINAL}
   * nodes and no edges.
   */
  public NfaBuilder() {
    // Nodes added later are created from the most recent node, starting at TERMINAL.
    lastNode = TERMINAL;
    graph.addNode(INITIAL);
    graph.addNode(TERMINAL);
  }

  /**
   * Returns the underlying graph through the {@link ValueGraph} interface (not a snapshot). If
   * the builder is modified after this method is called, it will affect what was returned. Note
   * that the returned object is the builder's own graph instance rather than a wrapper.
   */
  public ValueGraph<Node, SimpleEdge> graph() {
    return graph;
  }

  /**
   * Adds a new path starting at the given source node, creating one new node (and one edge) per
   * digit of the path.
   *
   * @param source the node the path starts from
   * @param path the path as a range specification string, parsed via RangeSpecification.parse()
   * @return the node at the end of the path, i.e. the last newly created node; for an empty path
   *     nothing is created and the source node itself is returned
   * @throws IllegalArgumentException if an edge along the path already exists
   */
  public Node addPath(Node source, String path) {
    RangeSpecification spec = RangeSpecification.parse(path);
    Node end = source;
    for (int n = 0; n < spec.length(); n++) {
      lastNode = lastNode.createNext();
      addEdge(end, lastNode, SimpleEdge.fromMask(spec.getBitmask(n)));
      end = lastNode;
    }
    // Return the path's end rather than lastNode so that an empty path yields the source and
    // not whichever unrelated node happened to be created last.
    return end;
  }

  /**
   * Adds a new path between the given source and target (all intermediate nodes are new).
   *
   * @param source the node the path starts from
   * @param target the node the final edge of the path leads to
   * @param path the path as a range specification string, parsed via RangeSpecification.parse();
   *     it must contain at least one digit since the last digit labels the edge into the target
   * @throws IllegalArgumentException if the path is empty or an edge along it already exists
   */
  public void addPath(Node source, Node target, String path) {
    RangeSpecification spec = RangeSpecification.parse(path);
    // Without this check an empty path would ask for the bitmask at index -1 below.
    checkArgument(spec.length() > 0, "path must contain at least one digit: %s", path);
    int lastIndex = spec.length() - 1;
    // Every digit except the last gets a freshly created intermediate node.
    for (int n = 0; n < lastIndex; n++) {
      lastNode = lastNode.createNext();
      addEdge(source, lastNode, SimpleEdge.fromMask(spec.getBitmask(n)));
      source = lastNode;
    }
    addEdge(source, target, SimpleEdge.fromMask(spec.getBitmask(lastIndex)));
  }

  /**
   * Adds a new path between the given source and target nodes, along with an epsilon edge from the
   * source to the target. For a single-digit path the direct edge from source to target already
   * exists when the epsilon is added, so that edge is replaced by its optional form instead.
   */
  public void addOptionalPath(Node source, Node target, String path) {
    // Order matters: addEpsilon() inspects any edge already present between source and target.
    addPath(source, target, path);
    addEpsilon(source, target);
  }

  // Makes it possible to go from s to t without consuming a digit. If there is no edge between
  // the nodes yet, an epsilon edge is added; if a (non-optional, non-epsilon) edge already
  // exists, it is replaced by its optional form. Both nodes must already be in the graph.
  // Throws IllegalArgumentException if either node is missing or the edge is already optional.
  private void addEpsilon(Node s, Node t) {
    checkArgument(graph.nodes().contains(s), "missing source node");
    // The target must be checked explicitly: putEdgeValue() would silently add a missing node.
    checkArgument(graph.nodes().contains(t), "missing target node");
    SimpleEdge previous = graph.putEdgeValue(s, t, Edge.epsilon());
    if (previous != null) {
      // Edge already exists; if not an epsilon, make it optional.
      boolean canBecomeOptional = !previous.equals(Edge.epsilon()) && !previous.isOptional();
      checkArgument(canBecomeOptional, "epsilon already added");
      graph.putEdgeValue(s, t, previous.optional());
    }
  }

  // Ensures both nodes are in the graph and then adds the edge from s to t, failing with an
  // IllegalArgumentException if an edge between them was already present.
  private void addEdge(Node s, Node t, SimpleEdge e) {
    graph.addNode(s);
    graph.addNode(t);
    SimpleEdge previous = graph.putEdgeValue(s, t, e);
    checkArgument(previous == null, "edge already exists");
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattenerTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NfaFlattenerTest.java
@ -0,0 +1,229 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;

 import com.google.common.base.Preconditions;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.List;
 import java.util.TreeSet;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class NfaFlattenerTest {
  // The 'any digit' edge, built by e() from the single-character range specification "x".
  private static final Edge X = e("x");

  @Test
  public void testSimple() {
    NfaBuilder nfa = new NfaBuilder();

    // A single path flattens to a plain sequence.
    nfa.addPath(INITIAL, TERMINAL, "12");
    Edge singlePath = NfaFlattener.flatten(nfa.graph());
    Edge expectedSingle = seq(e("1"), e("2"));
    assertThat(singlePath).isEqualTo(expectedSingle);
    assertThat(singlePath.toString()).isEqualTo("12");

    // A second, parallel path turns the result into a group of two sequences.
    nfa.addPath(INITIAL, TERMINAL, "34");
    Edge twoPaths = NfaFlattener.flatten(nfa.graph());
    Edge expectedGroup = or(seq(e("1"), e("2")), seq(e("3"), e("4")));
    assertThat(twoPaths).isEqualTo(expectedGroup);
    assertThat(twoPaths.toString()).isEqualTo("(12|34)");
  }

  @Test
  public void testSubgroup() {
    // Two alternative paths ("34" and "56") between a common prefix "12" and suffix "78".
    NfaBuilder nfa = new NfaBuilder();
    Node split = nfa.addPath(INITIAL, "12");
    Node join = nfa.addPath(split, "34");
    nfa.addPath(split, join, "56");
    nfa.addPath(join, TERMINAL, "78");

    Edge flat = NfaFlattener.flatten(nfa.graph());

    Edge middle = or(seq(e("3"), e("4")), seq(e("5"), e("6")));
    Edge expected = seq(e("1"), e("2"), middle, e("7"), e("8"));
    assertThat(flat).isEqualTo(expected);
    assertThat(flat.toString()).isEqualTo("12(34|56)78");
  }

  @Test
  public void testSubgroupWithEarlyJoining() {
    NfaBuilder nfa = new NfaBuilder();
    // Create a graph with 4 initial paths branching out which collapses to 3, 2 and then 1.
    Node groupStart = nfa.addPath(INITIAL, "0");
    // Add 2 edges to the first join point (if we add only one edge then it clashes with the
    // joining edge, which goes directly from groupStart to firstJoin).
    Node afterOne = nfa.addPath(groupStart, "1");
    Node firstJoin = nfa.addPath(afterOne, "2");
    nfa.addPath(groupStart, firstJoin, "3");
    Node secondJoin = nfa.addPath(firstJoin, "4");
    nfa.addPath(groupStart, secondJoin, "5");
    Node groupEnd = nfa.addPath(secondJoin, "6");
    nfa.addPath(groupStart, groupEnd, "7");
    nfa.addPath(groupEnd, TERMINAL, "8");

    Edge flat = NfaFlattener.flatten(nfa.graph());

    // Build the expected nesting from the innermost group outwards.
    Edge innerGroup = or(seq(e("1"), e("2")), e("3"));
    Edge middleGroup = or(seq(innerGroup, e("4")), e("5"));
    Edge outerGroup = or(seq(middleGroup, e("6")), e("7"));
    Edge expected = seq(e("0"), outerGroup, e("8"));
    assertThat(flat).isEqualTo(expected);
    assertThat(flat.toString()).isEqualTo("0(((12|3)4|5)6|7)8");
  }

  @Test
  public void testPathDuplication() {
    NfaBuilder nfa = new NfaBuilder();
    Node groupStart = nfa.addPath(INITIAL, "0");
    Node lhsMid = nfa.addPath(groupStart, "1");
    Node groupEnd = nfa.addPath(lhsMid, "2");
    Node rhsMid = nfa.addPath(groupStart, "3");
    nfa.addPath(rhsMid, groupEnd, "4");
    nfa.addPath(groupEnd, TERMINAL, "5");

    // So far this is a normal nestable graph:
    //           ,--1-->()--2--v
    // (I)--0-->()             ()--5-->(T)
    //           `--3-->()--4--^
    Edge nestable = NfaFlattener.flatten(nfa.graph());
    Edge expectedNestable =
        seq(e("0"), or(seq(e("1"), e("2")), seq(e("3"), e("4"))), e("5"));
    assertThat(nestable).isEqualTo(expectedNestable);
    assertThat(nestable.toString()).isEqualTo("0(12|34)5");

    // This new path "crosses" the group, creating a non-nestable structure which can only be
    // resolved by duplicating some path (in this case it's the 2nd part of the right-hand-side).
    nfa.addPath(lhsMid, rhsMid, "x");

    Edge crossed = NfaFlattener.flatten(nfa.graph());
    Edge lhsTail = or(e("2"), seq(X, e("4")));
    Edge expectedCrossed =
        seq(e("0"), or(seq(e("1"), lhsTail), seq(e("3"), e("4"))), e("5"));
    assertThat(crossed).isEqualTo(expectedCrossed);
    // Note the duplication of the '4' to make the graph nestable.
    assertThat(crossed.toString()).isEqualTo("0(1(x4|2)|34)5");
  }

  @Test
  public void testNodeOrdering_bug_65250963() {
    //  ,--->(C)----------.
    //  |                 v
    // (I)-->(D)-->(B)-->(T)
    //  |           ^
    //  `--->(A)----'
    NfaBuilder nfa = new NfaBuilder();
    // IMPORTANT: Order of insertion determines the node IDs (A=1, B=2...). The edge index just
    // happens to match node ID for readability, but doesn't affect the test directly.
    Node a = nfa.addPath(INITIAL, "1");
    Node b = nfa.addPath(a, "2");
    Node c = nfa.addPath(INITIAL, "3");
    Node d = nfa.addPath(INITIAL, "4");
    // Now join up remaining paths.
    nfa.addPath(d, b, "5");
    nfa.addPath(b, TERMINAL, "6");
    nfa.addPath(c, TERMINAL, "7");
    Comparator<Node> ordering = NfaFlattener.nodeOrdering(nfa.graph());

    // In the old ordering code, because (B) and (D) are not reachable to/from (C) we would have
    // had the ordering (D < B), (B < C), (C < D) giving a cycle. In the new code, the longest path
    // length to reach (C) is less than (B), so we get (C < B) and we no longer have a cycle.
    // The node ordering is now: (INITIAL, A, C, D, B, TERMINAL)
    TreeSet<Node> sorted = new TreeSet<>(ordering);
    // Inserted in the same sequence as before; the set orders them using the comparator.
    sorted.addAll(Arrays.asList(INITIAL, TERMINAL, a, b, c, d));
    assertThat(sorted).containsExactly(INITIAL, a, c, d, b, TERMINAL).inOrder();
  }

  @Test
  public void testOptionalTopLevelGroup_bug_69101586() {
    //  ,--->(e)----.
    //  |           v
    // (I)-->(A)-->(T)
    NfaBuilder nfa = new NfaBuilder();
    nfa.addOptionalPath(INITIAL, TERMINAL, "xx");

    Edge flat = NfaFlattener.flatten(nfa.graph());

    // The whole expression is a single optional group around the two 'any digit' edges.
    Edge expected = opt(seq(X, X));
    assertThat(flat).isEqualTo(expected);
    assertThat(flat.toString()).isEqualTo("(xx)?");
  }

  // Parses a range specification covering exactly one digit (e.g. "1" or "x") and returns the
  // matching simple edge. Any other length is rejected with an IllegalArgumentException.
  private static SimpleEdge e(String s) {
    RangeSpecification oneDigit = RangeSpecification.parse(s);
    boolean hasLengthOne = oneDigit.length() == 1;
    Preconditions.checkArgument(hasLengthOne, "only specify single digit ranges");
    return SimpleEdge.fromMask(oneDigit.getBitmask(0));
  }

  // Joins two or more edges, left to right, into one sequence edge.
  private static Edge seq(Edge first, Edge second, Edge... rest) {
    // Edge.concatenation() already rejects epsilon edges.
    Edge result = Edge.concatenation(first, second);
    for (int i = 0; i < rest.length; i++) {
      Edge next = rest[i];
      result = Edge.concatenation(result, next);
    }
    return result;
  }

  // Builds an optional group from the given edges by appending the epsilon edge to them. Passing
  // epsilon explicitly is rejected with an IllegalArgumentException.
  private static Edge opt(Edge... edges) {
    List<Edge> members = new ArrayList<>(Arrays.asList(edges));
    boolean hasEpsilon = members.contains(Edge.epsilon());
    Preconditions.checkArgument(!hasEpsilon, "don't pass epsilon directly");
    members.add(Edge.epsilon());
    return Edge.disjunction(members);
  }

  // Builds a non-optional group from the given edges. Epsilon is rejected here with an
  // IllegalArgumentException; opt() is the helper for optional groups.
  private static Edge or(Edge... edges) {
    List<Edge> members = Arrays.asList(edges);
    boolean hasEpsilon = members.contains(Edge.epsilon());
    Preconditions.checkArgument(!hasEpsilon, "use 'opt()' for optional groups");
    return Edge.disjunction(members);
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NodeTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/NodeTest.java
@ -0,0 +1,68 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;

 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class NodeTest {
  @Test
  public void testConstants() {
    // INITIAL and TERMINAL are distinct nodes with the fixed IDs 0 and 1 respectively.
    assertThat(INITIAL.id()).isEqualTo(0);
    assertThat(TERMINAL.id()).isEqualTo(1);
    assertThat(TERMINAL).isNotEqualTo(INITIAL);
  }

  @Test
  public void testNext() {
    // The node after INITIAL is the TERMINAL instance itself; after that new nodes are made.
    Node afterInitial = INITIAL.createNext();
    assertThat(afterInitial).isSameInstanceAs(TERMINAL);
    Node afterTerminal = TERMINAL.createNext();
    assertThat(afterTerminal).isNotEqualTo(TERMINAL);
    assertThat(TERMINAL.createNext().id()).isEqualTo(2);
    // IDs increase by one along a chain of createNext() calls starting at INITIAL.
    Node current = INITIAL;
    int expectedId = 0;
    while (expectedId < 10) {
      assertThat(current.id()).isEqualTo(expectedId);
      current = current.createNext();
      expectedId++;
    }
  }

  @Test
  public void testToString() {
    // A node's string form is expected to be its ID in decimal.
    Node current = INITIAL;
    int id = 0;
    while (id < 10) {
      String expected = Integer.toString(id);
      assertThat(current.toString()).isEqualTo(expected);
      current = current.createNext();
      id++;
    }
  }

  // Consistent ordering helps ensure regular expressions derived from graphs are deterministic.
  @Test
  public void testOrdering() {
    assertThat(TERMINAL).isGreaterThan(INITIAL);
    // Each node in a chain of createNext() calls must order after its predecessor.
    Node previous = INITIAL;
    int remaining = 10;
    while (remaining > 0) {
      Node following = previous.createNext();
      assertThat(following).isGreaterThan(previous);
      previous = following;
      remaining--;
    }
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverterTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RangeTreeConverterTest.java
@ -0,0 +1,154 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.regex.Edge.any;
 import static com.google.i18n.phonenumbers.metadata.regex.Edge.epsilon;
 import static com.google.i18n.phonenumbers.metadata.regex.Edge.optionalAny;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;

 import com.google.common.collect.Iterables;
 import com.google.common.graph.ValueGraph;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import java.util.List;
 import java.util.stream.Stream;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class RangeTreeConverterTest {
  // Simple 4 node DFA.
  // (I) --1--> ( ) --2--> ( ) --x--> (T)
  @Test
  public void testSimple() {
    RangeTree dfa = RangeTree.from(specs("12x"));
    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
    assertThat(nfa.nodes()).hasSize(4);
    // Walk the only path, checking that each node has exactly the expected out edge.
    Node afterOne = assertOutEdge(nfa, INITIAL, edge(1));
    Node afterTwo = assertOutEdge(nfa, afterOne, edge(2));
    Node end = assertOutEdge(nfa, afterTwo, any());
    assertThat(end).isEqualTo(TERMINAL);
  }

  // Simple 4 node DFA with optional termination immediately before the terminal node.
  // (I) --1--> ( ) --2--> (T) --x--> (T)
  @Test
  public void testWithOptionalEdge() {
    RangeTree dfa = RangeTree.from(specs("12x", "12"));

    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
    assertThat(nfa.nodes()).hasSize(4);
    Node afterOne = assertOutEdge(nfa, INITIAL, edge(1));
    Node afterTwo = assertOutEdge(nfa, afterOne, edge(2));
    // The final 'any digit' edge is expected in its optional form.
    Node end = assertOutEdge(nfa, afterTwo, optionalAny());
    assertThat(end).isEqualTo(TERMINAL);
  }

  // Simple 4 node DFA with optional termination.
  // (I) --1--> (T) --2--> ( ) --x--> (T)
  @Test
  public void testWithEpsilon() {
    RangeTree dfa = RangeTree.from(specs("12x", "1"));

    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
    assertThat(nfa.nodes()).hasSize(4);
    Node afterOne = assertOutEdge(nfa, INITIAL, edge(1));
    assertOutEdges(nfa, afterOne, edge(2), epsilon());
    // One of the out nodes should be the terminal.
    Node viaEpsilon = follow(nfa, afterOne, epsilon());
    assertThat(viaEpsilon).isEqualTo(Node.TERMINAL);
    // The other is the normal edge that leads to the terminal.
    Node afterTwo = follow(nfa, afterOne, edge(2));
    Node end = follow(nfa, afterTwo, any());
    assertThat(end).isEqualTo(TERMINAL);
  }

  // Simple 5 node DFA with 2 paths.
  // (I) --1--> ( ) --2--> ( ) --x--> (T)
  //   `---3--> ( ) --4----^
  @Test
  public void testMultiplePathsWithCommonTail() {
    RangeTree dfa = RangeTree.from(specs("12x", "34x"));

    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
    assertThat(nfa.nodes()).hasSize(5);

    assertOutEdges(nfa, INITIAL, edge(1), edge(3));
    // Follow both branches; they must meet at the same node before the shared tail.
    Node lhsStart = follow(nfa, INITIAL, edge(1));
    Node lhsJoin = assertOutEdge(nfa, lhsStart, edge(2));
    Node rhsStart = follow(nfa, INITIAL, edge(3));
    Node rhsJoin = assertOutEdge(nfa, rhsStart, edge(4));
    assertThat(lhsJoin).isEqualTo(rhsJoin);
    Node end = assertOutEdge(nfa, lhsJoin, any());
    assertThat(end).isEqualTo(TERMINAL);
  }

  @Test
  public void testOptionalTopLevelGroup_bug_69101586() {
    // Requires making a top level optional group, which is (deliberately) not easy with the
    // DFA tooling since it's pretty rare. This is a DFA which can terminate immediately and will
    // match the empty input (as well as its normal input).
    RangeTree twoDigits = RangeTree.from(specs("xx"));
    RangeTree dfa = twoDigits.union(RangeTree.from(RangeSpecification.empty()));

    ValueGraph<Node, SimpleEdge> nfa = RangeTreeConverter.toNfaGraph(dfa);
    assertThat(nfa.nodes()).hasSize(3);
    // The epsilon edge goes straight from the initial node to the terminal.
    Node viaEpsilon = follow(nfa, INITIAL, epsilon());
    assertThat(viaEpsilon).isEqualTo(Node.TERMINAL);
    Node afterFirstDigit = follow(nfa, INITIAL, any());
    Node end = assertOutEdge(nfa, afterFirstDigit, any());
    assertThat(end).isEqualTo(TERMINAL);
  }

  // Returns the simple edge matching exactly the one digit value n (a mask with only bit n set).
  SimpleEdge edge(int n) {
    int singleDigitMask = 1 << n;
    return SimpleEdge.fromMask(singleDigitMask);
  }

  // Parses each given string into a range specification, keeping the argument order.
  List<RangeSpecification> specs(String... s) {
    Stream<RangeSpecification> parsed = Stream.of(s).map(RangeSpecification::parse);
    return parsed.collect(toImmutableList());
  }

  // Asserts that the node has exactly one successor and that the edge leading to it equals the
  // given edge; returns that successor.
  Node assertOutEdge(ValueGraph<Node, SimpleEdge> nfa, Node node, SimpleEdge edge) {
    assertThat(nfa.successors(node)).hasSize(1);
    Node onlyTarget = Iterables.getOnlyElement(nfa.successors(node));
    SimpleEdge actualEdge = nfa.edgeValue(node, onlyTarget).get();
    assertThat(actualEdge).isEqualTo(edge);
    return onlyTarget;
  }

  // Asserts that the node's out edges are exactly the given edges (in any order).
  void assertOutEdges(ValueGraph<Node, SimpleEdge> nfa, Node node, SimpleEdge... edges) {
    int expectedCount = edges.length;
    assertThat(nfa.successors(node)).hasSize(expectedCount);
    // Collect the value of the edge leading to each successor.
    List<Edge> actualEdges =
        nfa.successors(node).stream()
            .map(successor -> nfa.edgeValue(node, successor).get())
            .collect(toImmutableList());
    assertThat(actualEdges).containsExactlyElementsIn(edges);
  }

  // Follows the given edge from a node (which must be in the graph), returning the first
  // successor reached via an equal edge (or null if the edge does not exist in the graph).
  Node follow(ValueGraph<Node, SimpleEdge> nfa, Node node, SimpleEdge edge) {
    for (Node successor : nfa.successors(node)) {
      SimpleEdge candidate = nfa.edgeValue(node, successor).get();
      if (candidate.equals(edge)) {
        return successor;
      }
    }
    return null;
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatterTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexFormatterTest.java
@ -0,0 +1,107 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.regex.RegexFormatter.FormatOption.FORCE_CAPTURING_GROUPS;
 import static com.google.i18n.phonenumbers.metadata.regex.RegexFormatter.FormatOption.FORCE_NON_CAPTURING_GROUPS;
 import static com.google.i18n.phonenumbers.metadata.regex.RegexFormatter.FormatOption.PRESERVE_CAPTURING_GROUPS;

 import com.google.common.base.Joiner;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class RegexFormatterTest {

  // Luckily the formatter cares only about 3 special characters, '(', '|' and ')', so we only need
  // to test a few very straightforward cases to cover everything.

  @Test
  public void testSimple() {
    // Input without any groups is expected back unchanged.
    String regex = "abcd";
    assertThat(RegexFormatter.format(regex, PRESERVE_CAPTURING_GROUPS)).isEqualTo("abcd");
  }

  @Test
  public void testNested() {
    // A capturing group is split over lines with its alternatives indented.
    String capturing = "ab(cd|ef)gh";
    String expectedCapturing =
        lines(
            "ab(",
            "  cd|",
            "  ef",
            ")gh");
    assertThat(RegexFormatter.format(capturing, PRESERVE_CAPTURING_GROUPS))
        .isEqualTo(expectedCapturing);

    // A non-capturing group keeps its "(?:" prefix and is laid out the same way.
    String nonCapturing = "ab(?:cd|ef)gh";
    String expectedNonCapturing =
        lines(
            "ab(?:",
            "  cd|",
            "  ef",
            ")gh");
    assertThat(RegexFormatter.format(nonCapturing, PRESERVE_CAPTURING_GROUPS))
        .isEqualTo(expectedNonCapturing);
  }

  @Test
  public void testDoubleNested() {
    // Each nesting level adds a further two spaces of indentation.
    String allCapturing = "ab(cd(ef|gh)|ij)";
    String expectedAllCapturing =
        lines(
            "ab(",
            "  cd(",
            "    ef|",
            "    gh",
            "  )|",
            "  ij",
            ")");
    assertThat(RegexFormatter.format(allCapturing, PRESERVE_CAPTURING_GROUPS))
        .isEqualTo(expectedAllCapturing);

    // The kind of each group (capturing or not) is preserved at every level.
    String mixedGroups = "ab(cd(?:ef|gh)|ij)";
    String expectedMixedGroups =
        lines(
            "ab(",
            "  cd(?:",
            "    ef|",
            "    gh",
            "  )|",
            "  ij",
            ")");
    assertThat(RegexFormatter.format(mixedGroups, PRESERVE_CAPTURING_GROUPS))
        .isEqualTo(expectedMixedGroups);
  }

  @Test
  public void testForceNonCapturingGroups() {
    // The inner capturing group must come out as non-capturing, like the outer one.
    String mixedGroups = "ab(?:cd(ef|gh)|ij)";
    String expected =
        lines(
            "ab(?:",
            "  cd(?:",
            "    ef|",
            "    gh",
            "  )|",
            "  ij",
            ")");
    assertThat(RegexFormatter.format(mixedGroups, FORCE_NON_CAPTURING_GROUPS))
        .isEqualTo(expected);
  }

  @Test
  public void testForceCapturingGroups() {
    // The outer non-capturing group must come out as capturing, like the inner one.
    String mixedGroups = "ab(?:cd(ef|gh)|ij)";
    String expected =
        lines(
            "ab(",
            "  cd(",
            "    ef|",
            "    gh",
            "  )|",
            "  ij",
            ")");
    assertThat(RegexFormatter.format(mixedGroups, FORCE_CAPTURING_GROUPS)).isEqualTo(expected);
  }

  // Joins the given strings with newlines (no trailing newline) to form a multi-line expectation.
  private static String lines(String... s) {
    Joiner newlineJoiner = Joiner.on('\n');
    return newlineJoiner.join(s);
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexGeneratorTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/RegexGeneratorTest.java
@ -0,0 +1,197 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.base.CharMatcher.whitespace;
 import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.regex.RegexGenerator.basic;
 import static java.util.stream.Collectors.joining;

 import com.google.common.collect.ImmutableList;
 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import java.util.Arrays;
 import java.util.List;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class RegexGeneratorTest {
  /** A single fixed-length range, with and without dot-matching for the "any digit" tail. */
  @Test
  public void testSimple() {
    RangeTree sixDigits = ranges("123xxx");
    assertRegex(basic(), sixDigits, "123\\d{3}");
    // The dot-match output could be "..." instead of ".{3}" (one character shorter), but that
    // saving is probably not worth it.
    assertRegex(basic().withDotMatch(), sixDigits, "123.{3}");
  }

  /** Ranges differing only in the number of trailing digits collapse to one counted group. */
  @Test
  public void testVariableLength() {
    RangeTree threeToSixTrailing = ranges("123xxx", "123xxxx", "123xxxxx", "123xxxxxx");
    assertRegex(basic(), threeToSixTrailing, "123\\d{3,6}");
  }

  /** Tail optimization pulls the common trailing digits out of the alternation. */
  @Test
  public void testTailOptimization() {
    RangeTree tree = ranges("123xxx", "123xxxx", "145xxx");
    assertRegex(basic(), tree, "1(?:23\\d{3,4}|45\\d{3})");
    assertRegex(basic().withTailOptimization(), tree, "1(?:23\\d?|45)\\d{3}");
  }

  /** DFA factorization separates the short "wedge" ranges from the large contiguous ones. */
  @Test
  public void testDfaFactorization() {
    // The shorter (5-digit) numbers form a "thin" wedge of specific non-determinism, which stops
    // the larger ranges from being contiguous in the DFA.
    RangeTree tree = ranges("1234x", "1256x", "[0-4]xxxxxx", "[0-4]xxxxxxx");
    assertRegex(basic(), tree,
        "[02-4]\\d{6,7}|",
        "1(?:[013-9]\\d{5,6}|",
        "2(?:[0-246-9]\\d{4,5}|",
        "3(?:[0-35-9]\\d{3,4}|4\\d(?:\\d{2,3})?)|",
        "5(?:[0-57-9]\\d{3,4}|6\\d(?:\\d{2,3})?)))");
    assertRegex(basic().withDfaFactorization(), tree, "[0-4]\\d{6,7}|12(?:34|56)\\d");
  }

  /** Subgroup optimization emits a repeated subgraph once instead of once per prefix. */
  @Test
  public void testSubgroupOptimization() {
    // Suffix paths: "everything except 95, 96 and 100" (this appears in China leading digits).
    RangeTree suffixes = ranges("[02-8]", "1[1-9]", "10[1-9]", "9[0-47-9]");
    RangeTree prefixes = ranges("123", "234", "345", "456", "567");

    // Every prefix path extended by every suffix path.
    RangeTree crossProduct = RangeTree.from(
        prefixes.asRangeSpecifications().stream()
            .flatMap(head -> suffixes.asRangeSpecifications().stream().map(head::extendBy)));

    // Extra paths are unioned in so that the "basic" output repeats the suffix expression.
    RangeTree extras = ranges("128xx", "238xx", "348xx", "458xx", "568xx");
    RangeTree tree = extras.union(crossProduct);

    assertRegex(basic(), tree,
        "12(?:3(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
        "23(?:4(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
        "34(?:5(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
        "45(?:6(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)|",
        "56(?:7(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])|8\\d\\d)");

    assertRegex(basic().withSubgroupOptimization(), tree,
        "(?:12|23|34|45|56)8\\d\\d|",
        "(?:123|234|345|456|567)(?:[02-8]|1(?:0[1-9]|[1-9])|9[0-47-9])");
  }

  @Test
  public void testRegression_bug_65250963() {
    RangeTree tree = ranges("1387", "1697", "1524", "1539", "1768", "1946");
    // The expected regex is laid out over several indented lines purely for readability;
    // assertRegex() strips all whitespace before comparing.
    assertRegex(basic(), tree,
        "1(?:",
        "  (?:",
        "    38|",
        "    69",
        "  )7|",
        "  5(?:",
        "    24|",
        "    39",
        "  )|",
        "  768|",
        "  946",
        ")");
  }

  @Test
  public void testRegression_bug_68929642() {
    // 7 or 9 digits, but not 8.
    assertMatches(
        "1\\d{6}(?:\\d{2})?",
        ImmutableList.of("1234567", "123456789"),
        ImmutableList.of("12345678"),
        "1xxx_xxx", "1xx_xxx_xxx");

    // 7 digits, or 8 digits where the last one is restricted to [0-7].
    assertMatches(
        "1\\d{6}[0-7]?",
        ImmutableList.of("1234567", "12345670"),
        ImmutableList.of("123456", "123456700"),
        "1xxx_xxx", "1x_xxx_xx[0-7]");

    // 1 or 2 digits.
    assertMatches(
        "\\d\\d?",
        ImmutableList.of("1", "12"),
        ImmutableList.of("", "123"),
        "x", "xx");

    // 1 to 3 digits.
    assertMatches(
        "\\d{1,3}",
        ImmutableList.of("1", "12", "123"),
        ImmutableList.of("", "1234"),
        "x", "xx", "xxx");

    // 1, 4 or 6 digits.
    assertMatches(
        "\\d(?:\\d{3}(?:\\d{2})?)?",
        ImmutableList.of("1", "1234", "123456"),
        ImmutableList.of("", "12", "123", "12345", "1234567"),
        "x", "xxxx", "xxx_xxx");

    // Empty, or 2, 3, 5, 6 or 7 digits.
    assertMatches(
        "(?:\\d\\d(?:\\d(?:\\d{2,4})?)?)?",
        ImmutableList.of("", "12", "123", "12345", "123456", "1234567"),
        ImmutableList.of("1", "1234", "12345678"),
        "", "xx", "xxx", "xx_xxx", "xxx_xxx", "xxxx_xxx");

    // Empty or exactly 2 digits.
    assertMatches(
        "(?:\\d{2})?",
        ImmutableList.of("", "12"),
        ImmutableList.of("1", "123"),
        "", "xx");

    // Empty or a single digit.
    assertMatches(
        "\\d?",
        ImmutableList.of("", "1"),
        ImmutableList.of("12"),
        "", "x");
  }

  // Generates a regex from the given range specifications with the basic generator and asserts
  // that it is exactly the expected pattern. The positive and negative example numbers are then
  // checked against both the expected pattern and the generated regex.
  private static void assertMatches(
      String pattern, List<String> matchNumbers, List<String> noMatchNumbers, String... specs) {
    String generated = basic().toRegex(ranges(specs));
    assertThat(generated).isEqualTo(pattern);

    // Every positive example must be accepted by both expressions.
    matchNumbers.forEach(accepted -> {
      assertThat(accepted).matches(pattern);
      assertThat(accepted).matches(generated);
    });
    // Every negative example must be rejected by both expressions.
    noMatchNumbers.forEach(rejected -> {
      assertThat(rejected).doesNotMatch(pattern);
      assertThat(rejected).doesNotMatch(generated);
    });
  }

  // Asserts that the generator produces the expected regex for the given tree. The expected regex
  // may be split over several strings and indented freely: all whitespace is removed from each
  // string and the results are concatenated before the comparison.
  private static void assertRegex(RegexGenerator generator, RangeTree dfa, String... lines) {
    String actual = generator.toRegex(dfa);
    String expected =
        Arrays.stream(lines).map(line -> whitespace().removeFrom(line)).collect(joining());
    assertThat(actual).isEqualTo(expected);
  }

  // Builds a range tree from the given range specification strings.
  private static RangeTree ranges(String... specs) {
    return RangeTree.from(Arrays.stream(specs).map(spec -> RangeSpecification.parse(spec)));
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/SubgraphOptimizerTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/SubgraphOptimizerTest.java
@ -0,0 +1,80 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.truth.Truth.assertThat;
 import static com.google.common.truth.Truth8.assertThat;

 import com.google.i18n.phonenumbers.metadata.RangeSpecification;
 import com.google.i18n.phonenumbers.metadata.RangeTree;
 import com.google.i18n.phonenumbers.metadata.RangeTree.DfaNode;
 import com.google.i18n.phonenumbers.metadata.regex.SubgroupOptimizer.LinkNodeVisitor;
 import java.util.Arrays;
 import java.util.Optional;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class SubgraphOptimizerTest {
  // Paths for "everything except 95, 96 and 100" (this appears in China leading digits). Unlike
  // China, an early terminating '9' is also present so that the test ensures the entire subgraph
  // is extracted (including the terminating node).
  private static final RangeTree POSTGRAPH = ranges("[02-9]", "1[1-9]", "10[1-9]", "9[0-47-9]");

  // Prefixes which lead into the subgraph.
  private static final RangeTree PREGRAPH = ranges("123", "234", "345", "456", "567");

  // Every PREGRAPH path extended by every POSTGRAPH path.
  private static final RangeTree SUBGRAPH = crossProduct(PREGRAPH, POSTGRAPH);

  // SUBGRAPH plus extra paths which share edges with it and will cause repetition in regular
  // expressions, along with a couple of early terminating paths "on the way to" the subgroup.
  // Note however that a terminating path that reaches the root of the subgraph (e.g. "123") will
  // cause a split in the DFA at the root node (one terminating, one not terminating).
  private static final RangeTree TEST_RANGES =
      SUBGRAPH.union(ranges("128xx", "238xx", "348xx", "458xx", "568xx", "12", "34"));

  @Test
  public void testSubgraphWeightAndInOrder() {
    LinkNodeVisitor visitor = new LinkNodeVisitor();
    TEST_RANGES.accept(visitor);
    DfaNode costliest = visitor.getHighestCostNode();
    assertThat(costliest).isNotNull();
    // There are 5 paths in PREGRAPH, each of which reaches the root of POSTGRAPH.
    assertThat(visitor.getInOrder(costliest)).isEqualTo(5);
    // POSTGRAPH has 7 edges with a total weight of 27:
    // "[02-8]" = 6, "1", "0", "9" = 3, 2 x "[1-9]" = 10, "[0-47-9]" = 8
    assertThat(visitor.getSubgraphWeight(costliest)).isEqualTo(27);
  }

  @Test
  public void testSubgraphExtraction() {
    Optional<RangeTree> extracted = SubgroupOptimizer.extractRepeatingSubgraph(TEST_RANGES);
    assertThat(extracted).hasValue(SUBGRAPH);

    // The "bridge" node is the same, so we extract the whole graph (so we return nothing).
    Optional<RangeTree> fromWholeGraph = SubgroupOptimizer.extractRepeatingSubgraph(SUBGRAPH);
    assertThat(fromWholeGraph).isEmpty();

    // There's no repetition in this graph, so return nothing.
    Optional<RangeTree> fromUniquePaths =
        SubgroupOptimizer.extractRepeatingSubgraph(ranges("123", "234", "345"));
    assertThat(fromUniquePaths).isEmpty();
  }

  // Returns the tree containing each path of `heads` extended by each path of `tails`.
  private static RangeTree crossProduct(RangeTree heads, RangeTree tails) {
    return RangeTree.from(
        heads.asRangeSpecifications().stream()
            .flatMap(head -> tails.asRangeSpecifications().stream().map(head::extendBy)));
  }

  // Builds a range tree from the given range specification strings.
  private static RangeTree ranges(String... specs) {
    return RangeTree.from(Arrays.stream(specs).map(spec -> RangeSpecification.parse(spec)));
  }
 }
--- a/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizerTest.java
+++ b/metadata/src/test/java/com/google/i18n/phonenumbers/metadata/regex/TrailingPathOptimizerTest.java
@ -0,0 +1,122 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.google.i18n.phonenumbers.metadata.regex;

 import static com.google.common.truth.Truth.assertThat;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.INITIAL;
 import static com.google.i18n.phonenumbers.metadata.regex.Node.TERMINAL;

 import com.google.common.graph.ValueGraph;
 import com.google.i18n.phonenumbers.metadata.regex.Edge.SimpleEdge;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
 public class TrailingPathOptimizerTest {
  @Test
  public void testSimple() {
    NfaBuilder input = new NfaBuilder();
    input.addPath(INITIAL, TERMINAL, "12xx");
    input.addPath(INITIAL, TERMINAL, "34xxxx");
    ValueGraph<Node, SimpleEdge> optimized = TrailingPathOptimizer.optimize(input.graph());

    // The shared trailing "xx" should have been factored out behind a new join point.
    NfaBuilder expected = new NfaBuilder();
    Node joinNode = expected.addPath(INITIAL, "12");
    expected.addPath(INITIAL, joinNode, "34xx");
    expected.addPath(joinNode, TERMINAL, "xx");

    assertEquivalent(optimized, expected);
  }

  @Test
  public void testTrailingOptionalGroup() {
    NfaBuilder input = new NfaBuilder();
    input.addPath(INITIAL, TERMINAL, "12xx");
    // Second path is "34xx(xx)?".
    Node inputOptionalStart = input.addPath(INITIAL, "34xx");
    input.addOptionalPath(inputOptionalStart, TERMINAL, "xx");

    ValueGraph<Node, SimpleEdge> optimized = TrailingPathOptimizer.optimize(input.graph());

    // The shared trailing "xx" should have been factored out behind a new join point.
    NfaBuilder expected = new NfaBuilder();
    Node joinNode = expected.addPath(INITIAL, "12");
    // "34(xx)?" leads up to the join node ...
    Node expectedOptionalStart = expected.addPath(INITIAL, "34");
    expected.addOptionalPath(expectedOptionalStart, joinNode, "xx");
    // ... and the trailing "xx" follows it.
    expected.addPath(joinNode, TERMINAL, "xx");

    assertEquivalent(optimized, expected);
  }

  @Test
  public void testDoubleRecursion() {
    NfaBuilder input = new NfaBuilder();
    input.addPath(INITIAL, TERMINAL, "12xx");
    input.addPath(INITIAL, TERMINAL, "34xxxx");
    // Third path is "56xxxx(xx)?".
    Node inputOptionalStart = input.addPath(INITIAL, "56xxxx");
    input.addOptionalPath(inputOptionalStart, TERMINAL, "xx");
    ValueGraph<Node, SimpleEdge> optimized = TrailingPathOptimizer.optimize(input.graph());

    // Factoring should be applied twice to pull out 2 lots of "xx".
    // How I wish we had a way to embed proper graphs in JavaDoc!
    //
    //    ,-----------12-----------v
    // (I)------34----->(1)--xx-->(2)--xx-->(T)
    //    `-56-->()--xx--^
    //            `--e---^
    //
    NfaBuilder expected = new NfaBuilder();
    Node outerJoin = expected.addPath(INITIAL, "12");
    expected.addPath(outerJoin, TERMINAL, "xx");
    Node innerJoin = expected.addPath(INITIAL, "34");
    expected.addPath(innerJoin, outerJoin, "xx");
    Node expectedOptionalStart = expected.addPath(INITIAL, "56");
    expected.addOptionalPath(expectedOptionalStart, innerJoin, "xx");

    assertEquivalent(optimized, expected);
  }

  @Test
  public void testNoChangeIfNoCommonFactor() {
    NfaBuilder input = new NfaBuilder();
    input.addPath(INITIAL, TERMINAL, "12xxxxxx");
    // The second path is "34xxx(xx)?". It shares 'xxx' with '12xxxxxx' but is not expected to be
    // factored, because splitting out 'xxx' would make the resulting regular expression longer
    // (e.g. "(?:34\d{2}?|12\d{3})\d{3}" is longer than "34\d{2}?\d{3}|12\d{6}").
    //
    // Note that there are some cases in which this isn't true (shorter sequences like 'x' might
    // be splittable without cost), but they are unlikely to ever make the expression shorter,
    // especially if they result in adding new parentheses for grouping.
    Node optionalStart = input.addPath(INITIAL, "34xxx");
    input.addOptionalPath(optionalStart, TERMINAL, "xx");

    ValueGraph<Node, SimpleEdge> optimized = TrailingPathOptimizer.optimize(input.graph());
    // The optimized graph is expected to be equivalent to the unmodified input.
    assertEquivalent(optimized, input);
  }

  // Asserts that the actual graph and the graph held by the expected builder flatten to equal
  // results.
  private static void assertEquivalent(ValueGraph<Node, SimpleEdge> actual, NfaBuilder expected) {
    // Comparing flattened graphs is a somewhat cheeky stand-in for a graph isomorphism test. It
    // relies on flattening being deterministic according to how edges sort and not caring about
    // node values, and, obviously, on the flattening code being vaguely well tested.
    assertThat(NfaFlattener.flatten(actual))
        .isEqualTo(NfaFlattener.flatten(expected.graph()));
  }
 }
--- a/metadata/src/test/proto/regression_test.proto
+++ b/metadata/src/test/proto/regression_test.proto
@ -0,0 +1,49 @@
 /*
 * Copyright (C) 2017 The Libphonenumber Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 syntax = "proto3";

 package i18n.phonenumbers.internal.finitestatematcher.compiler;

 option java_package = "com.google.i18n.phonenumbers.internal.finitestatematcher.compiler";
 option java_outer_classname = "RegressionTestProto";

 // A set of regression tests.
 message Tests {
  // The individual regression test cases that make up this set.
  repeated TestCase test_case = 1;
 }

 // A single regression test entry: a set of input ranges together with the
 // bytes they are expected to produce.
 message TestCase {
  // A name for the test, ideally unique.
  string name = 1;
  // If set true, expect that the test will fail 100% of the time. This is
  // useful to test that test numbers have enough coverage to force a failure
  // and is typically achieved by modifying an input range after generating a
  // passing test (or carefully modifying the output bytecodes). Note that not
  // all changes will make a test fail 100% of the time, so care must be taken
  // to avoid creating a flaky test. For example, don't change a "[0-3]" to
  // "[0-5]", as this only fails if the test number contains a 4 or 5 at the
  // corresponding index; change it to "[4-6]" instead, so there's no overlap
  // and at least one test number that's valid for that range will not be
  // accepted by the matcher.
  bool should_fail = 2;
  // The input ranges (in the form of range specifications) which form the DFA
  // to be tested (e.g. "1[2-5]678xxxxx" etc...).
  repeated string range = 3;
  // The expected output bytes, encoded in test files using C-style hex notation
  // (i.e. \xHH). This can be split over multiple lines for readability, which
  // is why the field is repeated.
  repeated bytes expected = 4;
 }