From e7ec4de1cc296d34fcca214aebd3a43378893173 Mon Sep 17 00:00:00 2001
From: mcrouse <mcrouse@google.com>
Date: Thu, 4 Aug 2022 09:22:33 -0700
Subject: [PATCH] Convert utf/rune.c to C++ (rune.cc) in i18n::phonenumbers

---
 cpp/CMakeLists.txt                |   2 +-
 cpp/src/phonenumbers/utf/rune.c   | 358 ------------------------------
 cpp/src/phonenumbers/utf/rune.cc  | 325 +++++++++++++++++++++++++++
 cpp/src/phonenumbers/utf/utf.h    |  60 ++---
 cpp/src/phonenumbers/utf/utfdef.h |  21 +-
 debian/copyright                  |   2 +-
 6 files changed, 364 insertions(+), 404 deletions(-)
 delete mode 100644 cpp/src/phonenumbers/utf/rune.c
 create mode 100644 cpp/src/phonenumbers/utf/rune.cc

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 434c593bd..35a9a1cf9 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -243,7 +243,7 @@ set (
   "src/phonenumbers/string_byte_sink.cc"
   "src/phonenumbers/stringutil.cc"
   "src/phonenumbers/unicodestring.cc"
-  "src/phonenumbers/utf/rune.c"
+  "src/phonenumbers/utf/rune.cc"
   "src/phonenumbers/utf/unicodetext.cc"
   "src/phonenumbers/utf/unilib.cc"
 )
diff --git a/cpp/src/phonenumbers/utf/rune.c b/cpp/src/phonenumbers/utf/rune.c
deleted file mode 100644
index b4aa93b5d..000000000
--- a/cpp/src/phonenumbers/utf/rune.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * The authors of this software are Rob Pike and Ken Thompson.
- *              Copyright (c) 2002 by Lucent Technologies.
- *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
- * Permission to use, copy, modify, and distribute this software for any
- * purpose without fee is hereby granted, provided that this entire notice
- * is included in all copies of any software which is or includes a copy
- * or modification of this software and in all copies of the supporting
- * documentation for such software.
- * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
- */
-#include "phonenumbers/utf/utf.h"
-#include "phonenumbers/utf/utfdef.h"
-
-enum
-{
-	Bit1	= 7,
-	Bitx	= 6,
-	Bit2	= 5,
-	Bit3	= 4,
-	Bit4	= 3,
-	Bit5	= 2,
-
-	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
-	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
-	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
-	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
-	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
-	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
-
-	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
-	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
-	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
-	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */
-
-	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
-	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
-
-	SurrogateMin	= 0xD800,
-	SurrogateMax	= 0xDFFF,
-
-	Bad	= Runeerror,
-};
-
-/*
- * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
- * This is a slower but "safe" version of the old chartorune
- * that works on strings that are not necessarily null-terminated.
- *
- * If you know for sure that your string is null-terminated,
- * chartorune will be a bit faster.
- *
- * It is guaranteed not to attempt to access "length"
- * past the incoming pointer.  This is to avoid
- * possible access violations.  If the string appears to be
- * well-formed but incomplete (i.e., to get the whole Rune
- * we'd need to read past str+length) then we'll set the Rune
- * to Bad and return 0.
- *
- * Note that if we have decoding problems for other
- * reasons, we return 1 instead of 0.
- */
-int
-charntorune(Rune *rune, const char *str, int length)
-{
-	int c, c1, c2, c3;
-	long l;
-
-	/* When we're not allowed to read anything */
-	if(length <= 0) {
-		goto badlen;
-	}
-
-	/*
-	 * one character sequence (7-bit value)
-	 *	00000-0007F => T1
-	 */
-	c = *(uchar*)str;
-	if(c < Tx) {
-		*rune = (Rune)c;
-		return 1;
-	}
-
-	// If we can't read more than one character we must stop
-	if(length <= 1) {
-		goto badlen;
-	}
-
-	/*
-	 * two character sequence (11-bit value)
-	 *	0080-07FF => T2 Tx
-	 */
-	c1 = *(uchar*)(str+1) ^ Tx;
-	if(c1 & Testx)
-		goto bad;
-	if(c < T3) {
-		if(c < T2)
-			goto bad;
-		l = ((c << Bitx) | c1) & Rune2;
-		if(l <= Rune1)
-			goto bad;
-		*rune = (Rune)l;
-		return 2;
-	}
-
-	// If we can't read more than two characters we must stop
-	if(length <= 2) {
-		goto badlen;
-	}
-
-	/*
-	 * three character sequence (16-bit value)
-	 *	0800-FFFF => T3 Tx Tx
-	 */
-	c2 = *(uchar*)(str+2) ^ Tx;
-	if(c2 & Testx)
-		goto bad;
-	if(c < T4) {
-		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
-		if(l <= Rune2)
-			goto bad;
-		if (SurrogateMin <= l && l <= SurrogateMax)
-			goto bad;
-		*rune = (Rune)l;
-		return 3;
-	}
-
-	if (length <= 3)
-		goto badlen;
-
-	/*
-	 * four character sequence (21-bit value)
-	 *	10000-1FFFFF => T4 Tx Tx Tx
-	 */
-	c3 = *(uchar*)(str+3) ^ Tx;
-	if (c3 & Testx)
-		goto bad;
-	if (c < T5) {
-		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-		if (l <= Rune3 || l > Runemax)
-			goto bad;
-		*rune = (Rune)l;
-		return 4;
-	}
-
-	// Support for 5-byte or longer UTF-8 would go here, but
-	// since we don't have that, we'll just fall through to bad.
-
-	/*
-	 * bad decoding
-	 */
-bad:
-	*rune = Bad;
-	return 1;
-badlen:
-	*rune = Bad;
-	return 0;
-
-}
-
-
-/*
- * This is the older "unsafe" version, which works fine on
- * null-terminated strings.
- */
-int
-chartorune(Rune *rune, const char *str)
-{
-	int c, c1, c2, c3;
-	long l;
-
-	/*
-	 * one character sequence
-	 *	00000-0007F => T1
-	 */
-	c = *(uchar*)str;
-	if(c < Tx) {
-		*rune = (Rune)c;
-		return 1;
-	}
-
-	/*
-	 * two character sequence
-	 *	0080-07FF => T2 Tx
-	 */
-	c1 = *(uchar*)(str+1) ^ Tx;
-	if(c1 & Testx)
-		goto bad;
-	if(c < T3) {
-		if(c < T2)
-			goto bad;
-		l = ((c << Bitx) | c1) & Rune2;
-		if(l <= Rune1)
-			goto bad;
-		*rune = (Rune)l;
-		return 2;
-	}
-
-	/*
-	 * three character sequence
-	 *	0800-FFFF => T3 Tx Tx
-	 */
-	c2 = *(uchar*)(str+2) ^ Tx;
-	if(c2 & Testx)
-		goto bad;
-	if(c < T4) {
-		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
-		if(l <= Rune2)
-			goto bad;
-		if (SurrogateMin <= l && l <= SurrogateMax)
-			goto bad;
-		*rune = (Rune)l;
-		return 3;
-	}
-
-	/*
-	 * four character sequence (21-bit value)
-	 *	10000-1FFFFF => T4 Tx Tx Tx
-	 */
-	c3 = *(uchar*)(str+3) ^ Tx;
-	if (c3 & Testx)
-		goto bad;
-	if (c < T5) {
-		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-		if (l <= Rune3 || l > Runemax)
-			goto bad;
-		*rune = (Rune)l;
-		return 4;
-	}
-
-	/*
-	 * Support for 5-byte or longer UTF-8 would go here, but
-	 * since we don't have that, we'll just fall through to bad.
-	 */
-
-	/*
-	 * bad decoding
-	 */
-bad:
-	*rune = Bad;
-	return 1;
-}
-
-int
-isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed)
-{
-	*consumed = charntorune(rune, str, length);
-	return *rune != Runeerror || *consumed == 3;
-}
-
-int
-runetochar(char *str, const Rune *rune)
-{
-	/* Runes are signed, so convert to unsigned for range check. */
-	unsigned long c;
-
-	/*
-	 * one character sequence
-	 *	00000-0007F => 00-7F
-	 */
-	c = *rune;
-	if(c <= Rune1) {
-		str[0] = (char)c;
-		return 1;
-	}
-
-	/*
-	 * two character sequence
-	 *	0080-07FF => T2 Tx
-	 */
-	if(c <= Rune2) {
-		str[0] = (char)(T2 | (c >> 1*Bitx));
-		str[1] = (char)(Tx | (c & Maskx));
-		return 2;
-	}
-
-	/*
-	 * If the Rune is out of range or a surrogate half, convert it to the error rune.
-	 * Do this test here because the error rune encodes to three bytes.
-	 * Doing it earlier would duplicate work, since an out of range
-	 * Rune wouldn't have fit in one or two bytes.
-	 */
-	if (c > Runemax)
-		c = Runeerror;
-	if (SurrogateMin <= c && c <= SurrogateMax)
-		c = Runeerror;
-
-	/*
-	 * three character sequence
-	 *	0800-FFFF => T3 Tx Tx
-	 */
-	if (c <= Rune3) {
-		str[0] = (char)(T3 |  (c >> 2*Bitx));
-		str[1] = (char)(Tx | ((c >> 1*Bitx) & Maskx));
-		str[2] = (char)(Tx |  (c & Maskx));
-		return 3;
-	}
-
-	/*
-	 * four character sequence (21-bit value)
-	 *     10000-1FFFFF => T4 Tx Tx Tx
-	 */
-	str[0] = (char)(T4 | (c >> 3*Bitx));
-	str[1] = (char)(Tx | ((c >> 2*Bitx) & Maskx));
-	str[2] = (char)(Tx | ((c >> 1*Bitx) & Maskx));
-	str[3] = (char)(Tx | (c & Maskx));
-	return 4;
-}
-
-int
-runelen(Rune rune)
-{
-	char str[10];
-
-	return runetochar(str, &rune);
-}
-
-int
-runenlen(const Rune *r, int nrune)
-{
-	int nb, c;
-
-	nb = 0;
-	while(nrune--) {
-		c = (int)*r++;
-		if (c <= Rune1)
-			nb++;
-		else if (c <= Rune2)
-			nb += 2;
-		else if (c <= Rune3)
-			nb += 3;
-		else /* assert(c <= Rune4) */
-			nb += 4;
-	}
-	return nb;
-}
-
-int
-fullrune(const char *str, int n)
-{
-	if (n > 0) {
-		int c = *(uchar*)str;
-		if (c < Tx)
-			return 1;
-		if (n > 1) {
-			if (c < T3)
-				return 1;
-			if (n > 2) {
-				if (c < T4 || n > 3)
-					return 1;
-			}
-		}
-	}
-	return 0;
-}
diff --git a/cpp/src/phonenumbers/utf/rune.cc b/cpp/src/phonenumbers/utf/rune.cc
new file mode 100644
index 000000000..4e5c24b8a
--- /dev/null
+++ b/cpp/src/phonenumbers/utf/rune.cc
@@ -0,0 +1,325 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ *              Copyright (c) 2002 by Lucent Technologies.
+ *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include "phonenumbers/utf/utf.h"
+#include "phonenumbers/utf/utfdef.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+enum {
+  Bit1 = 7, /* payload bits in a 1-byte sequence */
+  Bitx = 6, /* payload bits in each continuation byte */
+  Bit2 = 5, /* payload bits in the lead byte of a 2-byte sequence */
+  Bit3 = 4, /* payload bits in the lead byte of a 3-byte sequence */
+  Bit4 = 3, /* payload bits in the lead byte of a 4-byte sequence */
+  Bit5 = 2, /* used only to derive T5 below */
+
+  T1 = ((1 << (Bit1 + 1)) - 1) ^ 0xFF, /* 0000 0000 */
+  Tx = ((1 << (Bitx + 1)) - 1) ^ 0xFF, /* 1000 0000 */
+  T2 = ((1 << (Bit2 + 1)) - 1) ^ 0xFF, /* 1100 0000 */
+  T3 = ((1 << (Bit3 + 1)) - 1) ^ 0xFF, /* 1110 0000 */
+  T4 = ((1 << (Bit4 + 1)) - 1) ^ 0xFF, /* 1111 0000 */
+  T5 = ((1 << (Bit5 + 1)) - 1) ^ 0xFF, /* 1111 1000 */
+
+  Rune1 = (1 << (Bit1 + 0 * Bitx)) - 1, /* 0000 0000 0111 1111 */
+  Rune2 = (1 << (Bit2 + 1 * Bitx)) - 1, /* 0000 0111 1111 1111 */
+  Rune3 = (1 << (Bit3 + 2 * Bitx)) - 1, /* 1111 1111 1111 1111 */
+  Rune4 = (1 << (Bit4 + 3 * Bitx)) - 1, /* 0001 1111 1111 1111 1111 1111 */
+
+  Maskx = (1 << Bitx) - 1, /* 0011 1111 */
+  Testx = Maskx ^ 0xFF,    /* 1100 0000 */
+
+  SurrogateMin = 0xD800, /* first value rejected as a surrogate half */
+  SurrogateMax = 0xDFFF, /* last value rejected as a surrogate half */
+
+  Bad = Runeerror, /* value stored in *rune when decoding fails */
+};
+
+/*
+ * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
+ * This is a slower but "safe" version of the old chartorune
+ * that works on strings that are not necessarily null-terminated.
+ *
+ * If you know for sure that your string is null-terminated,
+ * chartorune will be a bit faster.
+ *
+ * It is guaranteed never to read more than "length" bytes
+ * starting at the incoming pointer.  This is to avoid
+ * possible access violations.  If the string appears to be
+ * well-formed but incomplete (i.e., to get the whole Rune
+ * we'd need to read past str+length) then we'll set the Rune
+ * to Bad and return 0.
+ *
+ * Note that if we have decoding problems for other
+ * reasons, we return 1 instead of 0.
+ */
+int charntorune(Rune *rune, const char *str, int length) {
+  int c, c1, c2, c3;
+  long l;
+
+  /* When we're not allowed to read anything */
+  if (length <= 0) {
+    goto badlen;
+  }
+
+  /*
+   * one character sequence (7-bit value)
+   *   00000-0007F => T1
+   */
+  c = *(uchar *)str;
+  if (c < Tx) {
+    *rune = (Rune)c;
+    return 1;
+  }
+
+  // If we can't read more than one character we must stop
+  if (length <= 1) {
+    goto badlen;
+  }
+
+  /*
+   * two character sequence (11-bit value)
+   *   0080-07FF => T2 Tx
+   */
+  c1 = *(uchar *)(str + 1) ^ Tx;  // flip the top bit: 10xxxxxx -> 00xxxxxx
+  if (c1 & Testx) goto bad;       // byte was not of the form 10xxxxxx
+  if (c < T3) {
+    if (c < T2) goto bad;  // lead byte is itself of the form 10xxxxxx
+    l = ((c << Bitx) | c1) & Rune2;
+    if (l <= Rune1) goto bad;  // overlong: value fits in one byte
+    *rune = (Rune)l;
+    return 2;
+  }
+
+  // If we can't read more than two characters we must stop
+  if (length <= 2) {
+    goto badlen;
+  }
+
+  /*
+   * three character sequence (16-bit value)
+   *   0800-FFFF => T3 Tx Tx
+   */
+  c2 = *(uchar *)(str + 2) ^ Tx;
+  if (c2 & Testx) goto bad;  // byte was not of the form 10xxxxxx
+  if (c < T4) {
+    l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+    if (l <= Rune2) goto bad;  // overlong: value fits in two bytes
+    if (SurrogateMin <= l && l <= SurrogateMax) goto bad;  // surrogate half
+    *rune = (Rune)l;
+    return 3;
+  }
+
+  if (length <= 3) goto badlen;  // a fourth byte is needed but not readable
+
+  /*
+   * four character sequence (21-bit value)
+   *   10000-1FFFFF => T4 Tx Tx Tx
+   */
+  c3 = *(uchar *)(str + 3) ^ Tx;
+  if (c3 & Testx) goto bad;  // byte was not of the form 10xxxxxx
+  if (c < T5) {
+    l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+    if (l <= Rune3 || l > Runemax) goto bad;  // overlong, or above Runemax
+    *rune = (Rune)l;
+    return 4;
+  }
+
+  // Support for 5-byte or longer UTF-8 would go here, but
+  // since we don't have that, we'll just fall through to bad.
+
+  /*
+   * bad decoding
+   */
+bad:
+  *rune = Bad;
+  return 1;  // malformed input: report a single byte
+badlen:
+  *rune = Bad;
+  return 0;  // ran out of readable bytes: report none
+}
+
+/*
+ * This is the older "unsafe" version: it takes no length and reads up to
+ * four bytes from str, which works fine on null-terminated strings.
+ */
+int chartorune(Rune *rune, const char *str) {
+  int c, c1, c2, c3;
+  long l;
+
+  /*
+   * one character sequence
+   *   00000-0007F => T1
+   */
+  c = *(uchar *)str;
+  if (c < Tx) {
+    *rune = (Rune)c;
+    return 1;
+  }
+
+  /*
+   * two character sequence
+   *   0080-07FF => T2 Tx
+   */
+  c1 = *(uchar *)(str + 1) ^ Tx;  // flip the top bit: 10xxxxxx -> 00xxxxxx
+  if (c1 & Testx) goto bad;       // not 10xxxxxx; a NUL byte also ends up here
+  if (c < T3) {
+    if (c < T2) goto bad;  // lead byte is itself of the form 10xxxxxx
+    l = ((c << Bitx) | c1) & Rune2;
+    if (l <= Rune1) goto bad;  // overlong: value fits in one byte
+    *rune = (Rune)l;
+    return 2;
+  }
+
+  /*
+   * three character sequence
+   *   0800-FFFF => T3 Tx Tx
+   */
+  c2 = *(uchar *)(str + 2) ^ Tx;
+  if (c2 & Testx) goto bad;  // not 10xxxxxx; a NUL byte also ends up here
+  if (c < T4) {
+    l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+    if (l <= Rune2) goto bad;  // overlong: value fits in two bytes
+    if (SurrogateMin <= l && l <= SurrogateMax) goto bad;  // surrogate half
+    *rune = (Rune)l;
+    return 3;
+  }
+
+  /*
+   * four character sequence (21-bit value)
+   *   10000-1FFFFF => T4 Tx Tx Tx
+   */
+  c3 = *(uchar *)(str + 3) ^ Tx;
+  if (c3 & Testx) goto bad;  // not 10xxxxxx; a NUL byte also ends up here
+  if (c < T5) {
+    l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+    if (l <= Rune3 || l > Runemax) goto bad;  // overlong, or above Runemax
+    *rune = (Rune)l;
+    return 4;
+  }
+
+  /*
+   * Support for 5-byte or longer UTF-8 would go here, but
+   * since we don't have that, we'll just fall through to bad.
+   */
+
+  /*
+   * bad decoding
+   */
+bad:
+  *rune = Bad;
+  return 1;  // malformed input: report a single byte
+}
+
+int isvalidcharntorune(const char *str, int length, Rune *rune, int *consumed) {
+  *consumed = charntorune(rune, str, length);  // 0 means the input was cut short
+  return *rune != Runeerror || *consumed == 3;  // 3 bytes => input held U+FFFD
+}
+
+int runetochar(char *str, const Rune *rune) {
+  /* Rune is unsigned (see utf.h); widen it for the range checks below. */
+  unsigned long c;
+
+  /*
+   * one character sequence
+   *   00000-0007F => 00-7F
+   */
+  c = *rune;
+  if (c <= Rune1) {
+    str[0] = (char)c;
+    return 1;
+  }
+
+  /*
+   * two character sequence
+   *   0080-07FF => T2 Tx
+   */
+  if (c <= Rune2) {
+    str[0] = (char)(T2 | (c >> 1 * Bitx));
+    str[1] = (char)(Tx | (c & Maskx));
+    return 2;
+  }
+
+  /*
+   * If the Rune is out of range or a surrogate half, convert it to the error
+   * rune. Do this test here because the error rune encodes to three bytes.
+   * Doing it earlier would duplicate work, since an out of range
+   * Rune wouldn't have fit in one or two bytes.
+   */
+  if (c > Runemax) c = Runeerror;
+  if (SurrogateMin <= c && c <= SurrogateMax) c = Runeerror;
+
+  /*
+   * three character sequence
+   *   0800-FFFF => T3 Tx Tx
+   */
+  if (c <= Rune3) {
+    str[0] = (char)(T3 | (c >> 2 * Bitx));
+    str[1] = (char)(Tx | ((c >> 1 * Bitx) & Maskx));
+    str[2] = (char)(Tx | (c & Maskx));
+    return 3;
+  }
+
+  /*
+   * four character sequence (c is at most Runemax after the check above)
+   *     10000-10FFFF => T4 Tx Tx Tx
+   */
+  str[0] = (char)(T4 | (c >> 3 * Bitx));
+  str[1] = (char)(Tx | ((c >> 2 * Bitx) & Maskx));
+  str[2] = (char)(Tx | ((c >> 1 * Bitx) & Maskx));
+  str[3] = (char)(Tx | (c & Maskx));
+  return 4;
+}
+
+int runelen(Rune rune) {  // number of bytes runetochar emits for this rune
+  char str[10];  // scratch output; runetochar writes at most 4 bytes
+
+  return runetochar(str, &rune);
+}
+
+int runenlen(const Rune *r, int nrune) {  // sums per-rune byte counts
+  int nb, c;
+
+  nb = 0;
+  while (nrune--) {  // NOTE(review): assumes nrune >= 0 -- confirm at callers
+    c = (int)*r++;
+    if (c <= Rune1)
+      nb++;
+    else if (c <= Rune2)
+      nb += 2;
+    else if (c <= Rune3)
+      nb += 3;
+    else /* assumes c <= Rune4; above Runemax runetochar would emit 3 bytes */
+      nb += 4;
+  }
+  return nb;
+}
+
+int fullrune(const char *str, int n) {  // looks at the first byte and n only
+  if (n > 0) {
+    int c = *(uchar *)str;
+    if (c < Tx) return 1;  // single-byte value: one byte is enough
+    if (n > 1) {
+      if (c < T3) return 1;  // first byte below 1110 0000: two bytes are enough
+      if (n > 2) {
+        if (c < T4 || n > 3) return 1;  // below 1111 0000 needs 3; else needs 4
+      }
+    }
+  }
+  return 0;  // not enough bytes available for the sequence the first byte implies
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n
diff --git a/cpp/src/phonenumbers/utf/utf.h b/cpp/src/phonenumbers/utf/utf.h
index 72d01ed63..f3d16d59e 100644
--- a/cpp/src/phonenumbers/utf/utf.h
+++ b/cpp/src/phonenumbers/utf/utf.h
@@ -1,33 +1,36 @@
 /*
  * The authors of this software are Rob Pike and Ken Thompson.
  *              Copyright (c) 1998-2002 by Lucent Technologies.
- *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
- * Permission to use, copy, modify, and distribute this software for any
- * purpose without fee is hereby granted, provided that this entire notice
+ *              Portions Copyright (c) 2009 The Go Authors.  All rights
+ * reserved. Permission to use, copy, modify, and distribute this software for
+ * any purpose without fee is hereby granted, provided that this entire notice
  * is included in all copies of any software which is or includes a copy
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF
+ * THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 
 #ifndef _UTFH_
 #define _UTFH_ 1
 
-typedef unsigned int Rune;	/* Code-point values in Unicode 4.0 are 21 bits wide.*/
+namespace i18n {
+namespace phonenumbers {
 
-enum
-{
-  UTFmax	= 4,		/* maximum bytes per rune */
-  Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
-  Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
-  Runeerror	= 0xFFFD,	/* decoding error in UTF */
-  Runemax	= 0x10FFFF,	/* maximum rune value */
+/* Code-point values in Unicode 4.0 are 21 bits wide. */
+typedef unsigned int Rune;
+
+enum {
+  UTFmax = 4,         /* maximum bytes per rune */
+  Runesync = 0x80,    /* cannot represent part of a UTF sequence (<) */
+  Runeself = 0x80,    /* rune and UTF sequences are the same (<) */
+  Runeerror = 0xFFFD, /* decoding error in UTF */
+  Runemax = 0x10FFFF, /* maximum rune value */
 };
 
-#ifdef	__cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -41,14 +44,13 @@ extern "C" {
  * SEE ALSO
  * utf (7)
  * tcs (1)
-*/
+ */
 
 // runetochar copies (encodes) one rune, pointed to by r, to at most
 // UTFmax bytes starting at s and returns the number of bytes generated.
 
 int runetochar(char* s, const Rune* r);
 
-
 // chartorune copies (decodes) at most UTFmax bytes starting at s to
 // one rune, pointed to by r, and returns the number of bytes consumed.
 // If the input is not exactly in UTF format, chartorune will set *r
@@ -61,7 +63,6 @@ int runetochar(char* s, const Rune* r);
 
 int chartorune(Rune* r, const char* s);
 
-
 // charntorune is like chartorune, except that it will access at most
 // n bytes of s.  If the UTF sequence is incomplete within n bytes,
 // charntorune will set *r to Runeerror and return 0. If it is complete
@@ -82,13 +83,11 @@ int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed);
 
 int runelen(Rune r);
 
-
 // runenlen returns the number of bytes required to convert the n
 // runes pointed to by r into UTF.
 
 int runenlen(const Rune* r, int n);
 
-
 // fullrune returns 1 if the string s of length n is long enough to be
 // decoded by chartorune, and 0 otherwise. This does not guarantee
 // that the string contains a legal UTF encoding. This routine is used
@@ -106,7 +105,6 @@ int fullrune(const char* s, int n);
 
 int utflen(const char* s);
 
-
 // utfnlen returns the number of complete runes that are represented
 // by the first n bytes of the UTF string s. If the last few bytes of
 // the string contain an incompletely coded rune, utfnlen will not
@@ -115,7 +113,6 @@ int utflen(const char* s);
 
 int utfnlen(const char* s, long n);
 
-
 // utfrune returns a pointer to the first occurrence of rune r in the
 // UTF string s, or 0 if r does not occur in the string.  The NULL
 // byte terminating a string is considered to be part of the string s.
@@ -123,7 +120,6 @@ int utfnlen(const char* s, long n);
 
 /*const*/ char* utfrune(const char* s, Rune r);
 
-
 // utfrrune returns a pointer to the last occurrence of rune r in the
 // UTF string s, or 0 if r does not occur in the string.  The NULL
 // byte terminating a string is considered to be part of the string s.
@@ -131,22 +127,18 @@ int utfnlen(const char* s, long n);
 
 /*const*/ char* utfrrune(const char* s, Rune r);
 
-
 // utfutf returns a pointer to the first occurrence of the UTF string
 // s2 as a UTF substring of s1, or 0 if there is none. If s2 is the
 // null string, utfutf returns s1. (cf. strstr)
 
 const char* utfutf(const char* s1, const char* s2);
 
-
 // utfecpy copies UTF sequences until a null sequence has been copied,
 // but writes no sequences beyond es1.  If any sequences are copied,
 // s1 is terminated by a null sequence, and a pointer to that sequence
 // is returned.  Otherwise, the original s1 is returned. (cf. strecpy)
 
-char* utfecpy(char *s1, char *es1, const char *s2);
-
-
+char* utfecpy(char* s1, char* es1, const char* s2);
 
 // These functions are rune-string analogues of the corresponding
 // functions in strcat (3).
@@ -177,8 +169,6 @@ const Rune* runestrrchr(const Rune* s, Rune c);
 long runestrlen(const Rune* s);
 const Rune* runestrstr(const Rune* s1, const Rune* s2);
 
-
-
 // The following routines test types and modify cases for Unicode
 // characters.  Unicode defines some characters as letters and
 // specifies three cases: upper, lower, and title.  Mappings among the
@@ -200,7 +190,6 @@ Rune toupperrune(Rune r);
 Rune tolowerrune(Rune r);
 Rune totitlerune(Rune r);
 
-
 // isupperrune tests for upper case characters, including Unicode
 // upper case letters and targets of the toupper mapping. islowerrune
 // and istitlerune are defined analogously.
@@ -209,31 +198,30 @@ int isupperrune(Rune r);
 int islowerrune(Rune r);
 int istitlerune(Rune r);
 
-
 // isalpharune tests for Unicode letters; this includes ideographs in
 // addition to alphabetic characters.
 
 int isalpharune(Rune r);
 
-
 // isdigitrune tests for digits. Non-digit numbers, such as Roman
 // numerals, are not included.
 
 int isdigitrune(Rune r);
 
-
 // isspacerune tests for whitespace characters, including "C" locale
 // whitespace, Unicode defined whitespace, and the "zero-width
 // non-break space" character.
 
 int isspacerune(Rune r);
 
-
 // (The comments in this file were copied from the manpage files rune.3,
 // isalpharune.3, and runestrcat.3. Some formatting changes were also made
 // to conform to Google style. /JRM 11/11/05)
 
-#ifdef	__cplusplus
+#ifdef __cplusplus
 }
 #endif
+
+}  // namespace phonenumbers
+}  // namespace i18n
 
diff --git a/cpp/src/phonenumbers/utf/utfdef.h b/cpp/src/phonenumbers/utf/utfdef.h
index 4bbdfc643..4e69d12ba 100644
--- a/cpp/src/phonenumbers/utf/utfdef.h
+++ b/cpp/src/phonenumbers/utf/utfdef.h
@@ -7,11 +7,14 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF
+ * THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 
+#ifndef UTIL_UTF8_UTFDEF_H__
+#define UTIL_UTF8_UTFDEF_H__
+
 #define uchar _utfuchar
 #define ushort _utfushort
 #define uint _utfuint
@@ -19,9 +22,11 @@
 #define vlong _utfvlong
 #define uvlong _utfuvlong
 
-typedef unsigned char		uchar;
-typedef unsigned short		ushort;
-typedef unsigned int		uint;
-typedef unsigned long		ulong;
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+#define nelem(x) (sizeof(x) / sizeof((x)[0]))
 
-#define nelem(x) (sizeof(x)/sizeof((x)[0]))
+#endif  // UTIL_UTF8_UTFDEF_H__
diff --git a/debian/copyright b/debian/copyright
index d95c23874..7c54d8797 100644
--- a/debian/copyright
+++ b/debian/copyright
@@ -16,7 +16,7 @@ License: Apache-2.0
  License version 2.0 can be found in the file
  `/usr/share/common-licenses/Apache-2.0'.
 
-Files: cpp/src/phonenumbers/utf/rune.c
+Files: cpp/src/phonenumbers/utf/rune.cc
  cpp/src/phonenumbers/utf/utf.h
  cpp/src/phonenumbers/utf/utfdef.h
 Copyright: 1998-2002, Lucent Technologies