Merge e7ec4de1cc into 209b9d524b

4 weeks ago · e3b2c125bc
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@ -277,7 +277,7 @@ set (
  "src/phonenumbers/string_byte_sink.cc"
  "src/phonenumbers/stringutil.cc"
  "src/phonenumbers/unicodestring.cc"
  "src/phonenumbers/utf/rune.c"
  "src/phonenumbers/utf/rune.cc"
  "src/phonenumbers/utf/unicodetext.cc"
  "src/phonenumbers/utf/unilib.cc"
 )
--- a/cpp/src/phonenumbers/utf/rune.c
+++ b/cpp/src/phonenumbers/utf/rune.c
@ -1,358 +0,0 @@
 /*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 2002 by Lucent Technologies.
 *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #include "phonenumbers/utf/utf.h"
 #include "phonenumbers/utf/utfdef.h"

 enum
 {
 	/* Number of payload bits held by each kind of UTF-8 byte. */
 	Bit1	= 7,	/* the only byte of a 1-byte sequence */
 	Bitx	= 6,	/* any continuation byte */
 	Bit2	= 5,	/* lead byte of a 2-byte sequence */
 	Bit3	= 4,	/* lead byte of a 3-byte sequence */
 	Bit4	= 3,	/* lead byte of a 4-byte sequence */
 	Bit5	= 2,	/* used only to derive T5 */

 	/* Tag bits: what remains of 0xFF once the payload bits are cleared. */
 	T1	= 0xFF & ~((1<<(Bit1+1))-1),	/* 0000 0000 */
 	Tx	= 0xFF & ~((1<<(Bitx+1))-1),	/* 1000 0000 */
 	T2	= 0xFF & ~((1<<(Bit2+1))-1),	/* 1100 0000 */
 	T3	= 0xFF & ~((1<<(Bit3+1))-1),	/* 1110 0000 */
 	T4	= 0xFF & ~((1<<(Bit4+1))-1),	/* 1111 0000 */
 	T5	= 0xFF & ~((1<<(Bit5+1))-1),	/* 1111 1000 */

 	/* Largest value that fits in a 1-, 2-, 3- and 4-byte sequence. */
 	Rune1	= (1<<Bit1)-1,			/* 0000 0000 0111 1111 */
 	Rune2	= (1<<(Bit2+Bitx))-1,		/* 0000 0111 1111 1111 */
 	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
 	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */

 	Maskx	= (1<<Bitx)-1,			/* 0011 1111: payload of a continuation byte */
 	Testx	= 0xFF & ~Maskx,		/* 1100 0000: tag positions of a continuation byte */

 	/* Surrogate range; the decoders reject it and runetochar replaces it. */
 	SurrogateMin	= 0xD800,
 	SurrogateMax	= 0xDFFF,

 	/* Value stored in the output rune on a decoding failure. */
 	Bad	= Runeerror,
 };

 /*
 * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
 * This is a slower but "safe" version of the old chartorune
 * that works on strings that are not necessarily null-terminated.
 *
 * If you know for sure that your string is null-terminated,
 * chartorune will be a bit faster.
 *
 * It is guaranteed not to attempt to access "length"
 * past the incoming pointer.  This is to avoid
 * possible access violations.  If the string appears to be
 * well-formed but incomplete (i.e., to get the whole Rune
 * we'd need to read past str+length) then we'll set the Rune
 * to Bad and return 0.
 *
 * Note that if we have decoding problems for other
 * reasons, we return 1 instead of 0.
 */
 int
 charntorune(Rune *rune, const char *str, int length)
 {
 	const uchar *p = (const uchar*)str;
 	long cp = Bad;	/* stays Bad unless a sequence decodes cleanly */
 	int used = 1;	/* a malformed sequence reports one byte consumed */
 	int lead, x1, x2, x3;
 	long v;

 	/*
 	 * Single-exit decoder: every "break" leaves cp/used describing the
 	 * outcome, and the rune is written exactly once after the block.
 	 * Bytes are only read after the matching length check has passed.
 	 */
 	do {
 		/* Not allowed to read anything at all. */
 		if(length <= 0) {
 			used = 0;
 			break;
 		}

 		/*
 		 * one character sequence (7-bit value)
 		 *	00000-0007F => T1
 		 */
 		lead = p[0];
 		if(lead < Tx) {
 			cp = lead;
 			break;
 		}

 		/* A second byte is required but lies outside the window. */
 		if(length <= 1) {
 			used = 0;
 			break;
 		}

 		/* XOR with Tx clears the tag of a proper continuation byte. */
 		x1 = p[1] ^ Tx;
 		if(x1 & Testx)
 			break;

 		/*
 		 * two character sequence (11-bit value)
 		 *	0080-07FF => T2 Tx
 		 */
 		if(lead < T3) {
 			if(lead < T2)
 				break;	/* a continuation byte cannot start a sequence */
 			v = ((lead << Bitx) | x1) & Rune2;
 			if(v > Rune1) {	/* otherwise it would have fit in one byte */
 				cp = v;
 				used = 2;
 			}
 			break;
 		}

 		/* A third byte is required but lies outside the window. */
 		if(length <= 2) {
 			used = 0;
 			break;
 		}

 		x2 = p[2] ^ Tx;
 		if(x2 & Testx)
 			break;

 		/*
 		 * three character sequence (16-bit value)
 		 *	0800-FFFF => T3 Tx Tx
 		 */
 		if(lead < T4) {
 			v = ((((lead << Bitx) | x1) << Bitx) | x2) & Rune3;
 			/* Must need three bytes and must not be a surrogate half. */
 			if(v > Rune2 && (v < SurrogateMin || v > SurrogateMax)) {
 				cp = v;
 				used = 3;
 			}
 			break;
 		}

 		/* A fourth byte is required but lies outside the window. */
 		if(length <= 3) {
 			used = 0;
 			break;
 		}

 		x3 = p[3] ^ Tx;
 		if(x3 & Testx)
 			break;

 		/*
 		 * four character sequence (21-bit value)
 		 *	10000-1FFFFF => T4 Tx Tx Tx
 		 */
 		if(lead < T5) {
 			v = ((((((lead << Bitx) | x1) << Bitx) | x2) << Bitx) | x3) & Rune4;
 			if(v > Rune3 && v <= Runemax) {
 				cp = v;
 				used = 4;
 			}
 		}

 		/*
 		 * Lead bytes of T5 and above would introduce 5-byte or longer
 		 * forms, which are not supported: leave the result as Bad.
 		 */
 	} while(0);

 	*rune = (Rune)cp;
 	return used;
 }


 /*
 * This is the older "unsafe" version, which works fine on
 * null-terminated strings.
 */
 int
 chartorune(Rune *rune, const char *str)
 {
 	const uchar *p = (const uchar*)str;
 	long cp = Bad;	/* stays Bad unless a sequence decodes cleanly */
 	int used = 1;	/* every failure reports one byte consumed */
 	int lead, x1, x2, x3;
 	long v;

 	/*
 	 * Single-exit decoder.  There is no length argument: a further byte
 	 * is only read after all earlier bytes were accepted.
 	 */
 	do {
 		/*
 		 * one character sequence
 		 *	00000-0007F => T1
 		 */
 		lead = p[0];
 		if(lead < Tx) {
 			cp = lead;
 			break;
 		}

 		/* XOR with Tx clears the tag of a proper continuation byte. */
 		x1 = p[1] ^ Tx;
 		if(x1 & Testx)
 			break;

 		/*
 		 * two character sequence
 		 *	0080-07FF => T2 Tx
 		 */
 		if(lead < T3) {
 			if(lead < T2)
 				break;	/* a continuation byte cannot start a sequence */
 			v = ((lead << Bitx) | x1) & Rune2;
 			if(v > Rune1) {	/* otherwise it would have fit in one byte */
 				cp = v;
 				used = 2;
 			}
 			break;
 		}

 		x2 = p[2] ^ Tx;
 		if(x2 & Testx)
 			break;

 		/*
 		 * three character sequence
 		 *	0800-FFFF => T3 Tx Tx
 		 */
 		if(lead < T4) {
 			v = ((((lead << Bitx) | x1) << Bitx) | x2) & Rune3;
 			/* Must need three bytes and must not be a surrogate half. */
 			if(v > Rune2 && (v < SurrogateMin || v > SurrogateMax)) {
 				cp = v;
 				used = 3;
 			}
 			break;
 		}

 		x3 = p[3] ^ Tx;
 		if(x3 & Testx)
 			break;

 		/*
 		 * four character sequence (21-bit value)
 		 *	10000-1FFFFF => T4 Tx Tx Tx
 		 */
 		if(lead < T5) {
 			v = ((((((lead << Bitx) | x1) << Bitx) | x2) << Bitx) | x3) & Rune4;
 			if(v > Rune3 && v <= Runemax) {
 				cp = v;
 				used = 4;
 			}
 		}

 		/*
 		 * Lead bytes of T5 and above would introduce 5-byte or longer
 		 * forms, which are not supported: leave the result as Bad.
 		 */
 	} while(0);

 	*rune = (Rune)cp;
 	return used;
 }

 /*
  * Decode one rune from at most length bytes of str via charntorune,
  * storing the rune in *rune and the byte count it returned in *consumed.
  *
  * Returns non-zero when the decoded rune is not Runeerror, or when it is
  * Runeerror but exactly three bytes were consumed.  charntorune reports
  * 0 or 1 bytes on failure, so three bytes means the error rune itself
  * was present in the input.
  */
 int
 isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed)
 {
 	int used = charntorune(rune, str, length);

 	*consumed = used;
 	if(*rune != Runeerror)
 		return 1;
 	return used == 3;
 }

 /*
  * Encode *rune as UTF-8 into str and return the number of bytes written
  * (1 to 4).  Runes above Runemax and surrogate halves are written as
  * Runeerror, which takes three bytes.
  */
 int
 runetochar(char *str, const Rune *rune)
 {
 	/* Range checks are done on an unsigned copy of the rune. */
 	unsigned long cp = *rune;
 	int nbytes, tag, i;

 	/*
 	 * one character sequence
 	 *	00000-0007F => 00-7F
 	 */
 	if(cp <= Rune1) {
 		str[0] = (char)cp;
 		return 1;
 	}

 	if(cp <= Rune2) {
 		/*
 		 * two character sequence
 		 *	0080-07FF => T2 Tx
 		 */
 		nbytes = 2;
 		tag = T2;
 	} else {
 		/*
 		 * Substitute the error rune only now: an out-of-range rune
 		 * or surrogate half could never have matched the one- or
 		 * two-byte cases above.
 		 */
 		if(cp > Runemax || (cp >= SurrogateMin && cp <= SurrogateMax))
 			cp = Runeerror;

 		if(cp <= Rune3) {
 			/*
 			 * three character sequence
 			 *	0800-FFFF => T3 Tx Tx
 			 */
 			nbytes = 3;
 			tag = T3;
 		} else {
 			/*
 			 * four character sequence (21-bit value)
 			 *     10000-1FFFFF => T4 Tx Tx Tx
 			 */
 			nbytes = 4;
 			tag = T4;
 		}
 	}

 	/* Emit continuation bytes from the tail, six payload bits each. */
 	for(i = nbytes - 1; i > 0; i--) {
 		str[i] = (char)(Tx | (cp & Maskx));
 		cp >>= Bitx;
 	}
 	/* The remaining high bits go into the lead byte under its tag. */
 	str[0] = (char)(tag | cp);
 	return nbytes;
 }

 /*
  * Return the number of bytes runetochar produces for rune, by encoding
  * it into a scratch buffer that is then discarded.
  */
 int
 runelen(Rune rune)
 {
 	char scratch[10];
 	int nbytes;

 	nbytes = runetochar(scratch, &rune);
 	return nbytes;
 }

 int
 runenlen(const Rune *r, int nrune)
 {
 	int nb, c;

 	nb = 0;
 	while(nrune--) {
 		c = (int)*r++;
 		if (c <= Rune1)
 			nb++;
 		else if (c <= Rune2)
 			nb += 2;
 		else if (c <= Rune3)
 			nb += 3;
 		else /* assert(c <= Rune4) */
 			nb += 4;
 	}
 	return nb;
 }

 /*
  * Return 1 if the first n bytes of str are enough to hold the whole
  * sequence announced by its first byte, and 0 otherwise (including
  * n <= 0).  Only the first byte is inspected; the remaining bytes are
  * not checked for validity.
  */
 int
 fullrune(const char *str, int n)
 {
 	int lead, need;

 	if (n <= 0)
 		return 0;

 	/* Classify the first byte by the sequence length it calls for. */
 	lead = *(uchar*)str;
 	if (lead < Tx)
 		need = 1;
 	else if (lead < T3)
 		need = 2;
 	else if (lead < T4)
 		need = 3;
 	else
 		need = 4;

 	return n >= need;
 }
--- a/cpp/src/phonenumbers/utf/rune.cc
+++ b/cpp/src/phonenumbers/utf/rune.cc
@ -0,0 +1,325 @@
 /*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 2002 by Lucent Technologies.
 *              Portions Copyright (c) 2009 The Go Authors.  All rights
 * reserved. Permission to use, copy, modify, and distribute this software for
 * any purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF
 * THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #include "phonenumbers/utf/utf.h"
 #include "phonenumbers/utf/utfdef.h"

 namespace i18n {
 namespace phonenumbers {

 enum {
  /* Number of payload bits held by each kind of UTF-8 byte. */
  Bit1 = 7, /* the only byte of a 1-byte sequence */
  Bitx = 6, /* any continuation byte */
  Bit2 = 5, /* lead byte of a 2-byte sequence */
  Bit3 = 4, /* lead byte of a 3-byte sequence */
  Bit4 = 3, /* lead byte of a 4-byte sequence */
  Bit5 = 2, /* used only to derive T5 */

  /* Tag bits: what remains of 0xFF once the payload bits are cleared. */
  T1 = 0xFF & ~((1 << (Bit1 + 1)) - 1), /* 0000 0000 */
  Tx = 0xFF & ~((1 << (Bitx + 1)) - 1), /* 1000 0000 */
  T2 = 0xFF & ~((1 << (Bit2 + 1)) - 1), /* 1100 0000 */
  T3 = 0xFF & ~((1 << (Bit3 + 1)) - 1), /* 1110 0000 */
  T4 = 0xFF & ~((1 << (Bit4 + 1)) - 1), /* 1111 0000 */
  T5 = 0xFF & ~((1 << (Bit5 + 1)) - 1), /* 1111 1000 */

  /* Largest value that fits in a 1-, 2-, 3- and 4-byte sequence. */
  Rune1 = (1 << Bit1) - 1,              /* 0000 0000 0111 1111 */
  Rune2 = (1 << (Bit2 + Bitx)) - 1,     /* 0000 0111 1111 1111 */
  Rune3 = (1 << (Bit3 + 2 * Bitx)) - 1, /* 1111 1111 1111 1111 */
  Rune4 = (1 << (Bit4 + 3 * Bitx)) - 1, /* 0001 1111 1111 1111 1111 1111 */

  Maskx = (1 << Bitx) - 1, /* 0011 1111: payload of a continuation byte */
  Testx = 0xFF & ~Maskx,   /* 1100 0000: tag positions of a continuation byte */

  /* Surrogate range; the decoders reject it and runetochar replaces it. */
  SurrogateMin = 0xD800,
  SurrogateMax = 0xDFFF,

  /* Value stored in the output rune on a decoding failure. */
  Bad = Runeerror,
 };

 /*
 * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
 * This is a slower but "safe" version of the old chartorune
 * that works on strings that are not necessarily null-terminated.
 *
 * If you know for sure that your string is null-terminated,
 * chartorune will be a bit faster.
 *
 * It is guaranteed not to attempt to access "length"
 * past the incoming pointer.  This is to avoid
 * possible access violations.  If the string appears to be
 * well-formed but incomplete (i.e., to get the whole Rune
 * we'd need to read past str+length) then we'll set the Rune
 * to Bad and return 0.
 *
 * Note that if we have decoding problems for other
 * reasons, we return 1 instead of 0.
 */
 int charntorune(Rune *rune, const char *str, int length) {
  const uchar *p = (const uchar *)str;
  long cp = Bad; /* stays Bad unless a sequence decodes cleanly */
  int used = 1;  /* a malformed sequence reports one byte consumed */
  int lead, x1, x2, x3;
  long v;

  /*
   * Single-exit decoder: every "break" leaves cp/used describing the
   * outcome, and the rune is written exactly once after the block.
   * Bytes are only read after the matching length check has passed.
   */
  do {
    /* Not allowed to read anything at all. */
    if (length <= 0) {
      used = 0;
      break;
    }

    /*
     * one character sequence (7-bit value)
     *	00000-0007F => T1
     */
    lead = p[0];
    if (lead < Tx) {
      cp = lead;
      break;
    }

    /* A second byte is required but lies outside the window. */
    if (length <= 1) {
      used = 0;
      break;
    }

    /* XOR with Tx clears the tag of a proper continuation byte. */
    x1 = p[1] ^ Tx;
    if (x1 & Testx) break;

    /*
     * two character sequence (11-bit value)
     *	0080-07FF => T2 Tx
     */
    if (lead < T3) {
      if (lead < T2) break; /* a continuation byte cannot start a sequence */
      v = ((lead << Bitx) | x1) & Rune2;
      if (v > Rune1) { /* otherwise it would have fit in one byte */
        cp = v;
        used = 2;
      }
      break;
    }

    /* A third byte is required but lies outside the window. */
    if (length <= 2) {
      used = 0;
      break;
    }

    x2 = p[2] ^ Tx;
    if (x2 & Testx) break;

    /*
     * three character sequence (16-bit value)
     *	0800-FFFF => T3 Tx Tx
     */
    if (lead < T4) {
      v = ((((lead << Bitx) | x1) << Bitx) | x2) & Rune3;
      /* Must need three bytes and must not be a surrogate half. */
      if (v > Rune2 && (v < SurrogateMin || v > SurrogateMax)) {
        cp = v;
        used = 3;
      }
      break;
    }

    /* A fourth byte is required but lies outside the window. */
    if (length <= 3) {
      used = 0;
      break;
    }

    x3 = p[3] ^ Tx;
    if (x3 & Testx) break;

    /*
     * four character sequence (21-bit value)
     *	10000-1FFFFF => T4 Tx Tx Tx
     */
    if (lead < T5) {
      v = ((((((lead << Bitx) | x1) << Bitx) | x2) << Bitx) | x3) & Rune4;
      if (v > Rune3 && v <= Runemax) {
        cp = v;
        used = 4;
      }
    }

    /*
     * Lead bytes of T5 and above would introduce 5-byte or longer
     * forms, which are not supported: leave the result as Bad.
     */
  } while (0);

  *rune = (Rune)cp;
  return used;
 }

 /*
 * This is the older "unsafe" version, which works fine on
 * null-terminated strings.
 */
 int chartorune(Rune *rune, const char *str) {
  const uchar *p = (const uchar *)str;
  long cp = Bad; /* stays Bad unless a sequence decodes cleanly */
  int used = 1;  /* every failure reports one byte consumed */
  int lead, x1, x2, x3;
  long v;

  /*
   * Single-exit decoder.  There is no length argument: a further byte
   * is only read after all earlier bytes were accepted.
   */
  do {
    /*
     * one character sequence
     *	00000-0007F => T1
     */
    lead = p[0];
    if (lead < Tx) {
      cp = lead;
      break;
    }

    /* XOR with Tx clears the tag of a proper continuation byte. */
    x1 = p[1] ^ Tx;
    if (x1 & Testx) break;

    /*
     * two character sequence
     *	0080-07FF => T2 Tx
     */
    if (lead < T3) {
      if (lead < T2) break; /* a continuation byte cannot start a sequence */
      v = ((lead << Bitx) | x1) & Rune2;
      if (v > Rune1) { /* otherwise it would have fit in one byte */
        cp = v;
        used = 2;
      }
      break;
    }

    x2 = p[2] ^ Tx;
    if (x2 & Testx) break;

    /*
     * three character sequence
     *	0800-FFFF => T3 Tx Tx
     */
    if (lead < T4) {
      v = ((((lead << Bitx) | x1) << Bitx) | x2) & Rune3;
      /* Must need three bytes and must not be a surrogate half. */
      if (v > Rune2 && (v < SurrogateMin || v > SurrogateMax)) {
        cp = v;
        used = 3;
      }
      break;
    }

    x3 = p[3] ^ Tx;
    if (x3 & Testx) break;

    /*
     * four character sequence (21-bit value)
     *	10000-1FFFFF => T4 Tx Tx Tx
     */
    if (lead < T5) {
      v = ((((((lead << Bitx) | x1) << Bitx) | x2) << Bitx) | x3) & Rune4;
      if (v > Rune3 && v <= Runemax) {
        cp = v;
        used = 4;
      }
    }

    /*
     * Lead bytes of T5 and above would introduce 5-byte or longer
     * forms, which are not supported: leave the result as Bad.
     */
  } while (0);

  *rune = (Rune)cp;
  return used;
 }

 /*
  * Decode one rune from at most length bytes of str via charntorune,
  * storing the rune in *rune and the byte count it returned in *consumed.
  *
  * Returns non-zero when the decoded rune is not Runeerror, or when it is
  * Runeerror but exactly three bytes were consumed.  charntorune reports
  * 0 or 1 bytes on failure, so three bytes means the error rune itself
  * was present in the input.
  */
 int isvalidcharntorune(const char *str, int length, Rune *rune, int *consumed) {
  int used = charntorune(rune, str, length);

  *consumed = used;
  if (*rune != Runeerror) return 1;
  return used == 3;
 }

 /*
  * Encode *rune as UTF-8 into str and return the number of bytes written
  * (1 to 4).  Runes above Runemax and surrogate halves are written as
  * Runeerror, which takes three bytes.
  */
 int runetochar(char *str, const Rune *rune) {
  /* Range checks are done on an unsigned copy of the rune. */
  unsigned long cp = *rune;
  int nbytes, tag, i;

  /*
   * one character sequence
   *	00000-0007F => 00-7F
   */
  if (cp <= Rune1) {
    str[0] = (char)cp;
    return 1;
  }

  if (cp <= Rune2) {
    /*
     * two character sequence
     *	0080-07FF => T2 Tx
     */
    nbytes = 2;
    tag = T2;
  } else {
    /*
     * Substitute the error rune only now: an out-of-range rune or
     * surrogate half could never have matched the one- or two-byte
     * cases above.
     */
    if (cp > Runemax || (cp >= SurrogateMin && cp <= SurrogateMax))
      cp = Runeerror;

    if (cp <= Rune3) {
      /*
       * three character sequence
       *	0800-FFFF => T3 Tx Tx
       */
      nbytes = 3;
      tag = T3;
    } else {
      /*
       * four character sequence (21-bit value)
       *     10000-1FFFFF => T4 Tx Tx Tx
       */
      nbytes = 4;
      tag = T4;
    }
  }

  /* Emit continuation bytes from the tail, six payload bits each. */
  for (i = nbytes - 1; i > 0; i--) {
    str[i] = (char)(Tx | (cp & Maskx));
    cp >>= Bitx;
  }
  /* The remaining high bits go into the lead byte under its tag. */
  str[0] = (char)(tag | cp);
  return nbytes;
 }

 /*
  * Return the number of bytes runetochar produces for rune, by encoding
  * it into a scratch buffer that is then discarded.
  */
 int runelen(Rune rune) {
  char scratch[10];
  int nbytes;

  nbytes = runetochar(scratch, &rune);
  return nbytes;
 }

 int runenlen(const Rune *r, int nrune) {
  int nb, c;

  nb = 0;
  while (nrune--) {
    c = (int)*r++;
    if (c <= Rune1)
      nb++;
    else if (c <= Rune2)
      nb += 2;
    else if (c <= Rune3)
      nb += 3;
    else /* assert(c <= Rune4) */
      nb += 4;
  }
  return nb;
 }

 /*
  * Return 1 if the first n bytes of str are enough to hold the whole
  * sequence announced by its first byte, and 0 otherwise (including
  * n <= 0).  Only the first byte is inspected; the remaining bytes are
  * not checked for validity.
  */
 int fullrune(const char *str, int n) {
  int lead, need;

  if (n <= 0) return 0;

  /* Classify the first byte by the sequence length it calls for. */
  lead = *(uchar *)str;
  if (lead < Tx)
    need = 1;
  else if (lead < T3)
    need = 2;
  else if (lead < T4)
    need = 3;
  else
    need = 4;

  return n >= need;
 }

 }  // namespace phonenumbers
 }  // namespace i18n
--- a/cpp/src/phonenumbers/utf/utf.h
+++ b/cpp/src/phonenumbers/utf/utf.h
@ -1,33 +1,36 @@
 /*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 1998-2002 by Lucent Technologies.
 *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 *              Portions Copyright (c) 2009 The Go Authors.  All rights
 * reserved. Permission to use, copy, modify, and distribute this software for
 * any purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF
 * THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */

 #ifndef _UTFH_
 #define _UTFH_ 1

 typedef unsigned int Rune;	/* Code-point values in Unicode 4.0 are 21 bits wide.*/
 namespace i18n {
 namespace phonenumbers {

 enum
 {
  UTFmax	= 4,		/* maximum bytes per rune */
  Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
  Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
  Runeerror	= 0xFFFD,	/* decoding error in UTF */
  Runemax	= 0x10FFFF,	/* maximum rune value */
 typedef unsigned int
    Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/

 enum {
  UTFmax = 4,         /* maximum bytes per rune */
  Runesync = 0x80,    /* cannot represent part of a UTF sequence (<) */
  Runeself = 0x80,    /* rune and UTF sequences are the same (<) */
  Runeerror = 0xFFFD, /* decoding error in UTF */
  Runemax = 0x10FFFF, /* maximum rune value */
 };

 #ifdef	__cplusplus
 #ifdef __cplusplus
 extern "C" {
 #endif

@ -41,14 +44,13 @@ extern "C" {
 * SEE ALSO
 * utf (7)
 * tcs (1)
 */
 */

 // runetochar copies (encodes) one rune, pointed to by r, to at most
 // UTFmax bytes starting at s and returns the number of bytes generated.

 int runetochar(char* s, const Rune* r);


 // chartorune copies (decodes) at most UTFmax bytes starting at s to
 // one rune, pointed to by r, and returns the number of bytes consumed.
 // If the input is not exactly in UTF format, chartorune will set *r
@ -61,7 +63,6 @@ int runetochar(char* s, const Rune* r);

 int chartorune(Rune* r, const char* s);


 // charntorune is like chartorune, except that it will access at most
 // n bytes of s.  If the UTF sequence is incomplete within n bytes,
 // charntorune will set *r to Runeerror and return 0. If it is complete
@ -82,13 +83,11 @@ int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed);

 int runelen(Rune r);


 // runenlen returns the number of bytes required to convert the n
 // runes pointed to by r into UTF.

 int runenlen(const Rune* r, int n);


 // fullrune returns 1 if the string s of length n is long enough to be
 // decoded by chartorune, and 0 otherwise. This does not guarantee
 // that the string contains a legal UTF encoding. This routine is used
@ -106,7 +105,6 @@ int fullrune(const char* s, int n);

 int utflen(const char* s);


 // utfnlen returns the number of complete runes that are represented
 // by the first n bytes of the UTF string s. If the last few bytes of
 // the string contain an incompletely coded rune, utfnlen will not
@ -115,7 +113,6 @@ int utflen(const char* s);

 int utfnlen(const char* s, long n);


 // utfrune returns a pointer to the first occurrence of rune r in the
 // UTF string s, or 0 if r does not occur in the string.  The NULL
 // byte terminating a string is considered to be part of the string s.
@ -123,7 +120,6 @@ int utfnlen(const char* s, long n);

 /*const*/ char* utfrune(const char* s, Rune r);


 // utfrrune returns a pointer to the last occurrence of rune r in the
 // UTF string s, or 0 if r does not occur in the string.  The NULL
 // byte terminating a string is considered to be part of the string s.
@ -131,22 +127,18 @@ int utfnlen(const char* s, long n);

 /*const*/ char* utfrrune(const char* s, Rune r);


 // utfutf returns a pointer to the first occurrence of the UTF string
 // s2 as a UTF substring of s1, or 0 if there is none. If s2 is the
 // null string, utfutf returns s1. (cf. strstr)

 const char* utfutf(const char* s1, const char* s2);


 // utfecpy copies UTF sequences until a null sequence has been copied,
 // but writes no sequences beyond es1.  If any sequences are copied,
 // s1 is terminated by a null sequence, and a pointer to that sequence
 // is returned.  Otherwise, the original s1 is returned. (cf. strecpy)

 char* utfecpy(char *s1, char *es1, const char *s2);


 char* utfecpy(char* s1, char* es1, const char* s2);

 // These functions are rune-string analogues of the corresponding
 // functions in strcat (3).
@ -177,8 +169,6 @@ const Rune* runestrrchr(const Rune* s, Rune c);
 long runestrlen(const Rune* s);
 const Rune* runestrstr(const Rune* s1, const Rune* s2);



 // The following routines test types and modify cases for Unicode
 // characters.  Unicode defines some characters as letters and
 // specifies three cases: upper, lower, and title.  Mappings among the
@ -200,7 +190,6 @@ Rune toupperrune(Rune r);
 Rune tolowerrune(Rune r);
 Rune totitlerune(Rune r);


 // isupperrune tests for upper case characters, including Unicode
 // upper case letters and targets of the toupper mapping. islowerrune
 // and istitlerune are defined analogously.
@ -209,31 +198,30 @@ int isupperrune(Rune r);
 int islowerrune(Rune r);
 int istitlerune(Rune r);


 // isalpharune tests for Unicode letters; this includes ideographs in
 // addition to alphabetic characters.

 int isalpharune(Rune r);


 // isdigitrune tests for digits. Non-digit numbers, such as Roman
 // numerals, are not included.

 int isdigitrune(Rune r);


 // isspacerune tests for whitespace characters, including "C" locale
 // whitespace, Unicode defined whitespace, and the "zero-width
 // non-break space" character.

 int isspacerune(Rune r);


 // (The comments in this file were copied from the manpage files rune.3,
 // isalpharune.3, and runestrcat.3. Some formatting changes were also made
 // to conform to Google style. /JRM 11/11/05)

 #ifdef	__cplusplus
 }  // namespace phonenumbers
 }  // namespace i18n

 #ifdef __cplusplus
 }
 #endif

--- a/cpp/src/phonenumbers/utf/utfdef.h
+++ b/cpp/src/phonenumbers/utf/utfdef.h
@ -7,11 +7,14 @@
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF
 * THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */

 #ifndef UTIL_UTF8_UTFDEF_H__
 #define UTIL_UTF8_UTFDEF_H__

 #define uchar _utfuchar
 #define ushort _utfushort
 #define uint _utfuint
@ -19,9 +22,11 @@
 #define vlong _utfvlong
 #define uvlong _utfuvlong

 typedef unsigned char		uchar;
 typedef unsigned short		ushort;
 typedef unsigned int		uint;
 typedef unsigned long		ulong;
 typedef unsigned char uchar;
 typedef unsigned short ushort;
 typedef unsigned int uint;
 typedef unsigned long ulong;

 #define nelem(x) (sizeof(x) / sizeof((x)[0]))

 #define nelem(x) (sizeof(x)/sizeof((x)[0]))
 #endif  // UTIL_UTF8_UTFDEF_H__
--- a/debian/copyright
+++ b/debian/copyright
@ -16,7 +16,7 @@ License: Apache-2.0
 License version 2.0 can be found in the file
 `/usr/share/common-licenses/Apache-2.0'.

 Files: cpp/src/phonenumbers/utf/rune.c
 Files: cpp/src/phonenumbers/utf/rune.cc
 cpp/src/phonenumbers/utf/utf.h
 cpp/src/phonenumbers/utf/utfdef.h
 Copyright: 1998-2002, Lucent Technologies