From e7ec4de1cc296d34fcca214aebd3a43378893173 Mon Sep 17 00:00:00 2001 From: mcrouse Date: Thu, 4 Aug 2022 09:22:33 -0700 Subject: [PATCH] done --- cpp/CMakeLists.txt | 2 +- cpp/src/phonenumbers/utf/rune.c | 358 ------------------------------ cpp/src/phonenumbers/utf/rune.cc | 325 +++++++++++++++++++++++++++ cpp/src/phonenumbers/utf/utf.h | 60 ++--- cpp/src/phonenumbers/utf/utfdef.h | 21 +- debian/copyright | 2 +- 6 files changed, 364 insertions(+), 404 deletions(-) delete mode 100644 cpp/src/phonenumbers/utf/rune.c create mode 100644 cpp/src/phonenumbers/utf/rune.cc diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 434c593bd..35a9a1cf9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -243,7 +243,7 @@ set ( "src/phonenumbers/string_byte_sink.cc" "src/phonenumbers/stringutil.cc" "src/phonenumbers/unicodestring.cc" - "src/phonenumbers/utf/rune.c" + "src/phonenumbers/utf/rune.cc" "src/phonenumbers/utf/unicodetext.cc" "src/phonenumbers/utf/unilib.cc" ) diff --git a/cpp/src/phonenumbers/utf/rune.c b/cpp/src/phonenumbers/utf/rune.c deleted file mode 100644 index b4aa93b5d..000000000 --- a/cpp/src/phonenumbers/utf/rune.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Portions Copyright (c) 2009 The Go Authors. All rights reserved. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
- */ -#include "phonenumbers/utf/utf.h" -#include "phonenumbers/utf/utfdef.h" - -enum -{ - Bit1 = 7, - Bitx = 6, - Bit2 = 5, - Bit3 = 4, - Bit4 = 3, - Bit5 = 2, - - T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ - Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ - T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ - T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ - T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ - T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ - - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ - Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */ - - Maskx = (1< T1 - */ - c = *(uchar*)str; - if(c < Tx) { - *rune = (Rune)c; - return 1; - } - - // If we can't read more than one character we must stop - if(length <= 1) { - goto badlen; - } - - /* - * two character sequence (11-bit value) - * 0080-07FF => T2 Tx - */ - c1 = *(uchar*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = (Rune)l; - return 2; - } - - // If we can't read more than two characters we must stop - if(length <= 2) { - goto badlen; - } - - /* - * three character sequence (16-bit value) - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(uchar*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - if (SurrogateMin <= l && l <= SurrogateMax) - goto bad; - *rune = (Rune)l; - return 3; - } - - if (length <= 3) - goto badlen; - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - c3 = *(uchar*)(str+3) ^ Tx; - if (c3 & Testx) - goto bad; - if (c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if (l <= Rune3 || l > Runemax) - goto bad; - *rune = (Rune)l; - return 4; - } - - // Support for 5-byte or longer UTF-8 would go 
here, but - // since we don't have that, we'll just fall through to bad. - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; -badlen: - *rune = Bad; - return 0; - -} - - -/* - * This is the older "unsafe" version, which works fine on - * null-terminated strings. - */ -int -chartorune(Rune *rune, const char *str) -{ - int c, c1, c2, c3; - long l; - - /* - * one character sequence - * 00000-0007F => T1 - */ - c = *(uchar*)str; - if(c < Tx) { - *rune = (Rune)c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - c1 = *(uchar*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = (Rune)l; - return 2; - } - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(uchar*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - if (SurrogateMin <= l && l <= SurrogateMax) - goto bad; - *rune = (Rune)l; - return 3; - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - c3 = *(uchar*)(str+3) ^ Tx; - if (c3 & Testx) - goto bad; - if (c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if (l <= Rune3 || l > Runemax) - goto bad; - *rune = (Rune)l; - return 4; - } - - /* - * Support for 5-byte or longer UTF-8 would go here, but - * since we don't have that, we'll just fall through to bad. - */ - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; -} - -int -isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) -{ - *consumed = charntorune(rune, str, length); - return *rune != Runeerror || *consumed == 3; -} - -int -runetochar(char *str, const Rune *rune) -{ - /* Runes are signed, so convert to unsigned for range check. 
*/ - unsigned long c; - - /* - * one character sequence - * 00000-0007F => 00-7F - */ - c = *rune; - if(c <= Rune1) { - str[0] = (char)c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - if(c <= Rune2) { - str[0] = (char)(T2 | (c >> 1*Bitx)); - str[1] = (char)(Tx | (c & Maskx)); - return 2; - } - - /* - * If the Rune is out of range or a surrogate half, convert it to the error rune. - * Do this test here because the error rune encodes to three bytes. - * Doing it earlier would duplicate work, since an out of range - * Rune wouldn't have fit in one or two bytes. - */ - if (c > Runemax) - c = Runeerror; - if (SurrogateMin <= c && c <= SurrogateMax) - c = Runeerror; - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - if (c <= Rune3) { - str[0] = (char)(T3 | (c >> 2*Bitx)); - str[1] = (char)(Tx | ((c >> 1*Bitx) & Maskx)); - str[2] = (char)(Tx | (c & Maskx)); - return 3; - } - - /* - * four character sequence (21-bit value) - * 10000-1FFFFF => T4 Tx Tx Tx - */ - str[0] = (char)(T4 | (c >> 3*Bitx)); - str[1] = (char)(Tx | ((c >> 2*Bitx) & Maskx)); - str[2] = (char)(Tx | ((c >> 1*Bitx) & Maskx)); - str[3] = (char)(Tx | (c & Maskx)); - return 4; -} - -int -runelen(Rune rune) -{ - char str[10]; - - return runetochar(str, &rune); -} - -int -runenlen(const Rune *r, int nrune) -{ - int nb, c; - - nb = 0; - while(nrune--) { - c = (int)*r++; - if (c <= Rune1) - nb++; - else if (c <= Rune2) - nb += 2; - else if (c <= Rune3) - nb += 3; - else /* assert(c <= Rune4) */ - nb += 4; - } - return nb; -} - -int -fullrune(const char *str, int n) -{ - if (n > 0) { - int c = *(uchar*)str; - if (c < Tx) - return 1; - if (n > 1) { - if (c < T3) - return 1; - if (n > 2) { - if (c < T4 || n > 3) - return 1; - } - } - } - return 0; -} diff --git a/cpp/src/phonenumbers/utf/rune.cc b/cpp/src/phonenumbers/utf/rune.cc new file mode 100644 index 000000000..4e5c24b8a --- /dev/null +++ b/cpp/src/phonenumbers/utf/rune.cc @@ -0,0 +1,325 @@ +/* + * The 
authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Portions Copyright (c) 2009 The Go Authors. All rights + * reserved. Permission to use, copy, modify, and distribute this software for + * any purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF + * THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include "phonenumbers/utf/utf.h" +#include "phonenumbers/utf/utfdef.h" + +namespace i18n { +namespace phonenumbers { + +enum { + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + Bit5 = 2, + + T1 = ((1 << (Bit1 + 1)) - 1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1 << (Bitx + 1)) - 1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1 << (Bit2 + 1)) - 1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1 << (Bit3 + 1)) - 1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1 << (Bit4 + 1)) - 1) ^ 0xFF, /* 1111 0000 */ + T5 = ((1 << (Bit5 + 1)) - 1) ^ 0xFF, /* 1111 1000 */ + + Rune1 = (1 << (Bit1 + 0 * Bitx)) - 1, /* 0000 0000 0111 1111 */ + Rune2 = (1 << (Bit2 + 1 * Bitx)) - 1, /* 0000 0111 1111 1111 */ + Rune3 = (1 << (Bit3 + 2 * Bitx)) - 1, /* 1111 1111 1111 1111 */ + Rune4 = (1 << (Bit4 + 3 * Bitx)) - 1, /* 0001 1111 1111 1111 1111 1111 */ + + Maskx = (1 << Bitx) - 1, /* 0011 1111 */ + Testx = Maskx ^ 0xFF, /* 1100 0000 */ + + SurrogateMin = 0xD800, + SurrogateMax = 0xDFFF, + + Bad = Runeerror, +}; + +/* + * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24 + * This is a slower but "safe" version of the old chartorune + * that works on strings that are not necessarily null-terminated. 
+ * + * If you know for sure that your string is null-terminated, + * chartorune will be a bit faster. + * + * It is guaranteed not to attempt to access "length" + * past the incoming pointer. This is to avoid + * possible access violations. If the string appears to be + * well-formed but incomplete (i.e., to get the whole Rune + * we'd need to read past str+length) then we'll set the Rune + * to Bad and return 0. + * + * Note that if we have decoding problems for other + * reasons, we return 1 instead of 0. + */ +int charntorune(Rune *rune, const char *str, int length) { + int c, c1, c2, c3; + long l; + + /* When we're not allowed to read anything */ + if (length <= 0) { + goto badlen; + } + + /* + * one character sequence (7-bit value) + * 00000-0007F => T1 + */ + c = *(uchar *)str; + if (c < Tx) { + *rune = (Rune)c; + return 1; + } + + // If we can't read more than one character we must stop + if (length <= 1) { + goto badlen; + } + + /* + * two character sequence (11-bit value) + * 0080-07FF => T2 Tx + */ + c1 = *(uchar *)(str + 1) ^ Tx; + if (c1 & Testx) goto bad; + if (c < T3) { + if (c < T2) goto bad; + l = ((c << Bitx) | c1) & Rune2; + if (l <= Rune1) goto bad; + *rune = (Rune)l; + return 2; + } + + // If we can't read more than two characters we must stop + if (length <= 2) { + goto badlen; + } + + /* + * three character sequence (16-bit value) + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(uchar *)(str + 2) ^ Tx; + if (c2 & Testx) goto bad; + if (c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if (l <= Rune2) goto bad; + if (SurrogateMin <= l && l <= SurrogateMax) goto bad; + *rune = (Rune)l; + return 3; + } + + if (length <= 3) goto badlen; + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + c3 = *(uchar *)(str + 3) ^ Tx; + if (c3 & Testx) goto bad; + if (c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if (l <= Rune3 || l > Runemax) goto bad; + *rune = (Rune)l; + return 4; + } + 
+ // Support for 5-byte or longer UTF-8 would go here, but + // since we don't have that, we'll just fall through to bad. + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +badlen: + *rune = Bad; + return 0; +} + +/* + * This is the older "unsafe" version, which works fine on + * null-terminated strings. + */ +int chartorune(Rune *rune, const char *str) { + int c, c1, c2, c3; + long l; + + /* + * one character sequence + * 00000-0007F => T1 + */ + c = *(uchar *)str; + if (c < Tx) { + *rune = (Rune)c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(uchar *)(str + 1) ^ Tx; + if (c1 & Testx) goto bad; + if (c < T3) { + if (c < T2) goto bad; + l = ((c << Bitx) | c1) & Rune2; + if (l <= Rune1) goto bad; + *rune = (Rune)l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(uchar *)(str + 2) ^ Tx; + if (c2 & Testx) goto bad; + if (c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if (l <= Rune2) goto bad; + if (SurrogateMin <= l && l <= SurrogateMax) goto bad; + *rune = (Rune)l; + return 3; + } + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + c3 = *(uchar *)(str + 3) ^ Tx; + if (c3 & Testx) goto bad; + if (c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if (l <= Rune3 || l > Runemax) goto bad; + *rune = (Rune)l; + return 4; + } + + /* + * Support for 5-byte or longer UTF-8 would go here, but + * since we don't have that, we'll just fall through to bad. + */ + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +} + +int isvalidcharntorune(const char *str, int length, Rune *rune, int *consumed) { + *consumed = charntorune(rune, str, length); + return *rune != Runeerror || *consumed == 3; +} + +int runetochar(char *str, const Rune *rune) { + /* Runes are signed, so convert to unsigned for range check. 
*/ + unsigned long c; + + /* + * one character sequence + * 00000-0007F => 00-7F + */ + c = *rune; + if (c <= Rune1) { + str[0] = (char)c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + if (c <= Rune2) { + str[0] = (char)(T2 | (c >> 1 * Bitx)); + str[1] = (char)(Tx | (c & Maskx)); + return 2; + } + + /* + * If the Rune is out of range or a surrogate half, convert it to the error + * rune. Do this test here because the error rune encodes to three bytes. + * Doing it earlier would duplicate work, since an out of range + * Rune wouldn't have fit in one or two bytes. + */ + if (c > Runemax) c = Runeerror; + if (SurrogateMin <= c && c <= SurrogateMax) c = Runeerror; + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + if (c <= Rune3) { + str[0] = (char)(T3 | (c >> 2 * Bitx)); + str[1] = (char)(Tx | ((c >> 1 * Bitx) & Maskx)); + str[2] = (char)(Tx | (c & Maskx)); + return 3; + } + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + str[0] = (char)(T4 | (c >> 3 * Bitx)); + str[1] = (char)(Tx | ((c >> 2 * Bitx) & Maskx)); + str[2] = (char)(Tx | ((c >> 1 * Bitx) & Maskx)); + str[3] = (char)(Tx | (c & Maskx)); + return 4; +} + +int runelen(Rune rune) { + char str[10]; + + return runetochar(str, &rune); +} + +int runenlen(const Rune *r, int nrune) { + int nb, c; + + nb = 0; + while (nrune--) { + c = (int)*r++; + if (c <= Rune1) + nb++; + else if (c <= Rune2) + nb += 2; + else if (c <= Rune3) + nb += 3; + else /* assert(c <= Rune4) */ + nb += 4; + } + return nb; +} + +int fullrune(const char *str, int n) { + if (n > 0) { + int c = *(uchar *)str; + if (c < Tx) return 1; + if (n > 1) { + if (c < T3) return 1; + if (n > 2) { + if (c < T4 || n > 3) return 1; + } + } + } + return 0; +} + +} // namespace phonenumbers +} // namespace i18n diff --git a/cpp/src/phonenumbers/utf/utf.h b/cpp/src/phonenumbers/utf/utf.h index 72d01ed63..f3d16d59e 100644 --- a/cpp/src/phonenumbers/utf/utf.h +++ 
b/cpp/src/phonenumbers/utf/utf.h @@ -1,33 +1,36 @@ /* * The authors of this software are Rob Pike and Ken Thompson. * Copyright (c) 1998-2002 by Lucent Technologies. - * Portions Copyright (c) 2009 The Go Authors. All rights reserved. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice + * Portions Copyright (c) 2009 The Go Authors. All rights + * reserved. Permission to use, copy, modify, and distribute this software for + * any purpose without fee is hereby granted, provided that this entire notice * is included in all copies of any software which is or includes a copy * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF + * THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
*/ #ifndef _UTFH_ #define _UTFH_ 1 -typedef unsigned int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ +namespace i18n { +namespace phonenumbers { -enum -{ - UTFmax = 4, /* maximum bytes per rune */ - Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ - Runeself = 0x80, /* rune and UTF sequences are the same (<) */ - Runeerror = 0xFFFD, /* decoding error in UTF */ - Runemax = 0x10FFFF, /* maximum rune value */ +typedef unsigned int + Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ + +enum { + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF, /* maximum rune value */ }; -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -41,14 +44,13 @@ extern "C" { * SEE ALSO * utf (7) * tcs (1) -*/ + */ // runetochar copies (encodes) one rune, pointed to by r, to at most // UTFmax bytes starting at s and returns the number of bytes generated. int runetochar(char* s, const Rune* r); - // chartorune copies (decodes) at most UTFmax bytes starting at s to // one rune, pointed to by r, and returns the number of bytes consumed. // If the input is not exactly in UTF format, chartorune will set *r @@ -61,7 +63,6 @@ int runetochar(char* s, const Rune* r); int chartorune(Rune* r, const char* s); - // charntorune is like chartorune, except that it will access at most // n bytes of s. If the UTF sequence is incomplete within n bytes, // charntorune will set *r to Runeerror and return 0. If it is complete @@ -82,13 +83,11 @@ int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed); int runelen(Rune r); - // runenlen returns the number of bytes required to convert the n // runes pointed to by r into UTF. 
int runenlen(const Rune* r, int n); - // fullrune returns 1 if the string s of length n is long enough to be // decoded by chartorune, and 0 otherwise. This does not guarantee // that the string contains a legal UTF encoding. This routine is used @@ -106,7 +105,6 @@ int fullrune(const char* s, int n); int utflen(const char* s); - // utfnlen returns the number of complete runes that are represented // by the first n bytes of the UTF string s. If the last few bytes of // the string contain an incompletely coded rune, utfnlen will not @@ -115,7 +113,6 @@ int utflen(const char* s); int utfnlen(const char* s, long n); - // utfrune returns a pointer to the first occurrence of rune r in the // UTF string s, or 0 if r does not occur in the string. The NULL // byte terminating a string is considered to be part of the string s. @@ -123,7 +120,6 @@ int utfnlen(const char* s, long n); /*const*/ char* utfrune(const char* s, Rune r); - // utfrrune returns a pointer to the last occurrence of rune r in the // UTF string s, or 0 if r does not occur in the string. The NULL // byte terminating a string is considered to be part of the string s. @@ -131,22 +127,18 @@ int utfnlen(const char* s, long n); /*const*/ char* utfrrune(const char* s, Rune r); - // utfutf returns a pointer to the first occurrence of the UTF string // s2 as a UTF substring of s1, or 0 if there is none. If s2 is the // null string, utfutf returns s1. (cf. strstr) const char* utfutf(const char* s1, const char* s2); - // utfecpy copies UTF sequences until a null sequence has been copied, // but writes no sequences beyond es1. If any sequences are copied, // s1 is terminated by a null sequence, and a pointer to that sequence // is returned. Otherwise, the original s1 is returned. (cf. strecpy) -char* utfecpy(char *s1, char *es1, const char *s2); - - +char* utfecpy(char* s1, char* es1, const char* s2); // These functions are rune-string analogues of the corresponding // functions in strcat (3). 
@@ -177,8 +169,6 @@ const Rune* runestrrchr(const Rune* s, Rune c); long runestrlen(const Rune* s); const Rune* runestrstr(const Rune* s1, const Rune* s2); - - // The following routines test types and modify cases for Unicode // characters. Unicode defines some characters as letters and // specifies three cases: upper, lower, and title. Mappings among the @@ -200,7 +190,6 @@ Rune toupperrune(Rune r); Rune tolowerrune(Rune r); Rune totitlerune(Rune r); - // isupperrune tests for upper case characters, including Unicode // upper case letters and targets of the toupper mapping. islowerrune // and istitlerune are defined analogously. @@ -209,31 +198,30 @@ int isupperrune(Rune r); int islowerrune(Rune r); int istitlerune(Rune r); - // isalpharune tests for Unicode letters; this includes ideographs in // addition to alphabetic characters. int isalpharune(Rune r); - // isdigitrune tests for digits. Non-digit numbers, such as Roman // numerals, are not included. int isdigitrune(Rune r); - // isspacerune tests for whitespace characters, including "C" locale // whitespace, Unicode defined whitespace, and the "zero-width // non-break space" character. int isspacerune(Rune r); - // (The comments in this file were copied from the manpage files rune.3, // isalpharune.3, and runestrcat.3. Some formatting changes were also made // to conform to Google style. /JRM 11/11/05) -#ifdef __cplusplus +} // namespace phonenumbers +} // namespace i18n + +#ifdef __cplusplus } #endif diff --git a/cpp/src/phonenumbers/utf/utfdef.h b/cpp/src/phonenumbers/utf/utfdef.h index 4bbdfc643..4e69d12ba 100644 --- a/cpp/src/phonenumbers/utf/utfdef.h +++ b/cpp/src/phonenumbers/utf/utfdef.h @@ -7,11 +7,14 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. 
IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF + * THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ +#ifndef UTIL_UTF8_UTFDEF_H__ +#define UTIL_UTF8_UTFDEF_H__ + #define uchar _utfuchar #define ushort _utfushort #define uint _utfuint @@ -19,9 +22,11 @@ #define vlong _utfvlong #define uvlong _utfuvlong -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +#define nelem(x) (sizeof(x) / sizeof((x)[0])) -#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#endif // UTIL_UTF8_UTFDEF_H__ diff --git a/debian/copyright b/debian/copyright index d95c23874..7c54d8797 100644 --- a/debian/copyright +++ b/debian/copyright @@ -16,7 +16,7 @@ License: Apache-2.0 License version 2.0 can be found in the file `/usr/share/common-licenses/Apache-2.0'. -Files: cpp/src/phonenumbers/utf/rune.c +Files: cpp/src/phonenumbers/utf/rune.cc cpp/src/phonenumbers/utf/utf.h cpp/src/phonenumbers/utf/utfdef.h Copyright: 1998-2002, Lucent Technologies