|
|
@ -1,33 +1,36 @@ |
|
|
/* |
|
|
/* |
|
|
* The authors of this software are Rob Pike and Ken Thompson. |
|
|
* The authors of this software are Rob Pike and Ken Thompson. |
|
|
* Copyright (c) 1998-2002 by Lucent Technologies. |
|
|
* Copyright (c) 1998-2002 by Lucent Technologies. |
|
|
* Portions Copyright (c) 2009 The Go Authors. All rights reserved. |
|
|
|
|
|
* Permission to use, copy, modify, and distribute this software for any |
|
|
|
|
|
* purpose without fee is hereby granted, provided that this entire notice |
|
|
|
|
|
|
|
|
* Portions Copyright (c) 2009 The Go Authors. All rights |
|
|
|
|
|
* reserved. Permission to use, copy, modify, and distribute this software for |
|
|
|
|
|
* any purpose without fee is hereby granted, provided that this entire notice |
|
|
* is included in all copies of any software which is or includes a copy |
|
|
* is included in all copies of any software which is or includes a copy |
|
|
* or modification of this software and in all copies of the supporting |
|
|
* or modification of this software and in all copies of the supporting |
|
|
* documentation for such software. |
|
|
* documentation for such software. |
|
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED |
|
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED |
|
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY |
|
|
|
|
|
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY |
|
|
|
|
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. |
|
|
|
|
|
|
|
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE |
|
|
|
|
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY OF |
|
|
|
|
|
* THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. |
|
|
*/ |
|
|
*/ |
|
|
|
|
|
|
|
|
#ifndef _UTFH_ |
|
|
#ifndef _UTFH_ |
|
|
#define _UTFH_ 1 |
|
|
#define _UTFH_ 1 |
|
|
|
|
|
|
|
|
typedef unsigned int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ |
|
|
|
|
|
|
|
|
namespace i18n { |
|
|
|
|
|
namespace phonenumbers { |
|
|
|
|
|
|
|
|
enum |
|
|
|
|
|
{ |
|
|
|
|
|
UTFmax = 4, /* maximum bytes per rune */ |
|
|
|
|
|
Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ |
|
|
|
|
|
Runeself = 0x80, /* rune and UTF sequences are the same (<) */ |
|
|
|
|
|
Runeerror = 0xFFFD, /* decoding error in UTF */ |
|
|
|
|
|
Runemax = 0x10FFFF, /* maximum rune value */ |
|
|
|
|
|
|
|
|
typedef unsigned int |
|
|
|
|
|
Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ |
|
|
|
|
|
|
|
|
|
|
|
enum { |
|
|
|
|
|
UTFmax = 4, /* maximum bytes per rune */ |
|
|
|
|
|
Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ |
|
|
|
|
|
Runeself = 0x80, /* rune and UTF sequences are the same (<) */ |
|
|
|
|
|
Runeerror = 0xFFFD, /* decoding error in UTF */ |
|
|
|
|
|
Runemax = 0x10FFFF, /* maximum rune value */ |
|
|
}; |
|
|
}; |
|
|
|
|
|
|
|
|
#ifdef __cplusplus |
|
|
|
|
|
|
|
|
#ifdef __cplusplus |
|
|
extern "C" { |
|
|
extern "C" { |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
@ -41,14 +44,13 @@ extern "C" { |
|
|
* SEE ALSO |
|
|
* SEE ALSO |
|
|
* utf (7) |
|
|
* utf (7) |
|
|
* tcs (1) |
|
|
* tcs (1) |
|
|
*/ |
|
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
// runetochar copies (encodes) one rune, pointed to by r, to at most |
|
|
// runetochar copies (encodes) one rune, pointed to by r, to at most |
|
|
// UTFmax bytes starting at s and returns the number of bytes generated. |
|
|
// UTFmax bytes starting at s and returns the number of bytes generated. |
|
|
|
|
|
|
|
|
int runetochar(char* s, const Rune* r); |
|
|
int runetochar(char* s, const Rune* r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// chartorune copies (decodes) at most UTFmax bytes starting at s to |
|
|
// chartorune copies (decodes) at most UTFmax bytes starting at s to |
|
|
// one rune, pointed to by r, and returns the number of bytes consumed. |
|
|
// one rune, pointed to by r, and returns the number of bytes consumed. |
|
|
// If the input is not exactly in UTF format, chartorune will set *r |
|
|
// If the input is not exactly in UTF format, chartorune will set *r |
|
|
@ -61,7 +63,6 @@ int runetochar(char* s, const Rune* r); |
|
|
|
|
|
|
|
|
int chartorune(Rune* r, const char* s); |
|
|
int chartorune(Rune* r, const char* s); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// charntorune is like chartorune, except that it will access at most |
|
|
// charntorune is like chartorune, except that it will access at most |
|
|
// n bytes of s. If the UTF sequence is incomplete within n bytes, |
|
|
// n bytes of s. If the UTF sequence is incomplete within n bytes, |
|
|
// charntorune will set *r to Runeerror and return 0. If it is complete |
|
|
// charntorune will set *r to Runeerror and return 0. If it is complete |
|
|
@ -82,13 +83,11 @@ int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed); |
|
|
|
|
|
|
|
|
int runelen(Rune r); |
|
|
int runelen(Rune r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// runenlen returns the number of bytes required to convert the n |
|
|
// runenlen returns the number of bytes required to convert the n |
|
|
// runes pointed to by r into UTF. |
|
|
// runes pointed to by r into UTF. |
|
|
|
|
|
|
|
|
int runenlen(const Rune* r, int n); |
|
|
int runenlen(const Rune* r, int n); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// fullrune returns 1 if the string s of length n is long enough to be |
|
|
// fullrune returns 1 if the string s of length n is long enough to be |
|
|
// decoded by chartorune, and 0 otherwise. This does not guarantee |
|
|
// decoded by chartorune, and 0 otherwise. This does not guarantee |
|
|
// that the string contains a legal UTF encoding. This routine is used |
|
|
// that the string contains a legal UTF encoding. This routine is used |
|
|
@ -106,7 +105,6 @@ int fullrune(const char* s, int n); |
|
|
|
|
|
|
|
|
int utflen(const char* s); |
|
|
int utflen(const char* s); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// utfnlen returns the number of complete runes that are represented |
|
|
// utfnlen returns the number of complete runes that are represented |
|
|
// by the first n bytes of the UTF string s. If the last few bytes of |
|
|
// by the first n bytes of the UTF string s. If the last few bytes of |
|
|
// the string contain an incompletely coded rune, utfnlen will not |
|
|
// the string contain an incompletely coded rune, utfnlen will not |
|
|
@ -115,7 +113,6 @@ int utflen(const char* s); |
|
|
|
|
|
|
|
|
int utfnlen(const char* s, long n); |
|
|
int utfnlen(const char* s, long n); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// utfrune returns a pointer to the first occurrence of rune r in the |
|
|
// utfrune returns a pointer to the first occurrence of rune r in the |
|
|
// UTF string s, or 0 if r does not occur in the string. The NULL |
|
|
// UTF string s, or 0 if r does not occur in the string. The NULL |
|
|
// byte terminating a string is considered to be part of the string s. |
|
|
// byte terminating a string is considered to be part of the string s. |
|
|
@ -123,7 +120,6 @@ int utfnlen(const char* s, long n); |
|
|
|
|
|
|
|
|
/*const*/ char* utfrune(const char* s, Rune r); |
|
|
/*const*/ char* utfrune(const char* s, Rune r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// utfrrune returns a pointer to the last occurrence of rune r in the |
|
|
// utfrrune returns a pointer to the last occurrence of rune r in the |
|
|
// UTF string s, or 0 if r does not occur in the string. The NULL |
|
|
// UTF string s, or 0 if r does not occur in the string. The NULL |
|
|
// byte terminating a string is considered to be part of the string s. |
|
|
// byte terminating a string is considered to be part of the string s. |
|
|
@ -131,22 +127,18 @@ int utfnlen(const char* s, long n); |
|
|
|
|
|
|
|
|
/*const*/ char* utfrrune(const char* s, Rune r); |
|
|
/*const*/ char* utfrrune(const char* s, Rune r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// utfutf returns a pointer to the first occurrence of the UTF string |
|
|
// utfutf returns a pointer to the first occurrence of the UTF string |
|
|
// s2 as a UTF substring of s1, or 0 if there is none. If s2 is the |
|
|
// s2 as a UTF substring of s1, or 0 if there is none. If s2 is the |
|
|
// null string, utfutf returns s1. (cf. strstr) |
|
|
// null string, utfutf returns s1. (cf. strstr) |
|
|
|
|
|
|
|
|
const char* utfutf(const char* s1, const char* s2); |
|
|
const char* utfutf(const char* s1, const char* s2); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// utfecpy copies UTF sequences until a null sequence has been copied, |
|
|
// utfecpy copies UTF sequences until a null sequence has been copied, |
|
|
// but writes no sequences beyond es1. If any sequences are copied, |
|
|
// but writes no sequences beyond es1. If any sequences are copied, |
|
|
// s1 is terminated by a null sequence, and a pointer to that sequence |
|
|
// s1 is terminated by a null sequence, and a pointer to that sequence |
|
|
// is returned. Otherwise, the original s1 is returned. (cf. strecpy) |
|
|
// is returned. Otherwise, the original s1 is returned. (cf. strecpy) |
|
|
|
|
|
|
|
|
char* utfecpy(char *s1, char *es1, const char *s2); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
char* utfecpy(char* s1, char* es1, const char* s2); |
|
|
|
|
|
|
|
|
// These functions are rune-string analogues of the corresponding |
|
|
// These functions are rune-string analogues of the corresponding |
|
|
// functions in strcat (3). |
|
|
// functions in strcat (3). |
|
|
@ -177,8 +169,6 @@ const Rune* runestrrchr(const Rune* s, Rune c); |
|
|
long runestrlen(const Rune* s); |
|
|
long runestrlen(const Rune* s); |
|
|
const Rune* runestrstr(const Rune* s1, const Rune* s2); |
|
|
const Rune* runestrstr(const Rune* s1, const Rune* s2); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// The following routines test types and modify cases for Unicode |
|
|
// The following routines test types and modify cases for Unicode |
|
|
// characters. Unicode defines some characters as letters and |
|
|
// characters. Unicode defines some characters as letters and |
|
|
// specifies three cases: upper, lower, and title. Mappings among the |
|
|
// specifies three cases: upper, lower, and title. Mappings among the |
|
|
@ -200,7 +190,6 @@ Rune toupperrune(Rune r); |
|
|
Rune tolowerrune(Rune r); |
|
|
Rune tolowerrune(Rune r); |
|
|
Rune totitlerune(Rune r); |
|
|
Rune totitlerune(Rune r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// isupperrune tests for upper case characters, including Unicode |
|
|
// isupperrune tests for upper case characters, including Unicode |
|
|
// upper case letters and targets of the toupper mapping. islowerrune |
|
|
// upper case letters and targets of the toupper mapping. islowerrune |
|
|
// and istitlerune are defined analogously. |
|
|
// and istitlerune are defined analogously. |
|
|
@ -209,31 +198,30 @@ int isupperrune(Rune r); |
|
|
int islowerrune(Rune r); |
|
|
int islowerrune(Rune r); |
|
|
int istitlerune(Rune r); |
|
|
int istitlerune(Rune r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// isalpharune tests for Unicode letters; this includes ideographs in |
|
|
// isalpharune tests for Unicode letters; this includes ideographs in |
|
|
// addition to alphabetic characters. |
|
|
// addition to alphabetic characters. |
|
|
|
|
|
|
|
|
int isalpharune(Rune r); |
|
|
int isalpharune(Rune r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// isdigitrune tests for digits. Non-digit numbers, such as Roman |
|
|
// isdigitrune tests for digits. Non-digit numbers, such as Roman |
|
|
// numerals, are not included. |
|
|
// numerals, are not included. |
|
|
|
|
|
|
|
|
int isdigitrune(Rune r); |
|
|
int isdigitrune(Rune r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// isspacerune tests for whitespace characters, including "C" locale |
|
|
// isspacerune tests for whitespace characters, including "C" locale |
|
|
// whitespace, Unicode defined whitespace, and the "zero-width |
|
|
// whitespace, Unicode defined whitespace, and the "zero-width |
|
|
// non-break space" character. |
|
|
// non-break space" character. |
|
|
|
|
|
|
|
|
int isspacerune(Rune r); |
|
|
int isspacerune(Rune r); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// (The comments in this file were copied from the manpage files rune.3, |
|
|
// (The comments in this file were copied from the manpage files rune.3, |
|
|
// isalpharune.3, and runestrcat.3. Some formatting changes were also made |
|
|
// isalpharune.3, and runestrcat.3. Some formatting changes were also made |
|
|
// to conform to Google style. /JRM 11/11/05) |
|
|
// to conform to Google style. /JRM 11/11/05) |
|
|
|
|
|
|
|
|
#ifdef __cplusplus |
|
|
|
|
|
|
|
|
} // namespace phonenumbers |
|
|
|
|
|
} // namespace phonenumbers |
|
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus |
|
|
} |
|
|
} |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
|