/* xref: /onnv-gate/usr/src/lib/gss_mechs/mech_krb5/include/k5-utf8.h (revision 10598:6f30db2c2cd0) */
/*
 * Copyright (C) 2008 by the Massachusetts Institute of Technology,
 * Cambridge, MA, USA.  All Rights Reserved.
 *
 * This software is being provided to you, the LICENSEE, by the
 * Massachusetts Institute of Technology (M.I.T.) under the following
 * license.  By obtaining, using and/or copying this software, you agree
 * that you have read, understood, and will comply with these terms and
 * conditions:
 *
 * Export of this software from the United States of America may
 * require a specific license from the United States Government.
 * It is the responsibility of any person or organization contemplating
 * export to obtain such a license before exporting.
 *
 * WITHIN THAT CONSTRAINT, permission to use, copy, modify and distribute
 * this software and its documentation for any purpose and without fee or
 * royalty is hereby granted, provided that you agree to comply with the
 * following copyright notice and statements, including the disclaimer, and
 * that the same appear on ALL copies of the software and documentation,
 * including modifications that you make for internal use or for
 * distribution:
 *
 * THIS SOFTWARE IS PROVIDED "AS IS", AND M.I.T. MAKES NO REPRESENTATIONS
 * OR WARRANTIES, EXPRESS OR IMPLIED.  By way of example, but not
 * limitation, M.I.T. MAKES NO REPRESENTATIONS OR WARRANTIES OF
 * MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF
 * THE LICENSED SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
 * PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
 *
 * The name of the Massachusetts Institute of Technology or M.I.T. may NOT
 * be used in advertising or publicity pertaining to distribution of the
 * software.  Title to copyright in this software and any associated
 * documentation shall at all times remain with M.I.T., and USER agrees to
 * preserve same.
 *
 * Furthermore if you modify this software you must label
 * your software as modified software and not distribute it in such a
 * fashion that it might be confused with the original M.I.T. software.
 */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 1998-2008 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */
/* This notice applies to changes, created by or for Novell, Inc.,
 * to preexisting works for which notices appear elsewhere in this file.
 *
 * Copyright (C) 2000 Novell, Inc. All Rights Reserved.
 *
 * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND TREATIES.
 * USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT TO VERSION
 * 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS AVAILABLE AT
 * HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" IN THE
 * TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION OF THIS
 * WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP PUBLIC
 * LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT THE
 * PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY.
 */

#ifndef K5_UTF8_H
#define K5_UTF8_H

#include "autoconf.h"

/* INT_MAX, SHRT_MAX and LONG_MAX drive the krb5_ucs2/krb5_ucs4 selection. */
#include <limits.h>

#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

/*
 * krb5_ucs2: unsigned integer type holding one UCS-2 code unit.
 *
 * Chosen at preprocessing time from the <limits.h> maxima: unsigned int
 * when int is 16 bits wide, otherwise unsigned short when short is 16 bits
 * wide; any other platform is rejected with #error.
 *
 * <limits.h> must be included before this point.  An undefined INT_MAX or
 * SHRT_MAX evaluates to 0 inside #if, which would silently skip both
 * branches and land on the #error.
 */
#if INT_MAX == 0x7fff
typedef	unsigned int	krb5_ucs2;
#elif SHRT_MAX == 0x7fff
typedef	unsigned short	krb5_ucs2;
#else
#error undefined 16 bit type
#endif

/*
 * krb5_ucs4: signed integer type holding one UCS-4 character.
 *
 * Chosen at preprocessing time from the <limits.h> maxima, trying int,
 * then long, then short for a 32-bit type; any other platform is rejected
 * with #error.  <limits.h> must be included before this point, because an
 * undefined *_MAX macro evaluates to 0 inside #if.
 */
#if INT_MAX == 0x7fffffffL
typedef int	krb5_ucs4;
#elif LONG_MAX == 0x7fffffffL
typedef long	krb5_ucs4;
#elif SHRT_MAX == 0x7fffffffL
typedef short	krb5_ucs4;
#else
#error undefined 32 bit type
#endif

/*
 * Size bound derived from krb5_ucs2: one and a half times its size in
 * bytes, i.e. 3 when krb5_ucs2 is two bytes wide.  The value has type
 * size_t because it is built from sizeof.
 * NOTE(review): presumably the longest UTF-8 encoding of one UCS-2
 * character; it looks too small for krb5_ucs4 output -- confirm before
 * using it to size buffers for the UCS-4 routines.
 */
#define KRB5_MAX_UTF8_LEN   ((sizeof(krb5_ucs2) * 3) / 2)

106*10598SGlenn.Barry@Sun.COM int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out);
107*10598SGlenn.Barry@Sun.COM size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf);
108*10598SGlenn.Barry@Sun.COM 
109*10598SGlenn.Barry@Sun.COM int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
110*10598SGlenn.Barry@Sun.COM size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
111*10598SGlenn.Barry@Sun.COM 
112*10598SGlenn.Barry@Sun.COM int
113*10598SGlenn.Barry@Sun.COM krb5int_ucs2s_to_utf8s(const krb5_ucs2 *ucs2s,
114*10598SGlenn.Barry@Sun.COM 		       char **utf8s,
115*10598SGlenn.Barry@Sun.COM 		       size_t *utf8slen);
116*10598SGlenn.Barry@Sun.COM 
117*10598SGlenn.Barry@Sun.COM int
118*10598SGlenn.Barry@Sun.COM krb5int_ucs2cs_to_utf8s(const krb5_ucs2 *ucs2s,
119*10598SGlenn.Barry@Sun.COM 			size_t ucs2slen,
120*10598SGlenn.Barry@Sun.COM 		        char **utf8s,
121*10598SGlenn.Barry@Sun.COM 		        size_t *utf8slen);
122*10598SGlenn.Barry@Sun.COM 
/*
 * Byte-oriented UCS-2 input to UTF-8 string.  The input is passed as raw
 * unsigned char data; the "lecs" variant additionally takes its length in
 * ucs2leslen.
 *
 * NOTE(review): the "le" in the names suggests little-endian UCS-2, and
 * *utf8s / *utf8slen presumably receive the result and its length; byte
 * order, length units and buffer ownership are not visible from this
 * header -- confirm against the implementation.
 */
int
krb5int_ucs2les_to_utf8s(const unsigned char *ucs2les,
			 char **utf8s,
			 size_t *utf8slen);

int
krb5int_ucs2lecs_to_utf8s(const unsigned char *ucs2les,
			  size_t ucs2leslen,
			  char **utf8s,
			  size_t *utf8slen);

134*10598SGlenn.Barry@Sun.COM int
135*10598SGlenn.Barry@Sun.COM krb5int_utf8s_to_ucs2s(const char *utf8s,
136*10598SGlenn.Barry@Sun.COM 		       krb5_ucs2 **ucs2s,
137*10598SGlenn.Barry@Sun.COM 		       size_t *ucs2chars);
138*10598SGlenn.Barry@Sun.COM 
139*10598SGlenn.Barry@Sun.COM int
140*10598SGlenn.Barry@Sun.COM krb5int_utf8cs_to_ucs2s(const char *utf8s,
141*10598SGlenn.Barry@Sun.COM 			size_t utf8slen,
142*10598SGlenn.Barry@Sun.COM 		        krb5_ucs2 **ucs2s,
143*10598SGlenn.Barry@Sun.COM 		        size_t *ucs2chars);
144*10598SGlenn.Barry@Sun.COM 
/*
 * UTF-8 string to byte-oriented UCS-2 output.  The result is handed back
 * as raw unsigned char data; the "cs" variant additionally takes the input
 * length in utf8slen.
 *
 * NOTE(review): the "le" in the names suggests little-endian UCS-2, and
 * *ucs2les / *ucs2leslen presumably receive the result and its length;
 * byte order, length units and buffer ownership are not visible from this
 * header -- confirm against the implementation.
 */
int
krb5int_utf8s_to_ucs2les(const char *utf8s,
			 unsigned char **ucs2les,
			 size_t *ucs2leslen);

int
krb5int_utf8cs_to_ucs2les(const char *utf8s,
			  size_t utf8slen,
			  unsigned char **ucs2les,
			  size_t *ucs2leslen);

/* returns the number of bytes in the UTF-8 string */
size_t krb5int_utf8_bytes(const char *s);
/* returns the number of UTF-8 characters in the string */
size_t krb5int_utf8_chars(const char *s);
/* returns the number of UTF-8 characters in the counted string */
size_t krb5int_utf8c_chars(const char *s, size_t len);
/* returns the length (in bytes) of the UTF-8 character */
int krb5int_utf8_offset(const char *p);
/* returns the length (in bytes) indicated by the UTF-8 character */
int krb5int_utf8_charlen(const char *p);

/* returns the length (in bytes) indicated by the UTF-8 character
 * also checks that shortest possible encoding was used
 */
int krb5int_utf8_charlen2(const char *p);

/* copies one UTF-8 character from src to dst and returns the number of
 * bytes copied
 */
int krb5int_utf8_copy(char *dst, const char *src);

/* returns a pointer to the next UTF-8 character in the string */
char *krb5int_utf8_next(const char *p);
/* returns a pointer to the previous UTF-8 character in the string */
char *krb5int_utf8_prev(const char *p);

/* primitive ctype routines -- not aware of non-ascii characters */
int krb5int_utf8_isascii(const char *p);
int krb5int_utf8_isalpha(const char *p);
int krb5int_utf8_isalnum(const char *p);
int krb5int_utf8_isdigit(const char *p);
int krb5int_utf8_isxdigit(const char *p);
int krb5int_utf8_isspace(const char *p);

/* span characters not in set, return bytes spanned */
size_t krb5int_utf8_strcspn(const char *str, const char *set);
/* span characters in set, return bytes spanned */
size_t krb5int_utf8_strspn(const char *str, const char *set);
/* return first occurrence of character in string */
char *krb5int_utf8_strchr(const char *str, const char *chr);
/* return first character of set in string */
char *krb5int_utf8_strpbrk(const char *str, const char *set);
/* reentrant tokenizer */
char *krb5int_utf8_strtok(char *sp, const char *sep, char **last);

/*
 * Optimizations: lookup tables defined elsewhere, plus macro fast paths
 * that avoid a function call for ASCII input.
 *
 * krb5int_utf8_lentab is indexed by a non-ASCII lead byte with its top bit
 * cleared (0..127); krb5int_utf8_mintab is indexed by the low five bits of
 * a lead byte (0..31).
 */
extern const char krb5int_utf8_lentab[128];
extern const char krb5int_utf8_mintab[32];

/* Nonzero when the byte at p has its high bit clear, i.e. is 7-bit ASCII. */
#define KRB5_UTF8_ISASCII(p) ( !(*(const unsigned char *)(p) & 0x80 ) )

/*
 * Encoded length of the character at p: 1 for ASCII, otherwise the
 * krb5int_utf8_lentab entry for the lead byte.  Evaluates p more than
 * once, so do not pass an expression with side effects.
 */
#define KRB5_UTF8_CHARLEN(p) ( KRB5_UTF8_ISASCII(p) \
	? 1 : krb5int_utf8_lentab[*(const unsigned char *)(p) ^ 0x80] )

/* This is like CHARLEN but additionally validates to make sure
 * the char used the shortest possible encoding.
 * 'l' is used to temporarily hold the result of CHARLEN and must be a
 * modifiable lvalue.  Lengths below 3 are returned unchecked; for longer
 * lengths the second byte is masked against krb5int_utf8_mintab and the
 * macro yields 0 if no bit matches.
 * Both 'p' and 'l' are evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define KRB5_UTF8_CHARLEN2(p, l) ( ( ( (l) = KRB5_UTF8_CHARLEN( p )) < 3 || \
	( krb5int_utf8_mintab[*(const unsigned char *)(p) & 0x1f] & (p)[1] ) ) ? \
	(l) : 0 )

/*
 * Macro front ends for the krb5int_utf8_* routines.  OFFSET, COPY and NEXT
 * handle an ASCII lead byte inline and call the function only for
 * non-ASCII input.  Their arguments are evaluated more than once, so do
 * not pass expressions with side effects.
 */

/* Byte length of the character at p. */
#define KRB5_UTF8_OFFSET(p) ( KRB5_UTF8_ISASCII(p) \
	? 1 : krb5int_utf8_offset((p)) )

/* Copy one character from s to d; yields the number of bytes copied. */
#define KRB5_UTF8_COPY(d,s) ( KRB5_UTF8_ISASCII(s) \
	? (*(d) = *(s), 1) : krb5int_utf8_copy((d),(s)) )

/* Pointer (as char *) to the character following the one at p. */
#define KRB5_UTF8_NEXT(p) ( KRB5_UTF8_ISASCII(p) \
	? (char *)(p)+1 : krb5int_utf8_next((p)) )

/* Advance the lvalue p to the next character. */
#define KRB5_UTF8_INCR(p) ((p) = KRB5_UTF8_NEXT(p))

/* For symmetry */
#define KRB5_UTF8_PREV(p) (krb5int_utf8_prev((p)))
#define KRB5_UTF8_DECR(p) ((p)=KRB5_UTF8_PREV((p)))

/*
 * Character-class tests on a single character value 'c'.
 * These macros assume 'c' is an ASCII character and assume the "C" locale.
 * Most of them evaluate 'c' more than once, so do not pass an expression
 * with side effects.
 */
#define KRB5_ASCII(c)		(!((c) & 0x80))
/* Only space, tab and newline count as white space here. */
#define KRB5_SPACE(c)		((c) == ' ' || (c) == '\t' || (c) == '\n')
#define KRB5_DIGIT(c)		((c) >= '0' && (c) <= '9')
#define KRB5_LOWER(c)		((c) >= 'a' && (c) <= 'z')
#define KRB5_UPPER(c)		((c) >= 'A' && (c) <= 'Z')
#define KRB5_ALPHA(c)		(KRB5_LOWER(c) || KRB5_UPPER(c))
#define KRB5_ALNUM(c)		(KRB5_ALPHA(c) || KRB5_DIGIT(c))

/* Letter, digit or hyphen. */
#define KRB5_LDH(c)		(KRB5_ALNUM(c) || (c) == '-')

#define KRB5_HEXLOWER(c)	((c) >= 'a' && (c) <= 'f')
#define KRB5_HEXUPPER(c)	((c) >= 'A' && (c) <= 'F')
#define KRB5_HEX(c)		(KRB5_DIGIT(c) || \
				KRB5_HEXLOWER(c) || KRB5_HEXUPPER(c))

#endif /* K5_UTF8_H */