/*
 * Copyright (C) 2008 by the Massachusetts Institute of Technology,
 * Cambridge, MA, USA. All Rights Reserved.
 *
 * This software is being provided to you, the LICENSEE, by the
 * Massachusetts Institute of Technology (M.I.T.) under the following
 * license. By obtaining, using and/or copying this software, you agree
 * that you have read, understood, and will comply with these terms and
 * conditions:
 *
 * Export of this software from the United States of America may
 * require a specific license from the United States Government.
 * It is the responsibility of any person or organization contemplating
 * export to obtain such a license before exporting.
 *
 * WITHIN THAT CONSTRAINT, permission to use, copy, modify and distribute
 * this software and its documentation for any purpose and without fee or
 * royalty is hereby granted, provided that you agree to comply with the
 * following copyright notice and statements, including the disclaimer, and
 * that the same appear on ALL copies of the software and documentation,
 * including modifications that you make for internal use or for
 * distribution:
 *
 * THIS SOFTWARE IS PROVIDED "AS IS", AND M.I.T. MAKES NO REPRESENTATIONS
 * OR WARRANTIES, EXPRESS OR IMPLIED. By way of example, but not
 * limitation, M.I.T. MAKES NO REPRESENTATIONS OR WARRANTIES OF
 * MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF
 * THE LICENSED SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
 * PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
 *
 * The name of the Massachusetts Institute of Technology or M.I.T. may NOT
 * be used in advertising or publicity pertaining to distribution of the
 * software. Title to copyright in this software and any associated
 * documentation shall at all times remain with M.I.T., and USER agrees to
 * preserve same.
 *
 * Furthermore if you modify this software you must label
 * your software as modified software and not distribute it in such a
 * fashion that it might be confused with the original M.I.T. software.
 */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 1998-2008 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */
/* This notice applies to changes, created by or for Novell, Inc.,
 * to preexisting works for which notices appear elsewhere in this file.
 *
 * Copyright (C) 2000 Novell, Inc. All Rights Reserved.
 *
 * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND TREATIES.
 * USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT TO VERSION
 * 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS AVAILABLE AT
 * HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" IN THE
 * TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION OF THIS
 * WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP PUBLIC
 * LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT THE
 * PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY.
67*10598SGlenn.Barry@Sun.COM */ 68*10598SGlenn.Barry@Sun.COM 69*10598SGlenn.Barry@Sun.COM #ifndef K5_UTF8_H 70*10598SGlenn.Barry@Sun.COM #define K5_UTF8_H 71*10598SGlenn.Barry@Sun.COM 72*10598SGlenn.Barry@Sun.COM #include "autoconf.h" 73*10598SGlenn.Barry@Sun.COM 74*10598SGlenn.Barry@Sun.COM #ifdef HAVE_SYS_TYPES_H 75*10598SGlenn.Barry@Sun.COM #include <sys/types.h> 76*10598SGlenn.Barry@Sun.COM #endif 77*10598SGlenn.Barry@Sun.COM 78*10598SGlenn.Barry@Sun.COM #ifdef HAVE_UNISTD_H 79*10598SGlenn.Barry@Sun.COM #include <unistd.h> 80*10598SGlenn.Barry@Sun.COM #endif 81*10598SGlenn.Barry@Sun.COM 82*10598SGlenn.Barry@Sun.COM #ifdef HAVE_STDLIB_H 83*10598SGlenn.Barry@Sun.COM #include <stdlib.h> 84*10598SGlenn.Barry@Sun.COM #endif 85*10598SGlenn.Barry@Sun.COM 86*10598SGlenn.Barry@Sun.COM #if INT_MAX == 0x7fff 87*10598SGlenn.Barry@Sun.COM typedef unsigned int krb5_ucs2; 88*10598SGlenn.Barry@Sun.COM #elif SHRT_MAX == 0x7fff 89*10598SGlenn.Barry@Sun.COM typedef unsigned short krb5_ucs2; 90*10598SGlenn.Barry@Sun.COM #else 91*10598SGlenn.Barry@Sun.COM #error undefined 16 bit type 92*10598SGlenn.Barry@Sun.COM #endif 93*10598SGlenn.Barry@Sun.COM 94*10598SGlenn.Barry@Sun.COM #if INT_MAX == 0x7fffffffL 95*10598SGlenn.Barry@Sun.COM typedef int krb5_ucs4; 96*10598SGlenn.Barry@Sun.COM #elif LONG_MAX == 0x7fffffffL 97*10598SGlenn.Barry@Sun.COM typedef long krb5_ucs4; 98*10598SGlenn.Barry@Sun.COM #elif SHRT_MAX == 0x7fffffffL 99*10598SGlenn.Barry@Sun.COM typedef short krb5_ucs4; 100*10598SGlenn.Barry@Sun.COM #else 101*10598SGlenn.Barry@Sun.COM #error: undefined 32 bit type 102*10598SGlenn.Barry@Sun.COM #endif 103*10598SGlenn.Barry@Sun.COM 104*10598SGlenn.Barry@Sun.COM #define KRB5_MAX_UTF8_LEN (sizeof(krb5_ucs2) * 3/2) 105*10598SGlenn.Barry@Sun.COM 106*10598SGlenn.Barry@Sun.COM int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out); 107*10598SGlenn.Barry@Sun.COM size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf); 108*10598SGlenn.Barry@Sun.COM 109*10598SGlenn.Barry@Sun.COM int 
krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out); 110*10598SGlenn.Barry@Sun.COM size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf); 111*10598SGlenn.Barry@Sun.COM 112*10598SGlenn.Barry@Sun.COM int 113*10598SGlenn.Barry@Sun.COM krb5int_ucs2s_to_utf8s(const krb5_ucs2 *ucs2s, 114*10598SGlenn.Barry@Sun.COM char **utf8s, 115*10598SGlenn.Barry@Sun.COM size_t *utf8slen); 116*10598SGlenn.Barry@Sun.COM 117*10598SGlenn.Barry@Sun.COM int 118*10598SGlenn.Barry@Sun.COM krb5int_ucs2cs_to_utf8s(const krb5_ucs2 *ucs2s, 119*10598SGlenn.Barry@Sun.COM size_t ucs2slen, 120*10598SGlenn.Barry@Sun.COM char **utf8s, 121*10598SGlenn.Barry@Sun.COM size_t *utf8slen); 122*10598SGlenn.Barry@Sun.COM 123*10598SGlenn.Barry@Sun.COM int 124*10598SGlenn.Barry@Sun.COM krb5int_ucs2les_to_utf8s(const unsigned char *ucs2les, 125*10598SGlenn.Barry@Sun.COM char **utf8s, 126*10598SGlenn.Barry@Sun.COM size_t *utf8slen); 127*10598SGlenn.Barry@Sun.COM 128*10598SGlenn.Barry@Sun.COM int 129*10598SGlenn.Barry@Sun.COM krb5int_ucs2lecs_to_utf8s(const unsigned char *ucs2les, 130*10598SGlenn.Barry@Sun.COM size_t ucs2leslen, 131*10598SGlenn.Barry@Sun.COM char **utf8s, 132*10598SGlenn.Barry@Sun.COM size_t *utf8slen); 133*10598SGlenn.Barry@Sun.COM 134*10598SGlenn.Barry@Sun.COM int 135*10598SGlenn.Barry@Sun.COM krb5int_utf8s_to_ucs2s(const char *utf8s, 136*10598SGlenn.Barry@Sun.COM krb5_ucs2 **ucs2s, 137*10598SGlenn.Barry@Sun.COM size_t *ucs2chars); 138*10598SGlenn.Barry@Sun.COM 139*10598SGlenn.Barry@Sun.COM int 140*10598SGlenn.Barry@Sun.COM krb5int_utf8cs_to_ucs2s(const char *utf8s, 141*10598SGlenn.Barry@Sun.COM size_t utf8slen, 142*10598SGlenn.Barry@Sun.COM krb5_ucs2 **ucs2s, 143*10598SGlenn.Barry@Sun.COM size_t *ucs2chars); 144*10598SGlenn.Barry@Sun.COM 145*10598SGlenn.Barry@Sun.COM int 146*10598SGlenn.Barry@Sun.COM krb5int_utf8s_to_ucs2les(const char *utf8s, 147*10598SGlenn.Barry@Sun.COM unsigned char **ucs2les, 148*10598SGlenn.Barry@Sun.COM size_t *ucs2leslen); 149*10598SGlenn.Barry@Sun.COM 
/*
 * UTF-8 to UCS-2 conversion that takes an explicit input length
 * (utf8slen) alongside the input pointer.
 * NOTE(review): "les" presumably denotes a little-endian byte string, and
 * ucs2les / ucs2leslen presumably receive a callee-allocated buffer and
 * its length -- confirm against the implementation.
 */
int krb5int_utf8cs_to_ucs2les(const char *utf8s, size_t utf8slen,
                              unsigned char **ucs2les, size_t *ucs2leslen);

/* Number of bytes in the UTF-8 string. */
size_t krb5int_utf8_bytes(const char *p);

/* Number of UTF-8 characters in the string. */
size_t krb5int_utf8_chars(const char *p);

/* Number of UTF-8 characters in the counted string. */
size_t krb5int_utf8c_chars(const char *p, size_t length);

/* Length, in bytes, of the UTF-8 character. */
int krb5int_utf8_offset(const char *p);

/* Length, in bytes, indicated by the UTF-8 character. */
int krb5int_utf8_charlen(const char *p);

/*
 * Length, in bytes, indicated by the UTF-8 character; additionally
 * checks that the shortest possible encoding was used.
 */
int krb5int_utf8_charlen2(const char *p);

/* Copies one UTF-8 character and returns the number of bytes copied. */
int krb5int_utf8_copy(char *dst, const char *src);

/* Pointer to the next UTF-8 character in the string. */
char *krb5int_utf8_next(const char *p);

/* Pointer to the previous UTF-8 character in the string. */
char *krb5int_utf8_prev(const char *p);
179*10598SGlenn.Barry@Sun.COM 180*10598SGlenn.Barry@Sun.COM /* primitive ctype routines -- not aware of non-ascii characters */ 181*10598SGlenn.Barry@Sun.COM int krb5int_utf8_isascii( const char *); 182*10598SGlenn.Barry@Sun.COM int krb5int_utf8_isalpha( const char *); 183*10598SGlenn.Barry@Sun.COM int krb5int_utf8_isalnum( const char *); 184*10598SGlenn.Barry@Sun.COM int krb5int_utf8_isdigit( const char *); 185*10598SGlenn.Barry@Sun.COM int krb5int_utf8_isxdigit( const char *); 186*10598SGlenn.Barry@Sun.COM int krb5int_utf8_isspace( const char *); 187*10598SGlenn.Barry@Sun.COM 188*10598SGlenn.Barry@Sun.COM /* span characters not in set, return bytes spanned */ 189*10598SGlenn.Barry@Sun.COM size_t krb5int_utf8_strcspn( const char* str, const char *set); 190*10598SGlenn.Barry@Sun.COM /* span characters in set, return bytes spanned */ 191*10598SGlenn.Barry@Sun.COM size_t krb5int_utf8_strspn( const char* str, const char *set); 192*10598SGlenn.Barry@Sun.COM /* return first occurance of character in string */ 193*10598SGlenn.Barry@Sun.COM char *krb5int_utf8_strchr( const char* str, const char *chr); 194*10598SGlenn.Barry@Sun.COM /* return first character of set in string */ 195*10598SGlenn.Barry@Sun.COM char *krb5int_utf8_strpbrk( const char* str, const char *set); 196*10598SGlenn.Barry@Sun.COM /* reentrant tokenizer */ 197*10598SGlenn.Barry@Sun.COM char *krb5int_utf8_strtok( char* sp, const char* sep, char **last); 198*10598SGlenn.Barry@Sun.COM 199*10598SGlenn.Barry@Sun.COM /* Optimizations */ 200*10598SGlenn.Barry@Sun.COM extern const char krb5int_utf8_lentab[128]; 201*10598SGlenn.Barry@Sun.COM extern const char krb5int_utf8_mintab[32]; 202*10598SGlenn.Barry@Sun.COM 203*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_ISASCII(p) ( !(*(const unsigned char *)(p) & 0x80 ) ) 204*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_CHARLEN(p) ( KRB5_UTF8_ISASCII(p) \ 205*10598SGlenn.Barry@Sun.COM ? 
1 : krb5int_utf8_lentab[*(const unsigned char *)(p) ^ 0x80] ) 206*10598SGlenn.Barry@Sun.COM 207*10598SGlenn.Barry@Sun.COM /* This is like CHARLEN but additionally validates to make sure 208*10598SGlenn.Barry@Sun.COM * the char used the shortest possible encoding. 209*10598SGlenn.Barry@Sun.COM * 'l' is used to temporarily hold the result of CHARLEN. 210*10598SGlenn.Barry@Sun.COM */ 211*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_CHARLEN2(p, l) ( ( ( l = KRB5_UTF8_CHARLEN( p )) < 3 || \ 212*10598SGlenn.Barry@Sun.COM ( krb5int_utf8_mintab[*(const unsigned char *)(p) & 0x1f] & (p)[1] ) ) ? \ 213*10598SGlenn.Barry@Sun.COM l : 0 ) 214*10598SGlenn.Barry@Sun.COM 215*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_OFFSET(p) ( KRB5_UTF8_ISASCII(p) \ 216*10598SGlenn.Barry@Sun.COM ? 1 : krb5int_utf8_offset((p)) ) 217*10598SGlenn.Barry@Sun.COM 218*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_COPY(d,s) ( KRB5_UTF8_ISASCII(s) \ 219*10598SGlenn.Barry@Sun.COM ? (*(d) = *(s), 1) : krb5int_utf8_copy((d),(s)) ) 220*10598SGlenn.Barry@Sun.COM 221*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_NEXT(p) ( KRB5_UTF8_ISASCII(p) \ 222*10598SGlenn.Barry@Sun.COM ? 
(char *)(p)+1 : krb5int_utf8_next((p)) ) 223*10598SGlenn.Barry@Sun.COM 224*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_INCR(p) ((p) = KRB5_UTF8_NEXT(p)) 225*10598SGlenn.Barry@Sun.COM 226*10598SGlenn.Barry@Sun.COM /* For symmetry */ 227*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_PREV(p) (krb5int_utf8_prev((p))) 228*10598SGlenn.Barry@Sun.COM #define KRB5_UTF8_DECR(p) ((p)=KRB5_UTF8_PREV((p))) 229*10598SGlenn.Barry@Sun.COM 230*10598SGlenn.Barry@Sun.COM /* 231*10598SGlenn.Barry@Sun.COM * these macros assume 'x' is an ASCII x 232*10598SGlenn.Barry@Sun.COM * and assume the "C" locale 233*10598SGlenn.Barry@Sun.COM */ 234*10598SGlenn.Barry@Sun.COM #define KRB5_ASCII(c) (!((c) & 0x80)) 235*10598SGlenn.Barry@Sun.COM #define KRB5_SPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 236*10598SGlenn.Barry@Sun.COM #define KRB5_DIGIT(c) ((c) >= '0' && (c) <= '9') 237*10598SGlenn.Barry@Sun.COM #define KRB5_LOWER(c) ((c) >= 'a' && (c) <= 'z') 238*10598SGlenn.Barry@Sun.COM #define KRB5_UPPER(c) ((c) >= 'A' && (c) <= 'Z') 239*10598SGlenn.Barry@Sun.COM #define KRB5_ALPHA(c) (KRB5_LOWER(c) || KRB5_UPPER(c)) 240*10598SGlenn.Barry@Sun.COM #define KRB5_ALNUM(c) (KRB5_ALPHA(c) || KRB5_DIGIT(c)) 241*10598SGlenn.Barry@Sun.COM 242*10598SGlenn.Barry@Sun.COM #define KRB5_LDH(c) (KRB5_ALNUM(c) || (c) == '-') 243*10598SGlenn.Barry@Sun.COM 244*10598SGlenn.Barry@Sun.COM #define KRB5_HEXLOWER(c) ((c) >= 'a' && (c) <= 'f') 245*10598SGlenn.Barry@Sun.COM #define KRB5_HEXUPPER(c) ((c) >= 'A' && (c) <= 'F') 246*10598SGlenn.Barry@Sun.COM #define KRB5_HEX(c) (KRB5_DIGIT(c) || \ 247*10598SGlenn.Barry@Sun.COM KRB5_HEXLOWER(c) || KRB5_HEXUPPER(c)) 248*10598SGlenn.Barry@Sun.COM 249*10598SGlenn.Barry@Sun.COM #endif /* K5_UTF8_H */ 250