1 /* $NetBSD: citrus_ues.c,v 1.2 2010/12/07 22:01:22 joerg Exp $ */ 2 3 /*- 4 * Copyright (c)2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if defined(LIBC_SCCS) && !defined(lint) 31 __RCSID("$NetBSD: citrus_ues.c,v 1.2 2010/12/07 22:01:22 joerg Exp $"); 32 #endif /* LIBC_SCCS and not lint */ 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <string.h> 37 #include <stdio.h> 38 #include <stdint.h> 39 #include <stdlib.h> 40 #include <limits.h> 41 #include <wchar.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_bcs.h" 46 #include "citrus_module.h" 47 #include "citrus_ctype.h" 48 #include "citrus_stdenc.h" 49 #include "citrus_ues.h" 50 51 typedef struct { 52 int mode; 53 #define MODE_C99 1 54 size_t mb_cur_max; 55 } _UESEncodingInfo; 56 57 typedef struct { 58 int chlen; 59 char ch[12]; 60 } _UESState; 61 62 typedef struct { 63 _UESEncodingInfo ei; 64 struct { 65 /* for future multi-locale facility */ 66 _UESState s_mblen; 67 _UESState s_mbrlen; 68 _UESState s_mbrtowc; 69 _UESState s_mbtowc; 70 _UESState s_mbsrtowcs; 71 _UESState s_wcrtomb; 72 _UESState s_wcsrtombs; 73 _UESState s_wctomb; 74 } states; 75 } _UESCTypeInfo; 76 77 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 78 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 79 80 #define _FUNCNAME(m) _citrus_UES_##m 81 #define _ENCODING_INFO _UESEncodingInfo 82 #define _CTYPE_INFO _UESCTypeInfo 83 #define _ENCODING_STATE _UESState 84 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 85 #define _ENCODING_IS_STATE_DEPENDENT 0 86 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 87 88 static __inline void 89 /*ARGSUSED*/ 90 _citrus_UES_init_state(_UESEncodingInfo * __restrict ei, 91 _UESState * __restrict psenc) 92 { 93 psenc->chlen = 0; 94 } 95 96 static __inline void 97 /*ARGSUSED*/ 98 _citrus_UES_pack_state(_UESEncodingInfo * __restrict ei, 99 void *__restrict pspriv, const _UESState * __restrict psenc) 100 { 101 /* ei seem to be unused */ 102 _DIAGASSERT(pspriv != NULL); 103 _DIAGASSERT(psenc != NULL); 104 105 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 106 } 107 108 static __inline void 109 /*ARGSUSED*/ 110 _citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei, 111 _UESState * __restrict psenc, const void * __restrict pspriv) 112 { 113 /* ei seem to be unused */ 114 _DIAGASSERT(psenc != NULL); 115 _DIAGASSERT(pspriv != NULL); 116 117 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 118 } 119 120 static __inline int 121 to_int(int ch) 122 { 123 if (ch >= '0' && ch <= '9') 124 return ch - '0'; 125 else if (ch >= 'A' && ch <= 'F') 126 return (ch - 'A') + 10; 127 else if (ch >= 'a' && ch <= 'f') 128 return (ch - 'a') + 10; 129 return -1; 130 } 131 132 #define ESCAPE '\\' 133 #define UCS2_ESC 'u' 134 #define UCS4_ESC 'U' 135 136 #define UCS2_BIT 16 137 #define UCS4_BIT 32 138 #define BMP_MAX UINT32_C(0xFFFF) 139 #define UCS2_MAX UINT32_C(0x10FFFF) 140 #define UCS4_MAX UINT32_C(0x7FFFFFFF) 141 142 static const char *xdig = "0123456789abcdef"; 143 144 static __inline int 145 to_str(char *s, wchar_t wc, int bit) 146 { 147 char *p; 148 149 p = s; 150 *p++ = ESCAPE; 151 switch (bit) { 152 case UCS2_BIT: 153 *p++ = UCS2_ESC; 154 break; 155 case UCS4_BIT: 156 *p++ = UCS4_ESC; 157 break; 158 default: 159 abort(); 160 } 161 do { 162 *p++ = xdig[(wc >> (bit -= 4)) & 0xF]; 163 } while (bit > 0); 164 return p - s; 165 } 166 167 static __inline int 168 is_hi_surrogate(wchar_t wc) 169 { 170 return wc >= 0xD800 && wc <= 0xDBFF; 171 } 172 173 static __inline int 174 is_lo_surrogate(wchar_t wc) 175 { 176 return wc >= 0xDC00 && wc <= 0xDFFF; 177 } 178 179 static __inline wchar_t 180 surrogate_to_ucs(wchar_t hi, wchar_t lo) 181 { 182 _DIAGASSERT(is_hi_surrogate(hi)); 183 _DIAGASSERT(is_lo_surrogate(lo)); 184 185 hi -= 0xD800; 186 lo -= 0xDC00; 187 return (hi << 10 | lo) + 0x10000; 188 } 189 190 static __inline void 191 ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo) 192 { 193 _DIAGASSERT(hi != NULL); 194 _DIAGASSERT(lo != NULL); 195 _DIAGASSERT(wc >= 0x10000); 196 197 wc -= 0x10000; 198 *hi = (wc >> 10) + 0xD800; 199 *lo = (wc & 0x3FF) + 0xDC00; 200 } 201 202 static __inline int 203 is_basic(wchar_t wc) 204 { 205 return (uint32_t)wc <= 0x9F && 206 wc != 0x24 && wc != 0x40 && wc != 0x60; 207 } 208 209 static int 210 _citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei, 211 wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 212 _UESState * __restrict psenc, size_t * __restrict nresult) 213 { 214 const char *s0; 215 int ch, head, tail, i, num; 216 wchar_t hi, wc; 217 218 _DIAGASSERT(ei != NULL); 219 /* pwc may be null */ 220 _DIAGASSERT(s != NULL); 221 _DIAGASSERT(psenc != NULL); 222 _DIAGASSERT(nresult != NULL); 223 224 if (*s == NULL) { 225 _citrus_UES_init_state(ei, psenc); 226 *nresult = 0; 227 return 0; 228 } 229 s0 = *s; 230 231 hi = (wchar_t)0; 232 tail = 0; 233 234 surrogate: 235 wc = (wchar_t)0; 236 head = tail; 237 if (psenc->chlen == head) { 238 if (n-- < 1) 239 goto restart; 240 psenc->ch[psenc->chlen++] = *s0++; 241 } 242 ch = (unsigned char)psenc->ch[head++]; 243 if (ch == ESCAPE) { 244 if (psenc->chlen == head) { 245 if (n-- < 1) 246 goto restart; 247 psenc->ch[psenc->chlen++] = *s0++; 248 } 249 switch (psenc->ch[head]) { 250 case UCS2_ESC: 251 tail += 6; 252 break; 253 case UCS4_ESC: 254 if (ei->mode & MODE_C99) { 255 tail = 10; 256 break; 257 } 258 /*FALLTHROUGH*/ 259 default: 260 tail = 0; 261 } 262 ++head; 263 } 264 for (; head < tail; ++head) { 265 if (psenc->chlen == head) { 266 if (n-- < 1) { 267 restart: 268 *s = s0; 269 *nresult = (size_t)-2; 270 return 0; 271 } 272 psenc->ch[psenc->chlen++] = *s0++; 273 } 274 num = to_int((int)(unsigned char)psenc->ch[head]); 275 if (num < 0) { 276 tail = 0; 277 break; 278 } 279 wc = (wc << 4) | num; 280 } 281 head = 0; 282 switch (tail) { 283 case 0: 284 break; 285 case 6: 286 if (hi != (wchar_t)0) 287 break; 288 if ((ei->mode & MODE_C99) == 0) { 289 if (is_hi_surrogate(wc) != 0) { 290 hi = wc; 291 goto surrogate; 292 } 293 if ((uint32_t)wc <= 0x7F /* XXX */ || 294 is_lo_surrogate(wc) != 0) 295 break; 296 goto done; 297 } 298 /*FALLTHROUGH*/ 299 case 10: 300 if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX && 301 is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0) 302 goto done; 303 *nresult = (size_t)-1; 304 return EILSEQ; 305 case 12: 306 if (is_lo_surrogate(wc) == 0) 307 break; 308 wc = surrogate_to_ucs(hi, wc); 309 goto done; 310 } 311 ch = (unsigned char)psenc->ch[0]; 312 head = psenc->chlen; 313 if (--head > 0) 314 memmove(&psenc->ch[0], &psenc->ch[1], head); 315 wc = (wchar_t)ch; 316 done: 317 psenc->chlen = head; 318 if (pwc != NULL) 319 *pwc = wc; 320 *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s)); 321 *s = s0; 322 323 return 0; 324 } 325 326 static int 327 _citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei, 328 char * __restrict s, size_t n, wchar_t wc, 329 _UESState * __restrict psenc, size_t * __restrict nresult) 330 { 331 wchar_t hi, lo; 332 333 if (psenc->chlen != 0) 334 return EINVAL; 335 336 if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) { 337 if (n-- < 1) 338 goto e2big; 339 psenc->ch[psenc->chlen++] = (char)wc; 340 } else if ((uint32_t)wc <= BMP_MAX) { 341 if (n < 6) 342 goto e2big; 343 psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT); 344 } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) { 345 if (n < 12) 346 goto e2big; 347 ucs_to_surrogate(wc, &hi, &lo); 348 psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT); 349 psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT); 350 } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) { 351 if (n < 10) 352 goto e2big; 353 psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT); 354 } else { 355 *nresult = (size_t)-1; 356 return EILSEQ; 357 } 358 memcpy(s, psenc->ch, psenc->chlen); 359 *nresult = psenc->chlen; 360 psenc->chlen = 0; 361 362 return 0; 363 364 e2big: 365 *nresult = (size_t)-1; 366 return E2BIG; 367 } 368 369 /*ARGSUSED*/ 370 static int 371 _citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei, 372 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 373 { 374 /* ei seem to be unused */ 375 _DIAGASSERT(csid != NULL); 376 _DIAGASSERT(idx != NULL); 377 378 *csid = 0; 379 *idx = (_index_t)wc; 380 381 return 0; 382 } 383 384 static __inline int 385 /*ARGSUSED*/ 386 _citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei, 387 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 388 { 389 /* ei seem to be unused */ 390 _DIAGASSERT(wc != NULL); 391 392 if (csid != 0) 393 return EILSEQ; 394 *wc = (wchar_t)idx; 395 396 return 0; 397 } 398 399 static __inline int 400 /*ARGSUSED*/ 401 _citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei, 402 _UESState * __restrict psenc, int * __restrict rstate) 403 { 404 _DIAGASSERT(psenc != NULL); 405 _DIAGASSERT(rstate != NULL); 406 407 if (psenc->chlen == 0) 408 *rstate = _STDENC_SDGEN_INITIAL; 409 else 410 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; /* XXX */ 411 412 return 0; 413 } 414 415 static void 416 /*ARGSUSED*/ 417 _citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei) 418 { 419 /* ei seems to be unused */ 420 } 421 422 static int 423 /*ARGSUSED*/ 424 _citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei, 425 const void * __restrict var, size_t lenvar) 426 { 427 const char *p; 428 429 _DIAGASSERT(ei != NULL); 430 431 p = var; 432 #define MATCH(x, act) \ 433 do { \ 434 if (lenvar >= (sizeof(#x)-1) && \ 435 _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \ 436 act; \ 437 lenvar -= sizeof(#x)-1; \ 438 p += sizeof(#x)-1; \ 439 } \ 440 } while (/*CONSTCOND*/0) 441 memset((void *)ei, 0, sizeof(*ei)); 442 while (lenvar > 0) { 443 switch (_bcs_toupper(*p)) { 444 case 'C': 445 MATCH(C99, ei->mode |= MODE_C99); 446 break; 447 } 448 ++p; 449 --lenvar; 450 } 451 ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12; 452 453 return 0; 454 } 455 456 /* ---------------------------------------------------------------------- 457 * public interface for ctype 458 */ 459 460 _CITRUS_CTYPE_DECLS(UES); 461 _CITRUS_CTYPE_DEF_OPS(UES); 462 463 #include "citrus_ctype_template.h" 464 465 /* ---------------------------------------------------------------------- 466 * public interface for stdenc 467 */ 468 469 _CITRUS_STDENC_DECLS(UES); 470 _CITRUS_STDENC_DEF_OPS(UES); 471 472 #include "citrus_stdenc_template.h" 473