1 /* $NetBSD: citrus_ues.c,v 1.4 2013/05/28 16:57:56 joerg Exp $ */ 2 3 /*- 4 * Copyright (c)2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if defined(LIBC_SCCS) && !defined(lint) 31 __RCSID("$NetBSD: citrus_ues.c,v 1.4 2013/05/28 16:57:56 joerg Exp $"); 32 #endif /* LIBC_SCCS and not lint */ 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <string.h> 37 #include <stdio.h> 38 #include <stdint.h> 39 #include <stdlib.h> 40 #include <limits.h> 41 #include <wchar.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_bcs.h" 46 #include "citrus_module.h" 47 #include "citrus_ctype.h" 48 #include "citrus_stdenc.h" 49 #include "citrus_ues.h" 50 51 typedef struct { 52 int mode; 53 #define MODE_C99 1 54 size_t mb_cur_max; 55 } _UESEncodingInfo; 56 57 typedef struct { 58 int chlen; 59 char ch[12]; 60 } _UESState; 61 62 typedef struct { 63 _UESEncodingInfo ei; 64 struct { 65 /* for future multi-locale facility */ 66 _UESState s_mblen; 67 _UESState s_mbrlen; 68 _UESState s_mbrtowc; 69 _UESState s_mbtowc; 70 _UESState s_mbsrtowcs; 71 _UESState s_mbsnrtowcs; 72 _UESState s_wcrtomb; 73 _UESState s_wcsrtombs; 74 _UESState s_wcsnrtombs; 75 _UESState s_wctomb; 76 } states; 77 } _UESCTypeInfo; 78 79 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 80 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 81 82 #define _FUNCNAME(m) _citrus_UES_##m 83 #define _ENCODING_INFO _UESEncodingInfo 84 #define _CTYPE_INFO _UESCTypeInfo 85 #define _ENCODING_STATE _UESState 86 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 87 #define _ENCODING_IS_STATE_DEPENDENT 0 88 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 89 90 static __inline void 91 /*ARGSUSED*/ 92 _citrus_UES_init_state(_UESEncodingInfo * __restrict ei, 93 _UESState * __restrict psenc) 94 { 95 psenc->chlen = 0; 96 } 97 98 static __inline void 99 /*ARGSUSED*/ 100 _citrus_UES_pack_state(_UESEncodingInfo * __restrict ei, 101 void *__restrict pspriv, const _UESState * __restrict psenc) 102 { 103 /* ei seem to be unused */ 104 _DIAGASSERT(pspriv != NULL); 105 _DIAGASSERT(psenc != NULL); 106 107 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 108 } 109 110 static __inline void 111 /*ARGSUSED*/ 112 _citrus_UES_unpack_state(_UESEncodingInfo * __restrict ei, 113 _UESState * __restrict psenc, const void * __restrict pspriv) 114 { 115 /* ei seem to be unused */ 116 _DIAGASSERT(psenc != NULL); 117 _DIAGASSERT(pspriv != NULL); 118 119 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 120 } 121 122 static __inline int 123 to_int(int ch) 124 { 125 if (ch >= '0' && ch <= '9') 126 return ch - '0'; 127 else if (ch >= 'A' && ch <= 'F') 128 return (ch - 'A') + 10; 129 else if (ch >= 'a' && ch <= 'f') 130 return (ch - 'a') + 10; 131 return -1; 132 } 133 134 #define ESCAPE '\\' 135 #define UCS2_ESC 'u' 136 #define UCS4_ESC 'U' 137 138 #define UCS2_BIT 16 139 #define UCS4_BIT 32 140 #define BMP_MAX UINT32_C(0xFFFF) 141 #define UCS2_MAX UINT32_C(0x10FFFF) 142 #define UCS4_MAX UINT32_C(0x7FFFFFFF) 143 144 static const char *xdig = "0123456789abcdef"; 145 146 static __inline int 147 to_str(char *s, wchar_t wc, int bit) 148 { 149 char *p; 150 151 p = s; 152 *p++ = ESCAPE; 153 switch (bit) { 154 case UCS2_BIT: 155 *p++ = UCS2_ESC; 156 break; 157 case UCS4_BIT: 158 *p++ = UCS4_ESC; 159 break; 160 default: 161 abort(); 162 } 163 do { 164 *p++ = xdig[(wc >> (bit -= 4)) & 0xF]; 165 } while (bit > 0); 166 return p - s; 167 } 168 169 static __inline int 170 is_hi_surrogate(wchar_t wc) 171 { 172 return wc >= 0xD800 && wc <= 0xDBFF; 173 } 174 175 static __inline int 176 is_lo_surrogate(wchar_t wc) 177 { 178 return wc >= 0xDC00 && wc <= 0xDFFF; 179 } 180 181 static __inline wchar_t 182 surrogate_to_ucs(wchar_t hi, wchar_t lo) 183 { 184 _DIAGASSERT(is_hi_surrogate(hi)); 185 _DIAGASSERT(is_lo_surrogate(lo)); 186 187 hi -= 0xD800; 188 lo -= 0xDC00; 189 return (hi << 10 | lo) + 0x10000; 190 } 191 192 static __inline void 193 ucs_to_surrogate(wchar_t wc, wchar_t * __restrict hi, wchar_t * __restrict lo) 194 { 195 _DIAGASSERT(hi != NULL); 196 _DIAGASSERT(lo != NULL); 197 _DIAGASSERT(wc >= 0x10000); 198 199 wc -= 0x10000; 200 *hi = (wc >> 10) + 0xD800; 201 *lo = (wc & 0x3FF) + 0xDC00; 202 } 203 204 static __inline int 205 is_basic(wchar_t wc) 206 { 207 return (uint32_t)wc <= 0x9F && 208 wc != 0x24 && wc != 0x40 && wc != 0x60; 209 } 210 211 static int 212 _citrus_UES_mbrtowc_priv(_UESEncodingInfo * __restrict ei, 213 wchar_t * __restrict pwc, const char ** __restrict s, size_t n, 214 _UESState * __restrict psenc, size_t * __restrict nresult) 215 { 216 const char *s0; 217 int ch, head, tail, num; 218 wchar_t hi, wc; 219 220 _DIAGASSERT(ei != NULL); 221 /* pwc may be null */ 222 _DIAGASSERT(s != NULL); 223 _DIAGASSERT(psenc != NULL); 224 _DIAGASSERT(nresult != NULL); 225 226 if (*s == NULL) { 227 _citrus_UES_init_state(ei, psenc); 228 *nresult = 0; 229 return 0; 230 } 231 s0 = *s; 232 233 hi = (wchar_t)0; 234 tail = 0; 235 236 surrogate: 237 wc = (wchar_t)0; 238 head = tail; 239 if (psenc->chlen == head) { 240 if (n-- < 1) 241 goto restart; 242 psenc->ch[psenc->chlen++] = *s0++; 243 } 244 ch = (unsigned char)psenc->ch[head++]; 245 if (ch == ESCAPE) { 246 if (psenc->chlen == head) { 247 if (n-- < 1) 248 goto restart; 249 psenc->ch[psenc->chlen++] = *s0++; 250 } 251 switch (psenc->ch[head]) { 252 case UCS2_ESC: 253 tail += 6; 254 break; 255 case UCS4_ESC: 256 if (ei->mode & MODE_C99) { 257 tail = 10; 258 break; 259 } 260 /*FALLTHROUGH*/ 261 default: 262 tail = 0; 263 } 264 ++head; 265 } 266 for (; head < tail; ++head) { 267 if (psenc->chlen == head) { 268 if (n-- < 1) { 269 restart: 270 *s = s0; 271 *nresult = (size_t)-2; 272 return 0; 273 } 274 psenc->ch[psenc->chlen++] = *s0++; 275 } 276 num = to_int((int)(unsigned char)psenc->ch[head]); 277 if (num < 0) { 278 tail = 0; 279 break; 280 } 281 wc = (wc << 4) | num; 282 } 283 head = 0; 284 switch (tail) { 285 case 0: 286 break; 287 case 6: 288 if (hi != (wchar_t)0) 289 break; 290 if ((ei->mode & MODE_C99) == 0) { 291 if (is_hi_surrogate(wc) != 0) { 292 hi = wc; 293 goto surrogate; 294 } 295 if ((uint32_t)wc <= 0x7F /* XXX */ || 296 is_lo_surrogate(wc) != 0) 297 break; 298 goto done; 299 } 300 /*FALLTHROUGH*/ 301 case 10: 302 if (is_basic(wc) == 0 && (uint32_t)wc <= UCS4_MAX && 303 is_hi_surrogate(wc) == 0 && is_lo_surrogate(wc) == 0) 304 goto done; 305 *nresult = (size_t)-1; 306 return EILSEQ; 307 case 12: 308 if (is_lo_surrogate(wc) == 0) 309 break; 310 wc = surrogate_to_ucs(hi, wc); 311 goto done; 312 } 313 ch = (unsigned char)psenc->ch[0]; 314 head = psenc->chlen; 315 if (--head > 0) 316 memmove(&psenc->ch[0], &psenc->ch[1], head); 317 wc = (wchar_t)ch; 318 done: 319 psenc->chlen = head; 320 if (pwc != NULL) 321 *pwc = wc; 322 *nresult = (size_t)((wc == 0) ? 0 : (s0 - *s)); 323 *s = s0; 324 325 return 0; 326 } 327 328 static int 329 _citrus_UES_wcrtomb_priv(_UESEncodingInfo * __restrict ei, 330 char * __restrict s, size_t n, wchar_t wc, 331 _UESState * __restrict psenc, size_t * __restrict nresult) 332 { 333 wchar_t hi, lo; 334 335 if (psenc->chlen != 0) 336 return EINVAL; 337 338 if ((ei->mode & MODE_C99) ? is_basic(wc) : (uint32_t)wc <= 0x7F) { 339 if (n-- < 1) 340 goto e2big; 341 psenc->ch[psenc->chlen++] = (char)wc; 342 } else if ((uint32_t)wc <= BMP_MAX) { 343 if (n < 6) 344 goto e2big; 345 psenc->chlen = to_str(&psenc->ch[0], wc, UCS2_BIT); 346 } else if ((ei->mode & MODE_C99) == 0 && (uint32_t)wc <= UCS2_MAX) { 347 if (n < 12) 348 goto e2big; 349 ucs_to_surrogate(wc, &hi, &lo); 350 psenc->chlen += to_str(&psenc->ch[0], hi, UCS2_BIT); 351 psenc->chlen += to_str(&psenc->ch[6], lo, UCS2_BIT); 352 } else if ((ei->mode & MODE_C99) && (uint32_t)wc <= UCS4_MAX) { 353 if (n < 10) 354 goto e2big; 355 psenc->chlen = to_str(&psenc->ch[0], wc, UCS4_BIT); 356 } else { 357 *nresult = (size_t)-1; 358 return EILSEQ; 359 } 360 memcpy(s, psenc->ch, psenc->chlen); 361 *nresult = psenc->chlen; 362 psenc->chlen = 0; 363 364 return 0; 365 366 e2big: 367 *nresult = (size_t)-1; 368 return E2BIG; 369 } 370 371 /*ARGSUSED*/ 372 static int 373 _citrus_UES_stdenc_wctocs(_UESEncodingInfo * __restrict ei, 374 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 375 { 376 /* ei seem to be unused */ 377 _DIAGASSERT(csid != NULL); 378 _DIAGASSERT(idx != NULL); 379 380 *csid = 0; 381 *idx = (_index_t)wc; 382 383 return 0; 384 } 385 386 static __inline int 387 /*ARGSUSED*/ 388 _citrus_UES_stdenc_cstowc(_UESEncodingInfo * __restrict ei, 389 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 390 { 391 /* ei seem to be unused */ 392 _DIAGASSERT(wc != NULL); 393 394 if (csid != 0) 395 return EILSEQ; 396 *wc = (wchar_t)idx; 397 398 return 0; 399 } 400 401 static __inline int 402 /*ARGSUSED*/ 403 _citrus_UES_stdenc_get_state_desc_generic(_UESEncodingInfo * __restrict ei, 404 _UESState * __restrict psenc, int * __restrict rstate) 405 { 406 _DIAGASSERT(psenc != NULL); 407 _DIAGASSERT(rstate != NULL); 408 409 if (psenc->chlen == 0) 410 *rstate = _STDENC_SDGEN_INITIAL; 411 else 412 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; /* XXX */ 413 414 return 0; 415 } 416 417 static void 418 /*ARGSUSED*/ 419 _citrus_UES_encoding_module_uninit(_UESEncodingInfo *ei) 420 { 421 /* ei seems to be unused */ 422 } 423 424 static int 425 /*ARGSUSED*/ 426 _citrus_UES_encoding_module_init(_UESEncodingInfo * __restrict ei, 427 const void * __restrict var, size_t lenvar) 428 { 429 const char *p; 430 431 _DIAGASSERT(ei != NULL); 432 433 p = var; 434 #define MATCH(x, act) \ 435 do { \ 436 if (lenvar >= (sizeof(#x)-1) && \ 437 _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \ 438 act; \ 439 lenvar -= sizeof(#x)-1; \ 440 p += sizeof(#x)-1; \ 441 } \ 442 } while (/*CONSTCOND*/0) 443 memset((void *)ei, 0, sizeof(*ei)); 444 while (lenvar > 0) { 445 switch (_bcs_toupper(*p)) { 446 case 'C': 447 MATCH(C99, ei->mode |= MODE_C99); 448 break; 449 } 450 ++p; 451 --lenvar; 452 } 453 ei->mb_cur_max = (ei->mode & MODE_C99) ? 10 : 12; 454 455 return 0; 456 } 457 458 /* ---------------------------------------------------------------------- 459 * public interface for ctype 460 */ 461 462 _CITRUS_CTYPE_DECLS(UES); 463 _CITRUS_CTYPE_DEF_OPS(UES); 464 465 #include "citrus_ctype_template.h" 466 467 /* ---------------------------------------------------------------------- 468 * public interface for stdenc 469 */ 470 471 _CITRUS_STDENC_DECLS(UES); 472 _CITRUS_STDENC_DEF_OPS(UES); 473 474 #include "citrus_stdenc_template.h" 475