1 /* $NetBSD: citrus_big5.c,v 1.11 2006/11/22 23:38:27 tnozaki Exp $ */ 2 3 /*- 4 * Copyright (c)2002, 2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61 #include <sys/cdefs.h> 62 #if defined(LIBC_SCCS) && !defined(lint) 63 __RCSID("$NetBSD: citrus_big5.c,v 1.11 2006/11/22 23:38:27 tnozaki Exp $"); 64 #endif /* LIBC_SCCS and not lint */ 65 66 #include <sys/queue.h> 67 #include <sys/types.h> 68 #include <assert.h> 69 #include <errno.h> 70 #include <string.h> 71 #include <stdint.h> 72 #include <stdio.h> 73 #include <stdlib.h> 74 #include <stddef.h> 75 #include <locale.h> 76 #include <wchar.h> 77 #include <limits.h> 78 79 #include "citrus_namespace.h" 80 #include "citrus_types.h" 81 #include "citrus_bcs.h" 82 #include "citrus_module.h" 83 #include "citrus_ctype.h" 84 #include "citrus_stdenc.h" 85 #include "citrus_big5.h" 86 87 #include "citrus_prop.h" 88 89 /* ---------------------------------------------------------------------- 90 * private stuffs used by templates 91 */ 92 93 typedef struct { 94 char ch[2]; 95 int chlen; 96 } _BIG5State; 97 98 typedef struct _BIG5Exclude { 99 TAILQ_ENTRY(_BIG5Exclude) entry; 100 wint_t start, end; 101 } _BIG5Exclude; 102 103 typedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList; 104 105 typedef struct { 106 int cell[0x100]; 107 _BIG5ExcludeList excludes; 108 } _BIG5EncodingInfo; 109 110 typedef struct { 111 _BIG5EncodingInfo ei; 112 struct { 113 /* for future multi-locale facility */ 114 _BIG5State s_mblen; 115 _BIG5State s_mbrlen; 116 _BIG5State s_mbrtowc; 117 _BIG5State s_mbtowc; 118 _BIG5State s_mbsrtowcs; 119 _BIG5State s_wcrtomb; 120 _BIG5State s_wcsrtombs; 121 _BIG5State s_wctomb; 122 } states; 123 } _BIG5CTypeInfo; 124 125 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 126 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 127 128 #define _FUNCNAME(m) _citrus_BIG5_##m 129 #define _ENCODING_INFO _BIG5EncodingInfo 130 #define _CTYPE_INFO _BIG5CTypeInfo 131 #define _ENCODING_STATE _BIG5State 132 #define _ENCODING_MB_CUR_MAX(_ei_) 2 133 #define _ENCODING_IS_STATE_DEPENDENT 0 134 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 135 136 137 static __inline void 138 /*ARGSUSED*/ 139 _citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei, 140 _BIG5State * __restrict s) 141 { 142 memset(s, 0, sizeof(*s)); 143 } 144 145 static __inline void 146 /*ARGSUSED*/ 147 _citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei, 148 void * __restrict pspriv, 149 const _BIG5State * __restrict s) 150 { 151 memcpy(pspriv, (const void *)s, sizeof(*s)); 152 } 153 154 static __inline void 155 /*ARGSUSED*/ 156 _citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei, 157 _BIG5State * __restrict s, 158 const void * __restrict pspriv) 159 { 160 memcpy((void *)s, pspriv, sizeof(*s)); 161 } 162 163 static __inline int 164 _citrus_BIG5_check(_BIG5EncodingInfo *ei, u_int c) 165 { 166 _DIAGASSERT(ei != NULL); 167 168 return (ei->cell[c & 0xFF] & 0x1) ? 2 : 1; 169 } 170 171 static __inline int 172 _citrus_BIG5_check2(_BIG5EncodingInfo *ei, u_int c) 173 { 174 _DIAGASSERT(ei != NULL); 175 176 return (ei->cell[c & 0xFF] & 0x2) ? 1 : 0; 177 } 178 179 static __inline int 180 _citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c) 181 { 182 _BIG5Exclude *exclude; 183 184 _DIAGASSERT(ei != NULL); 185 186 TAILQ_FOREACH(exclude, &ei->excludes, entry) { 187 if (c >= exclude->start && c <= exclude->end) 188 return EILSEQ; 189 } 190 return 0; 191 } 192 193 static int 194 _citrus_BIG5_fill_rowcol(void ** __restrict ctx, const char * __restrict s, 195 uint64_t start, uint64_t end) 196 { 197 _BIG5EncodingInfo *ei; 198 int i; 199 uint64_t n; 200 201 _DIAGASSERT(ctx != NULL && *ctx != NULL); 202 203 if (start > 0xFF || end > 0xFF) 204 return EINVAL; 205 ei = (_BIG5EncodingInfo *)*ctx; 206 i = strcmp("row", s) ? 1 : 0; 207 i = 1 << i; 208 for (n = start; n <= end; ++n) 209 ei->cell[n & 0xFF] |= i; 210 return 0; 211 } 212 213 static int 214 /*ARGSUSED*/ 215 _citrus_BIG5_fill_excludes(void ** __restrict ctx, const char * __restrict s, 216 uint64_t start, uint64_t end) 217 { 218 _BIG5EncodingInfo *ei; 219 _BIG5Exclude *exclude; 220 221 _DIAGASSERT(ctx != NULL && *ctx != NULL); 222 223 if (start > 0xFFFF || end > 0xFFFF) 224 return EINVAL; 225 ei = (_BIG5EncodingInfo *)*ctx; 226 exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList); 227 if (exclude != NULL && (wint_t)start <= exclude->end) 228 return EINVAL; 229 exclude = (void *)malloc(sizeof(*exclude)); 230 if (exclude == NULL) 231 return ENOMEM; 232 exclude->start = (wint_t)start; 233 exclude->end = (wint_t)end; 234 TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry); 235 236 return 0; 237 } 238 239 static const _citrus_prop_hint_t root_hints[] = { 240 _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol), 241 _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol), 242 _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes), 243 _CITRUS_PROP_HINT_END 244 }; 245 246 static void 247 /*ARGSUSED*/ 248 _citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei) 249 { 250 _BIG5Exclude *exclude; 251 252 _DIAGASSERT(ei != NULL); 253 254 while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) { 255 TAILQ_REMOVE(&ei->excludes, exclude, entry); 256 free(exclude); 257 } 258 } 259 260 static int 261 /*ARGSUSED*/ 262 _citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei, 263 const void * __restrict var, size_t lenvar) 264 { 265 int err; 266 const char *s; 267 268 _DIAGASSERT(ei != NULL); 269 270 memset((void *)ei, 0, sizeof(*ei)); 271 TAILQ_INIT(&ei->excludes); 272 273 if (lenvar > 0 && var != NULL) { 274 s = _bcs_skip_ws_len((const char *)var, &lenvar); 275 if (lenvar > 0 && *s != '\0') { 276 err = _citrus_prop_parse_variable( 277 root_hints, (void *)ei, s, lenvar); 278 if (err == 0) 279 return 0; 280 281 _citrus_BIG5_encoding_module_uninit(ei); 282 memset((void *)ei, 0, sizeof(*ei)); 283 TAILQ_INIT(&ei->excludes); 284 } 285 } 286 287 /* fallback Big5-1984, for backward compatibility. */ 288 _citrus_BIG5_fill_rowcol((void **)&ei, "row", 0xA1, 0xFE); 289 _citrus_BIG5_fill_rowcol((void **)&ei, "col", 0x40, 0x7E); 290 _citrus_BIG5_fill_rowcol((void **)&ei, "col", 0xA1, 0xFE); 291 292 return 0; 293 } 294 295 static int 296 /*ARGSUSED*/ 297 _citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei, 298 wchar_t * __restrict pwc, 299 const char ** __restrict s, size_t n, 300 _BIG5State * __restrict psenc, 301 size_t * __restrict nresult) 302 { 303 wchar_t wchar; 304 int c; 305 int chlenbak; 306 const char *s0; 307 308 _DIAGASSERT(nresult != 0); 309 _DIAGASSERT(ei != NULL); 310 _DIAGASSERT(psenc != NULL); 311 _DIAGASSERT(s != NULL && *s != NULL); 312 313 s0 = *s; 314 315 if (s0 == NULL) { 316 _citrus_BIG5_init_state(ei, psenc); 317 *nresult = 0; 318 return (0); 319 } 320 321 chlenbak = psenc->chlen; 322 323 /* make sure we have the first byte in the buffer */ 324 switch (psenc->chlen) { 325 case 0: 326 if (n < 1) 327 goto restart; 328 psenc->ch[0] = *s0++; 329 psenc->chlen = 1; 330 n--; 331 break; 332 case 1: 333 break; 334 default: 335 /* illegal state */ 336 goto ilseq; 337 } 338 339 c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff); 340 if (c == 0) 341 goto ilseq; 342 while (psenc->chlen < c) { 343 if (n < 1) { 344 goto restart; 345 } 346 psenc->ch[psenc->chlen] = *s0++; 347 psenc->chlen++; 348 n--; 349 } 350 351 switch (c) { 352 case 1: 353 wchar = psenc->ch[0] & 0xff; 354 break; 355 case 2: 356 if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff)) 357 goto ilseq; 358 wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff); 359 break; 360 default: 361 /* illegal state */ 362 goto ilseq; 363 } 364 365 if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0) 366 goto ilseq; 367 368 *s = s0; 369 psenc->chlen = 0; 370 if (pwc) 371 *pwc = wchar; 372 if (!wchar) 373 *nresult = 0; 374 else 375 *nresult = c - chlenbak; 376 377 return (0); 378 379 ilseq: 380 psenc->chlen = 0; 381 *nresult = (size_t)-1; 382 return (EILSEQ); 383 384 restart: 385 *s = s0; 386 *nresult = (size_t)-2; 387 return (0); 388 } 389 390 static int 391 /*ARGSUSED*/ 392 _citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei, 393 char * __restrict s, 394 size_t n, wchar_t wc, _BIG5State * __restrict psenc, 395 size_t * __restrict nresult) 396 { 397 int l, ret; 398 399 _DIAGASSERT(ei != NULL); 400 _DIAGASSERT(nresult != 0); 401 _DIAGASSERT(s != NULL); 402 403 /* check invalid sequence */ 404 if (wc & ~0xffff || 405 _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) { 406 ret = EILSEQ; 407 goto err; 408 } 409 410 if (wc & 0x8000) { 411 if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 || 412 !_citrus_BIG5_check2(ei, wc & 0xff)) { 413 ret = EILSEQ; 414 goto err; 415 } 416 l = 2; 417 } else { 418 if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) { 419 ret = EILSEQ; 420 goto err; 421 } 422 l = 1; 423 } 424 425 if (n < l) { 426 /* bound check failure */ 427 ret = E2BIG; 428 goto err; 429 } 430 431 if (l == 2) { 432 s[0] = (wc >> 8) & 0xff; 433 s[1] = wc & 0xff; 434 } else 435 s[0] = wc & 0xff; 436 437 *nresult = l; 438 439 return 0; 440 441 err: 442 *nresult = (size_t)-1; 443 return ret; 444 } 445 446 static __inline int 447 /*ARGSUSED*/ 448 _citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei, 449 _csid_t * __restrict csid, 450 _index_t * __restrict idx, wchar_t wc) 451 { 452 453 _DIAGASSERT(csid != NULL && idx != NULL); 454 455 *csid = (wc < 0x100) ? 0 : 1; 456 *idx = (_index_t)wc; 457 458 return 0; 459 } 460 461 static __inline int 462 /*ARGSUSED*/ 463 _citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei, 464 wchar_t * __restrict wc, 465 _csid_t csid, _index_t idx) 466 { 467 _DIAGASSERT(wc != NULL); 468 469 switch (csid) { 470 case 0: 471 case 1: 472 *wc = (wchar_t)idx; 473 break; 474 default: 475 return EILSEQ; 476 } 477 478 return 0; 479 } 480 481 static __inline int 482 /*ARGSUSED*/ 483 _citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei, 484 _BIG5State * __restrict psenc, 485 int * __restrict rstate) 486 { 487 488 if (psenc->chlen == 0) 489 *rstate = _STDENC_SDGEN_INITIAL; 490 else 491 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 492 493 return 0; 494 } 495 496 /* ---------------------------------------------------------------------- 497 * public interface for ctype 498 */ 499 500 _CITRUS_CTYPE_DECLS(BIG5); 501 _CITRUS_CTYPE_DEF_OPS(BIG5); 502 503 #include "citrus_ctype_template.h" 504 505 506 /* ---------------------------------------------------------------------- 507 * public interface for stdenc 508 */ 509 510 _CITRUS_STDENC_DECLS(BIG5); 511 _CITRUS_STDENC_DEF_OPS(BIG5); 512 513 #include "citrus_stdenc_template.h" 514