1 /* $NetBSD: citrus_gbk2k.c,v 1.6 2006/02/15 19:50:27 tnozaki Exp $ */ 2 3 /*- 4 * Copyright (c)2003 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if defined(LIBC_SCCS) && !defined(lint) 31 __RCSID("$NetBSD: citrus_gbk2k.c,v 1.6 2006/02/15 19:50:27 tnozaki Exp $"); 32 #endif /* LIBC_SCCS and not lint */ 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <string.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <stddef.h> 40 #include <locale.h> 41 #include <wchar.h> 42 #include <sys/types.h> 43 #include <limits.h> 44 45 #include "citrus_namespace.h" 46 #include "citrus_types.h" 47 #include "citrus_bcs.h" 48 #include "citrus_module.h" 49 #include "citrus_ctype.h" 50 #include "citrus_stdenc.h" 51 #include "citrus_gbk2k.h" 52 53 54 /* ---------------------------------------------------------------------- 55 * private stuffs used by templates 56 */ 57 58 typedef struct _GBK2KState { 59 char ch[4]; 60 int chlen; 61 } _GBK2KState; 62 63 typedef struct { 64 int mb_cur_max; 65 } _GBK2KEncodingInfo; 66 67 typedef struct { 68 _GBK2KEncodingInfo ei; 69 struct { 70 /* for future multi-locale facility */ 71 _GBK2KState s_mblen; 72 _GBK2KState s_mbrlen; 73 _GBK2KState s_mbrtowc; 74 _GBK2KState s_mbtowc; 75 _GBK2KState s_mbsrtowcs; 76 _GBK2KState s_wcrtomb; 77 _GBK2KState s_wcsrtombs; 78 _GBK2KState s_wctomb; 79 } states; 80 } _GBK2KCTypeInfo; 81 82 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 83 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 84 85 #define _FUNCNAME(m) _citrus_GBK2K_##m 86 #define _ENCODING_INFO _GBK2KEncodingInfo 87 #define _CTYPE_INFO _GBK2KCTypeInfo 88 #define _ENCODING_STATE _GBK2KState 89 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 90 #define _ENCODING_IS_STATE_DEPENDENT 0 91 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 92 93 static __inline void 94 /*ARGSUSED*/ 95 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei, 96 _GBK2KState * __restrict s) 97 { 98 memset(s, 0, sizeof(*s)); 99 } 100 101 static __inline void 102 /*ARGSUSED*/ 103 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei, 104 void * __restrict pspriv, 105 const _GBK2KState * __restrict s) 106 { 107 memcpy(pspriv, (const void *)s, sizeof(*s)); 108 } 109 110 static __inline void 111 /*ARGSUSED*/ 112 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei, 113 _GBK2KState * __restrict s, 114 const void * __restrict pspriv) 115 { 116 memcpy((void *)s, pspriv, sizeof(*s)); 117 } 118 119 static __inline int 120 _mb_singlebyte(int c) 121 { 122 c &= 0xff; 123 return (c <= 0x7f); 124 } 125 126 static __inline int 127 _mb_leadbyte(int c) 128 { 129 c &= 0xff; 130 return (0x81 <= c && c <= 0xfe); 131 } 132 133 static __inline int 134 _mb_trailbyte(int c) 135 { 136 c &= 0xff; 137 return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe)); 138 } 139 140 static __inline int 141 _mb_surrogate(int c) 142 { 143 c &= 0xff; 144 return (0x30 <= c && c <= 0x39); 145 } 146 147 static __inline int 148 _mb_count(wchar_t v) 149 { 150 u_int32_t c; 151 152 c = (u_int32_t)v; /* XXX */ 153 if (!(c & 0xffffff00)) 154 return (1); 155 if (!(c & 0xffff0000)) 156 return (2); 157 return (4); 158 } 159 160 #define _PSENC (psenc->ch[psenc->chlen - 1]) 161 #define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c)) 162 163 static int 164 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei, 165 wchar_t * __restrict pwc, 166 const char ** __restrict s, size_t n, 167 _GBK2KState * __restrict psenc, 168 size_t * __restrict nresult) 169 { 170 int chlenbak, len; 171 const char *s0, *s1; 172 wchar_t wc; 173 174 _DIAGASSERT(ei != NULL); 175 /* pwc may be NULL */ 176 _DIAGASSERT(s != NULL); 177 _DIAGASSERT(psenc != NULL); 178 179 s0 = *s; 180 181 if (s0 == NULL) { 182 /* _citrus_GBK2K_init_state(ei, psenc); */ 183 psenc->chlen = 0; 184 *nresult = 0; 185 return (0); 186 } 187 188 chlenbak = psenc->chlen; 189 190 switch (psenc->chlen) { 191 case 3: 192 if (!_mb_leadbyte (_PSENC)) 193 goto invalid; 194 /* FALLTHROUGH */ 195 case 2: 196 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) 197 goto invalid; 198 /* FALLTHROUGH */ 199 case 1: 200 if (!_mb_leadbyte (_PSENC)) 201 goto invalid; 202 /* FALLTHOROUGH */ 203 case 0: 204 break; 205 default: 206 goto invalid; 207 } 208 209 for (;;) { 210 if (n-- < 1) 211 goto restart; 212 213 _PUSH_PSENC(*s0++); 214 215 switch (psenc->chlen) { 216 case 1: 217 if (_mb_singlebyte(_PSENC)) 218 goto convert; 219 if (_mb_leadbyte (_PSENC)) 220 continue; 221 goto ilseq; 222 case 2: 223 if (_mb_trailbyte (_PSENC)) 224 goto convert; 225 if (ei->mb_cur_max == 4 && 226 _mb_surrogate (_PSENC)) 227 continue; 228 goto ilseq; 229 case 3: 230 if (_mb_leadbyte (_PSENC)) 231 continue; 232 goto ilseq; 233 case 4: 234 if (_mb_surrogate (_PSENC)) 235 goto convert; 236 goto ilseq; 237 } 238 } 239 240 convert: 241 len = psenc->chlen; 242 s1 = &psenc->ch[0]; 243 wc = 0; 244 while (len-- > 0) 245 wc = (wc << 8) | (*s1++ & 0xff); 246 247 if (pwc != NULL) 248 *pwc = wc; 249 *s = s0; 250 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; 251 /* _citrus_GBK2K_init_state(ei, psenc); */ 252 psenc->chlen = 0; 253 254 return (0); 255 256 restart: 257 *s = s0; 258 *nresult = (size_t)-2; 259 260 return (0); 261 262 invalid: 263 return (EINVAL); 264 265 ilseq: 266 *nresult = (size_t)-1; 267 return (EILSEQ); 268 } 269 270 static int 271 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei, 272 char * __restrict s, size_t n, wchar_t wc, 273 _GBK2KState * __restrict psenc, 274 size_t * __restrict nresult) 275 { 276 int len, ret; 277 278 _DIAGASSERT(ei != NULL); 279 _DIAGASSERT(s != NULL); 280 _DIAGASSERT(psenc != NULL); 281 282 if (psenc->chlen != 0) { 283 ret = EINVAL; 284 goto err; 285 } 286 287 len = _mb_count(wc); 288 if (n < len) { 289 ret = E2BIG; 290 goto err; 291 } 292 293 switch (len) { 294 case 1: 295 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { 296 ret = EILSEQ; 297 goto err; 298 } 299 break; 300 case 2: 301 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 302 !_mb_trailbyte (_PUSH_PSENC(wc ))) { 303 ret = EILSEQ; 304 goto err; 305 } 306 break; 307 case 4: 308 if (ei->mb_cur_max != 4 || 309 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || 310 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || 311 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 312 !_mb_surrogate (_PUSH_PSENC(wc ))) { 313 ret = EILSEQ; 314 goto err; 315 } 316 break; 317 } 318 319 _DIAGASSERT(len == psenc->chlen); 320 321 memcpy(s, psenc->ch, psenc->chlen); 322 *nresult = psenc->chlen; 323 /* _citrus_GBK2K_init_state(ei, psenc); */ 324 psenc->chlen = 0; 325 326 return (0); 327 328 err: 329 *nresult = (size_t)-1; 330 return ret; 331 } 332 333 static __inline int 334 /*ARGSUSED*/ 335 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei, 336 _csid_t * __restrict csid, 337 _index_t * __restrict idx, wchar_t wc) 338 { 339 u_int8_t ch, cl; 340 341 _DIAGASSERT(csid != NULL && idx != NULL); 342 343 if ((u_int32_t)wc<0x80) { 344 /* ISO646 */ 345 *csid = 0; 346 *idx = (_index_t)wc; 347 } else if ((u_int32_t)wc>=0x10000) { 348 /* GBKUCS : XXX */ 349 *csid = 3; 350 *idx = (_index_t)wc; 351 } else { 352 ch = (u_int8_t)(wc >> 8); 353 cl = (u_int8_t)wc; 354 if (ch>=0xA1 && cl>=0xA1) { 355 /* EUC G1 */ 356 *csid = 1; 357 *idx = (_index_t)wc & 0x7F7FU; 358 } else { 359 /* extended area (0x8140-) */ 360 *csid = 2; 361 *idx = (_index_t)wc; 362 } 363 } 364 365 return 0; 366 } 367 368 static __inline int 369 /*ARGSUSED*/ 370 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei, 371 wchar_t * __restrict wc, 372 _csid_t csid, _index_t idx) 373 { 374 375 _DIAGASSERT(wc != NULL); 376 377 switch (csid) { 378 case 0: 379 /* ISO646 */ 380 *wc = (wchar_t)idx; 381 break; 382 case 1: 383 /* EUC G1 */ 384 *wc = (wchar_t)idx | 0x8080U; 385 break; 386 case 2: 387 /* extended area */ 388 *wc = (wchar_t)idx; 389 break; 390 case 3: 391 /* GBKUCS : XXX */ 392 if (ei->mb_cur_max != 4) 393 return EINVAL; 394 *wc = (wchar_t)idx; 395 break; 396 default: 397 return EILSEQ; 398 } 399 400 return 0; 401 } 402 403 static __inline int 404 /*ARGSUSED*/ 405 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei, 406 _GBK2KState * __restrict psenc, 407 int * __restrict rstate) 408 { 409 410 if (psenc->chlen == 0) 411 *rstate = _STDENC_SDGEN_INITIAL; 412 else 413 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 414 415 return 0; 416 } 417 418 static int 419 /*ARGSUSED*/ 420 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei, 421 const void * __restrict var, size_t lenvar) 422 { 423 const char *p; 424 425 _DIAGASSERT(ei != NULL); 426 427 p = var; 428 #define MATCH(x, act) \ 429 do { \ 430 if (lenvar >= (sizeof(#x)-1) && \ 431 _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \ 432 act; \ 433 lenvar -= sizeof(#x)-1; \ 434 p += sizeof(#x)-1; \ 435 } \ 436 } while (/*CONSTCOND*/0) 437 memset((void *)ei, 0, sizeof(*ei)); 438 ei->mb_cur_max = 4; 439 while (lenvar>0) { 440 switch (_bcs_tolower(*p)) { 441 case '2': 442 MATCH("2byte", ei->mb_cur_max = 2); 443 break; 444 } 445 p++; 446 lenvar--; 447 } 448 449 return (0); 450 } 451 452 static void 453 /*ARGSUSED*/ 454 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei) 455 { 456 } 457 458 459 /* ---------------------------------------------------------------------- 460 * public interface for ctype 461 */ 462 463 _CITRUS_CTYPE_DECLS(GBK2K); 464 _CITRUS_CTYPE_DEF_OPS(GBK2K); 465 466 #include "citrus_ctype_template.h" 467 468 /* ---------------------------------------------------------------------- 469 * public interface for stdenc 470 */ 471 472 _CITRUS_STDENC_DECLS(GBK2K); 473 _CITRUS_STDENC_DEF_OPS(GBK2K); 474 475 #include "citrus_stdenc_template.h" 476