1 /* $NetBSD: citrus_zw.c,v 1.3 2006/11/24 17:27:52 tnozaki Exp $ */ 2 3 /*- 4 * Copyright (c)2004, 2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/cdefs.h> 31 #if defined(LIB_SCCS) && !defined(lint) 32 __RCSID("$NetBSD: citrus_zw.c,v 1.3 2006/11/24 17:27:52 tnozaki Exp $"); 33 #endif /* LIB_SCCS and not lint */ 34 35 #include <sys/types.h> 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdint.h> 41 #include <stdlib.h> 42 #include <stddef.h> 43 #include <locale.h> 44 #include <wchar.h> 45 #include <limits.h> 46 47 #include "citrus_namespace.h" 48 #include "citrus_types.h" 49 #include "citrus_module.h" 50 #include "citrus_ctype.h" 51 #include "citrus_stdenc.h" 52 #include "citrus_zw.h" 53 54 /* ---------------------------------------------------------------------- 55 * private stuffs used by templates 56 */ 57 58 typedef struct { 59 int dummy; 60 } _ZWEncodingInfo; 61 62 typedef enum { 63 NONE, AMBIGIOUS, ASCII, GB2312 64 } _ZWCharset; 65 66 typedef struct { 67 int chlen; 68 char ch[4]; 69 _ZWCharset charset; 70 } _ZWState; 71 72 typedef struct { 73 _ZWEncodingInfo ei; 74 struct { 75 /* for future multi-locale facility */ 76 _ZWState s_mblen; 77 _ZWState s_mbrlen; 78 _ZWState s_mbrtowc; 79 _ZWState s_mbtowc; 80 _ZWState s_mbsrtowcs; 81 _ZWState s_wcrtomb; 82 _ZWState s_wcsrtombs; 83 _ZWState s_wctomb; 84 } states; 85 } _ZWCTypeInfo; 86 87 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 88 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 89 90 #define _FUNCNAME(m) _citrus_ZW_##m 91 #define _ENCODING_INFO _ZWEncodingInfo 92 #define _CTYPE_INFO _ZWCTypeInfo 93 #define _ENCODING_STATE _ZWState 94 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 95 #define _ENCODING_IS_STATE_DEPENDENT 1 96 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->charset != NONE) 97 98 static __inline void 99 /*ARGSUSED*/ 100 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei, 101 _ZWState * __restrict psenc) 102 { 103 /* ei my be unused */ 104 _DIAGASSERT(psenc != NULL); 105 106 psenc->chlen = 0; 107 psenc->charset = NONE; 108 } 109 110 static __inline void 111 /*ARGSUSED*/ 112 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei, 113 void *__restrict pspriv, const _ZWState * __restrict psenc) 114 { 115 /* ei may be unused */ 116 _DIAGASSERT(pspriv != NULL); 117 _DIAGASSERT(psenc != NULL); 118 119 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 120 } 121 122 static __inline void 123 /*ARGSUSED*/ 124 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei, 125 _ZWState * __restrict psenc, const void * __restrict pspriv) 126 { 127 /* ei may be unused */ 128 _DIAGASSERT(psenc != NULL); 129 _DIAGASSERT(pspriv != NULL); 130 131 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 132 } 133 134 static int 135 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei, 136 wchar_t * __restrict pwc, const char **__restrict s, size_t n, 137 _ZWState * __restrict psenc, size_t * __restrict nresult) 138 { 139 const char *s0; 140 int ch, len; 141 wchar_t wc; 142 143 /* ei may be unused */ 144 /* pwc may be null */ 145 _DIAGASSERT(s != NULL); 146 _DIAGASSERT(psenc != NULL); 147 _DIAGASSERT(nresult != NULL); 148 149 if (*s == NULL) { 150 _citrus_ZW_init_state(ei, psenc); 151 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 152 return 0; 153 } 154 s0 = *s; 155 len = 0; 156 157 #define STORE \ 158 do { \ 159 if (n-- < 1) { \ 160 *nresult = (size_t)-2; \ 161 *s = s0; \ 162 return 0; \ 163 } \ 164 ch = (unsigned char)*s0++; \ 165 if (len++ > MB_LEN_MAX || ch > 0x7F)\ 166 goto ilseq; \ 167 psenc->ch[psenc->chlen++] = ch; \ 168 } while (/*CONSTCOND*/0) 169 170 loop: 171 switch (psenc->charset) { 172 case ASCII: 173 switch (psenc->chlen) { 174 case 0: 175 STORE; 176 switch (psenc->ch[0]) { 177 case '\0': case '\n': 178 psenc->charset = NONE; 179 } 180 /*FALLTHROUGH*/ 181 case 1: 182 break; 183 default: 184 return EINVAL; 185 } 186 ch = (unsigned char)psenc->ch[0]; 187 if (ch > 0x7F) 188 goto ilseq; 189 wc = (wchar_t)ch; 190 psenc->chlen = 0; 191 break; 192 case NONE: 193 if (psenc->chlen != 0) 194 return EINVAL; 195 STORE; 196 ch = (unsigned char)psenc->ch[0]; 197 if (ch != 'z') { 198 if (ch != '\n' && ch != '\0') 199 psenc->charset = ASCII; 200 wc = (wchar_t)ch; 201 psenc->chlen = 0; 202 break; 203 } 204 psenc->charset = AMBIGIOUS; 205 psenc->chlen = 0; 206 /* FALLTHROUGH */ 207 case AMBIGIOUS: 208 if (psenc->chlen != 0) 209 return EINVAL; 210 STORE; 211 if (psenc->ch[0] != 'W') { 212 psenc->charset = ASCII; 213 wc = L'z'; 214 break; 215 } 216 psenc->charset = GB2312; 217 psenc->chlen = 0; 218 /* FALLTHROUGH */ 219 case GB2312: 220 switch (psenc->chlen) { 221 case 0: 222 STORE; 223 ch = (unsigned char)psenc->ch[0]; 224 if (ch == '\0') { 225 psenc->charset = NONE; 226 wc = (wchar_t)ch; 227 psenc->chlen = 0; 228 break; 229 } else if (ch == '\n') { 230 psenc->charset = NONE; 231 psenc->chlen = 0; 232 goto loop; 233 } 234 /*FALLTHROUGH*/ 235 case 1: 236 STORE; 237 if (psenc->ch[0] == ' ') { 238 ch = (unsigned char)psenc->ch[1]; 239 wc = (wchar_t)ch; 240 psenc->chlen = 0; 241 break; 242 } else if (psenc->ch[0] == '#') { 243 ch = (unsigned char)psenc->ch[1]; 244 if (ch == '\n') { 245 psenc->charset = NONE; 246 wc = (wchar_t)ch; 247 psenc->chlen = 0; 248 break; 249 } else if (ch == ' ') { 250 wc = (wchar_t)ch; 251 psenc->chlen = 0; 252 break; 253 } 254 } 255 ch = (unsigned char)psenc->ch[0]; 256 if (ch < 0x21 || ch > 0x7E) 257 goto ilseq; 258 wc = (wchar_t)(ch << 8); 259 ch = (unsigned char)psenc->ch[1]; 260 if (ch < 0x21 || ch > 0x7E) { 261 ilseq: 262 *nresult = (size_t)-1; 263 return EILSEQ; 264 } 265 wc |= (wchar_t)ch; 266 psenc->chlen = 0; 267 break; 268 default: 269 return EINVAL; 270 } 271 break; 272 default: 273 return EINVAL; 274 } 275 if (pwc != NULL) 276 *pwc = wc; 277 278 *nresult = (size_t)(wc == 0 ? 0 : len); 279 *s = s0; 280 281 return 0; 282 } 283 284 static int 285 /*ARGSUSED*/ 286 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei, 287 char *__restrict s, size_t n, wchar_t wc, 288 _ZWState * __restrict psenc, size_t * __restrict nresult) 289 { 290 int ch; 291 292 /* ei may be null */ 293 _DIAGASSERT(s != NULL); 294 _DIAGASSERT(psenc != NULL); 295 _DIAGASSERT(nresult != NULL); 296 297 if (psenc->chlen != 0) 298 return EINVAL; 299 if ((uint32_t)wc <= 0x7F) { 300 ch = (unsigned char)wc; 301 switch (psenc->charset) { 302 case NONE: 303 if (ch == '\0' || ch == '\n') { 304 psenc->ch[psenc->chlen++] = ch; 305 } else { 306 if (n < 4) 307 return E2BIG; 308 n -= 4; 309 psenc->ch[psenc->chlen++] = 'z'; 310 psenc->ch[psenc->chlen++] = 'W'; 311 psenc->ch[psenc->chlen++] = ' '; 312 psenc->ch[psenc->chlen++] = ch; 313 psenc->charset = GB2312; 314 } 315 break; 316 case GB2312: 317 if (n < 2) 318 return E2BIG; 319 n -= 2; 320 if (ch == '\0') { 321 psenc->ch[psenc->chlen++] = '\n'; 322 psenc->ch[psenc->chlen++] = '\0'; 323 psenc->charset = NONE; 324 } else if (ch == '\n') { 325 psenc->ch[psenc->chlen++] = '#'; 326 psenc->ch[psenc->chlen++] = '\n'; 327 psenc->charset = NONE; 328 } else { 329 psenc->ch[psenc->chlen++] = ' '; 330 psenc->ch[psenc->chlen++] = ch; 331 } 332 break; 333 default: 334 return EINVAL; 335 } 336 } else if ((uint32_t)wc <= 0x7E7E) { 337 switch (psenc->charset) { 338 case NONE: 339 if (n < 2) 340 return E2BIG; 341 n -= 2; 342 psenc->ch[psenc->chlen++] = 'z'; 343 psenc->ch[psenc->chlen++] = 'W'; 344 psenc->charset = GB2312; 345 /* FALLTHROUGH*/ 346 case GB2312: 347 if (n < 2) 348 return E2BIG; 349 n -= 2; 350 ch = (wc >> 8) & 0xFF; 351 if (ch < 0x21 || ch > 0x7E) 352 goto ilseq; 353 psenc->ch[psenc->chlen++] = ch; 354 ch = wc & 0xFF; 355 if (ch < 0x21 || ch > 0x7E) 356 goto ilseq; 357 psenc->ch[psenc->chlen++] = ch; 358 break; 359 default: 360 return EINVAL; 361 } 362 } else { 363 ilseq: 364 *nresult = (size_t)-1; 365 return EILSEQ; 366 } 367 memcpy(s, psenc->ch, psenc->chlen); 368 *nresult = psenc->chlen; 369 psenc->chlen = 0; 370 371 return 0; 372 } 373 374 static int 375 /*ARGSUSED*/ 376 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei, 377 char * __restrict s, size_t n, 378 _ZWState * __restrict psenc, size_t * __restrict nresult) 379 { 380 /* ei may be unused */ 381 _DIAGASSERT(s != NULL); 382 _DIAGASSERT(psenc != NULL); 383 _DIAGASSERT(nresult != NULL); 384 385 if (psenc->chlen != 0) 386 return EINVAL; 387 switch (psenc->charset) { 388 case GB2312: 389 if (n-- < 1) 390 return E2BIG; 391 psenc->ch[psenc->chlen++] = '\n'; 392 psenc->charset = NONE; 393 /*FALLTHROUGH*/ 394 case NONE: 395 *nresult = psenc->chlen; 396 if (psenc->chlen > 0) { 397 memcpy(s, psenc->ch, psenc->chlen); 398 psenc->chlen = 0; 399 } 400 break; 401 default: 402 return EINVAL; 403 } 404 405 return 0; 406 } 407 408 static __inline int 409 /*ARGSUSED*/ 410 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei, 411 _ZWState * __restrict psenc, int * __restrict rstate) 412 { 413 /* ei may be unused */ 414 _DIAGASSERT(psenc != NULL); 415 _DIAGASSERT(rstate != NULL); 416 417 switch (psenc->charset) { 418 case NONE: 419 if (psenc->chlen != 0) 420 return EINVAL; 421 *rstate = _STDENC_SDGEN_INITIAL; 422 break; 423 case AMBIGIOUS: 424 if (psenc->chlen != 0) 425 return EINVAL; 426 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 427 break; 428 case ASCII: 429 case GB2312: 430 switch (psenc->chlen) { 431 case 0: 432 *rstate = _STDENC_SDGEN_STABLE; 433 break; 434 case 1: 435 *rstate = (psenc->ch[0] == '#') 436 ? _STDENC_SDGEN_INCOMPLETE_SHIFT 437 : _STDENC_SDGEN_INCOMPLETE_CHAR; 438 break; 439 default: 440 return EINVAL; 441 } 442 break; 443 default: 444 return EINVAL; 445 } 446 return 0; 447 } 448 449 static __inline int 450 /*ARGSUSED*/ 451 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei, 452 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 453 { 454 /* ei seems to be unused */ 455 _DIAGASSERT(csid != NULL); 456 _DIAGASSERT(idx != NULL); 457 458 *csid = (_csid_t)(wc <= 0x7FU) ? 0 : 1; 459 *idx = (_index_t)wc; 460 461 return 0; 462 } 463 464 static __inline int 465 /*ARGSUSED*/ 466 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei, 467 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 468 { 469 /* ei seems to be unused */ 470 _DIAGASSERT(wc != NULL); 471 472 switch (csid) { 473 case 0: case 1: 474 break; 475 default: 476 return EINVAL; 477 } 478 *wc = (wchar_t)idx; 479 480 return 0; 481 } 482 483 static void 484 /*ARGSUSED*/ 485 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei) 486 { 487 } 488 489 static int 490 /*ARGSUSED*/ 491 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei, 492 const void *__restrict var, size_t lenvar) 493 { 494 return 0; 495 } 496 497 /* ---------------------------------------------------------------------- 498 * public interface for ctype 499 */ 500 501 _CITRUS_CTYPE_DECLS(ZW); 502 _CITRUS_CTYPE_DEF_OPS(ZW); 503 504 #include "citrus_ctype_template.h" 505 506 /* ---------------------------------------------------------------------- 507 * public interface for stdenc 508 */ 509 510 _CITRUS_STDENC_DECLS(ZW); 511 _CITRUS_STDENC_DEF_OPS(ZW); 512 513 #include "citrus_stdenc_template.h" 514