1 /* $NetBSD: citrus_iso2022.c,v 1.18 2007/11/21 14:19:32 tnozaki Exp $ */ 2 3 /*- 4 * Copyright (c)1999, 2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 #if defined(LIBC_SCCS) && !defined(lint) 33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.18 2007/11/21 14:19:32 tnozaki Exp $"); 34 #endif /* LIBC_SCCS and not lint */ 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <stddef.h> 42 #include <locale.h> 43 #include <wchar.h> 44 #include <sys/types.h> 45 #include <limits.h> 46 47 #include "citrus_namespace.h" 48 #include "citrus_types.h" 49 #include "citrus_module.h" 50 #include "citrus_ctype.h" 51 #include "citrus_stdenc.h" 52 #include "citrus_iso2022.h" 53 54 55 /* ---------------------------------------------------------------------- 56 * private stuffs used by templates 57 */ 58 59 60 /* 61 * wchar_t mappings: 62 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 63 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 64 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 65 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 66 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 67 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 68 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 69 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 70 * 94x94 charset (ESC & V ESC $ ( F) 71 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 72 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 73 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 74 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit) 75 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 76 */ 77 78 typedef struct { 79 u_char type; 80 #define CS94 (0U) 81 #define CS96 (1U) 82 #define CS94MULTI (2U) 83 #define CS96MULTI (3U) 84 85 u_char final; 86 u_char interm; 87 u_char vers; 88 } _ISO2022Charset; 89 90 static const _ISO2022Charset ascii = { CS94, 'B', '\0', '\0' }; 91 static const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' }; 92 93 typedef struct { 94 _ISO2022Charset g[4]; 95 /* need 3 bits to hold -1, 0, ..., 3 */ 96 int gl:3, 97 gr:3, 98 singlegl:3, 99 singlegr:3; 100 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 101 int chlen; 102 int flags; 103 #define _ISO2022STATE_FLAG_INITIALIZED 1 104 } _ISO2022State; 105 106 typedef struct { 107 _ISO2022Charset *recommend[4]; 108 size_t recommendsize[4]; 109 _ISO2022Charset initg[4]; 110 int maxcharset; 111 int flags; 112 #define F_8BIT 0x0001 113 #define F_NOOLD 0x0002 114 #define F_SI 0x0010 /*0F*/ 115 #define F_SO 0x0020 /*0E*/ 116 #define F_LS0 0x0010 /*0F*/ 117 #define F_LS1 0x0020 /*0E*/ 118 #define F_LS2 0x0040 /*ESC n*/ 119 #define F_LS3 0x0080 /*ESC o*/ 120 #define F_LS1R 0x0100 /*ESC ~*/ 121 #define F_LS2R 0x0200 /*ESC }*/ 122 #define F_LS3R 0x0400 /*ESC |*/ 123 #define F_SS2 0x0800 /*ESC N*/ 124 #define F_SS3 0x1000 /*ESC O*/ 125 #define F_SS2R 0x2000 /*8E*/ 126 #define F_SS3R 0x4000 /*8F*/ 127 } _ISO2022EncodingInfo; 128 typedef struct { 129 _ISO2022EncodingInfo ei; 130 struct { 131 /* for future multi-locale facility */ 132 _ISO2022State s_mblen; 133 _ISO2022State s_mbrlen; 134 _ISO2022State s_mbrtowc; 135 _ISO2022State s_mbtowc; 136 _ISO2022State s_mbsrtowcs; 137 _ISO2022State s_wcrtomb; 138 _ISO2022State s_wcsrtombs; 139 _ISO2022State s_wctomb; 140 } states; 141 } _ISO2022CTypeInfo; 142 143 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 144 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 145 146 #define _FUNCNAME(m) _citrus_ISO2022_##m 147 #define _ENCODING_INFO _ISO2022EncodingInfo 148 #define _CTYPE_INFO _ISO2022CTypeInfo 149 #define _ENCODING_STATE _ISO2022State 150 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 151 #define _ENCODING_IS_STATE_DEPENDENT 1 152 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 153 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 154 155 156 #define _ISO2022INVALID (wchar_t)-1 157 158 static __inline int isc0(__uint8_t x) { return ((x & 0x1f) == x); } 159 static __inline int isc1(__uint8_t x) { return (0x80 <= x && x <= 0x9f); } 160 static __inline int iscntl(__uint8_t x) { return (isc0(x) || isc1(x) || x == 0x7f); } 161 static __inline int is94(__uint8_t x) { return (0x21 <= x && x <= 0x7e); } 162 static __inline int is96(__uint8_t x) { return (0x20 <= x && x <= 0x7f); } 163 static __inline int isecma(__uint8_t x) { return (0x30 <= x && x <= 0x7f); } 164 static __inline int isinterm(__uint8_t x) { return (0x20 <= x && x <= 0x2f); } 165 static __inline int isthree(__uint8_t x) { return (0x60 <= x && x <= 0x6f); } 166 167 static __inline int 168 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 169 { 170 171 _DIAGASSERT(p != NULL); 172 _DIAGASSERT(cs != NULL); 173 174 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 175 cs->final = (u_char)(p[3] & 0xff); 176 cs->interm = '\0'; 177 cs->vers = '\0'; 178 cs->type = CS94MULTI; 179 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 180 cs->final = (u_char)(p[3] & 0xff); 181 cs->interm = '\0'; 182 cs->vers = '\0'; 183 cs->type = CS96MULTI; 184 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 185 cs->final = (u_char)(p[2] & 0xff); 186 cs->interm = '\0'; 187 cs->vers = '\0'; 188 cs->type = CS94; 189 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 190 cs->final = (u_char )(p[2] & 0xff); 191 cs->interm = '\0'; 192 cs->vers = '\0'; 193 cs->type = CS96; 194 } else { 195 return 1; 196 } 197 198 return 0; 199 } 200 201 202 #define _NOTMATCH 0 203 #define _MATCH 1 204 #define _PARSEFAIL 2 205 206 static __inline int 207 get_recommend(_ISO2022EncodingInfo * __restrict ei, 208 const char * __restrict token) 209 { 210 int i; 211 _ISO2022Charset cs, *p; 212 213 if (!strchr("0123", token[0]) || token[1] != '=') 214 return (_NOTMATCH); 215 216 if (getcs(&token[2], &cs) == 0) 217 ; 218 else if (!strcmp(&token[2], "94")) { 219 cs.final = (u_char)(token[4]); 220 cs.interm = '\0'; 221 cs.vers = '\0'; 222 cs.type = CS94; 223 } else if (!strcmp(&token[2], "96")) { 224 cs.final = (u_char)(token[4]); 225 cs.interm = '\0'; 226 cs.vers = '\0'; 227 cs.type = CS96; 228 } else if (!strcmp(&token[2], "94$")) { 229 cs.final = (u_char)(token[5]); 230 cs.interm = '\0'; 231 cs.vers = '\0'; 232 cs.type = CS94MULTI; 233 } else if (!strcmp(&token[2], "96$")) { 234 cs.final = (u_char)(token[5]); 235 cs.interm = '\0'; 236 cs.vers = '\0'; 237 cs.type = CS96MULTI; 238 } else { 239 return (_PARSEFAIL); 240 } 241 242 i = token[0] - '0'; 243 if (!ei->recommend[i]) { 244 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 245 } else { 246 p = realloc(ei->recommend[i], 247 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1)); 248 if (!p) 249 return (_PARSEFAIL); 250 ei->recommend[i] = p; 251 } 252 if (!ei->recommend[i]) 253 return (_PARSEFAIL); 254 ei->recommendsize[i]++; 255 256 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 257 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 258 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 259 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 260 261 return (_MATCH); 262 } 263 264 static __inline int 265 get_initg(_ISO2022EncodingInfo * __restrict ei, 266 const char * __restrict token) 267 { 268 _ISO2022Charset cs; 269 270 if (strncmp("INIT", &token[0], 4) || 271 !strchr("0123", token[4]) || 272 token[5] != '=') 273 return (_NOTMATCH); 274 275 if (getcs(&token[6], &cs) != 0) 276 return (_PARSEFAIL); 277 278 ei->initg[token[4] - '0'].type = cs.type; 279 ei->initg[token[4] - '0'].final = cs.final; 280 ei->initg[token[4] - '0'].interm = cs.interm; 281 ei->initg[token[4] - '0'].vers = cs.vers; 282 283 return (_MATCH); 284 } 285 286 static __inline int 287 get_max(_ISO2022EncodingInfo * __restrict ei, 288 const char * __restrict token) 289 { 290 if (!strcmp(token, "MAX1")) { 291 ei->maxcharset = 1; 292 } else if (!strcmp(token, "MAX2")) { 293 ei->maxcharset = 2; 294 } else if (!strcmp(token, "MAX3")) { 295 ei->maxcharset = 3; 296 } else 297 return (_NOTMATCH); 298 299 return (_MATCH); 300 } 301 302 303 static __inline int 304 get_flags(_ISO2022EncodingInfo * __restrict ei, 305 const char * __restrict token) 306 { 307 int i; 308 static struct { 309 const char *tag; 310 int flag; 311 } const tags[] = { 312 { "DUMMY", 0 }, 313 { "8BIT", F_8BIT }, 314 { "NOOLD", F_NOOLD }, 315 { "SI", F_SI }, 316 { "SO", F_SO }, 317 { "LS0", F_LS0 }, 318 { "LS1", F_LS1 }, 319 { "LS2", F_LS2 }, 320 { "LS3", F_LS3 }, 321 { "LS1R", F_LS1R }, 322 { "LS2R", F_LS2R }, 323 { "LS3R", F_LS3R }, 324 { "SS2", F_SS2 }, 325 { "SS3", F_SS3 }, 326 { "SS2R", F_SS2R }, 327 { "SS3R", F_SS3R }, 328 { NULL, 0 } 329 }; 330 331 for (i = 0; tags[i].tag; i++) { 332 if (!strcmp(token, tags[i].tag)) { 333 ei->flags |= tags[i].flag; 334 return (_MATCH); 335 } 336 } 337 338 return (_NOTMATCH); 339 } 340 341 342 static __inline int 343 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 344 const void * __restrict var, size_t lenvar) 345 { 346 char const *v, *e; 347 char buf[20]; 348 int i, len, ret; 349 350 _DIAGASSERT(ei != NULL); 351 352 353 /* 354 * parse VARIABLE section. 355 */ 356 357 if (!var) 358 return (EFTYPE); 359 360 v = (const char *) var; 361 362 /* initialize structure */ 363 ei->maxcharset = 0; 364 for (i = 0; i < 4; i++) { 365 ei->recommend[i] = NULL; 366 ei->recommendsize[i] = 0; 367 } 368 ei->flags = 0; 369 370 while (*v) { 371 while (*v == ' ' || *v == '\t') 372 ++v; 373 374 /* find the token */ 375 e = v; 376 while (*e && *e != ' ' && *e != '\t') 377 ++e; 378 379 len = e-v; 380 if (len == 0) 381 break; 382 if (len>=sizeof(buf)) 383 goto parsefail; 384 snprintf(buf, sizeof(buf), "%.*s", len, v); 385 386 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 387 ; 388 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 389 ; 390 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 391 ; 392 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 393 ; 394 else 395 ret = _PARSEFAIL; 396 if (ret==_PARSEFAIL) 397 goto parsefail; 398 v = e; 399 400 } 401 402 return (0); 403 404 parsefail: 405 free(ei->recommend[0]); 406 free(ei->recommend[1]); 407 free(ei->recommend[2]); 408 free(ei->recommend[3]); 409 410 return (EFTYPE); 411 } 412 413 static __inline void 414 /*ARGSUSED*/ 415 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 416 _ISO2022State * __restrict s) 417 { 418 int i; 419 420 memset(s, 0, sizeof(*s)); 421 s->gl = 0; 422 s->gr = (ei->flags & F_8BIT) ? 1 : -1; 423 424 for (i = 0; i < 4; i++) { 425 if (ei->initg[i].final) { 426 s->g[i].type = ei->initg[i].type; 427 s->g[i].final = ei->initg[i].final; 428 s->g[i].interm = ei->initg[i].interm; 429 } 430 } 431 s->singlegl = s->singlegr = -1; 432 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 433 } 434 435 static __inline void 436 /*ARGSUSED*/ 437 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei, 438 void * __restrict pspriv, 439 const _ISO2022State * __restrict s) 440 { 441 memcpy(pspriv, (const void *)s, sizeof(*s)); 442 } 443 444 static __inline void 445 /*ARGSUSED*/ 446 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei, 447 _ISO2022State * __restrict s, 448 const void * __restrict pspriv) 449 { 450 memcpy((void *)s, pspriv, sizeof(*s)); 451 } 452 453 static int 454 /*ARGSUSED*/ 455 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 456 const void * __restrict var, 457 size_t lenvar) 458 { 459 460 _DIAGASSERT(ei != NULL); 461 462 return _citrus_ISO2022_parse_variable(ei, var, lenvar); 463 } 464 465 static void 466 /*ARGSUSED*/ 467 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei) 468 { 469 } 470 471 #define ESC '\033' 472 #define ECMA -1 473 #define INTERM -2 474 #define OECMA -3 475 static const struct seqtable { 476 int type; 477 int csoff; 478 int finaloff; 479 int intermoff; 480 int versoff; 481 int len; 482 int chars[10]; 483 } seqtable[] = { 484 /* G0 94MULTI special */ 485 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 486 /* G0 94MULTI special with version identification */ 487 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 488 /* G? 94 */ 489 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 490 /* G? 94 with 2nd intermediate char */ 491 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 492 /* G? 96 */ 493 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 494 /* G? 96 with 2nd intermediate char */ 495 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 496 /* G? 94MULTI */ 497 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 498 /* G? 96MULTI */ 499 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 500 /* G? 94MULTI with version specification */ 501 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 502 /* LS2/3 */ 503 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 504 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 505 /* LS1/2/3R */ 506 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 507 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 508 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 509 /* SS2/3 */ 510 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 511 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 512 /* end of records */ 513 { 0, } 514 }; 515 516 static int 517 seqmatch(const char * __restrict s, size_t n, 518 const struct seqtable * __restrict sp) 519 { 520 const int *p; 521 522 _DIAGASSERT(s != NULL); 523 _DIAGASSERT(sp != NULL); 524 525 p = sp->chars; 526 while (p - sp->chars < n && p - sp->chars < sp->len) { 527 switch (*p) { 528 case ECMA: 529 if (!isecma(*s)) 530 goto terminate; 531 break; 532 case OECMA: 533 if (*s && strchr("@AB", *s)) 534 break; 535 else 536 goto terminate; 537 case INTERM: 538 if (!isinterm(*s)) 539 goto terminate; 540 break; 541 case CS94: 542 if (*s && strchr("()*+", *s)) 543 break; 544 else 545 goto terminate; 546 case CS96: 547 if (*s && strchr(",-./", *s)) 548 break; 549 else 550 goto terminate; 551 default: 552 if (*s != *p) 553 goto terminate; 554 break; 555 } 556 557 p++; 558 s++; 559 } 560 561 terminate: 562 return p - sp->chars; 563 } 564 565 static wchar_t 566 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei, 567 const char * __restrict string, size_t n, 568 const char ** __restrict result, 569 _ISO2022State * __restrict psenc) 570 { 571 wchar_t wchar = 0; 572 int cur; 573 const struct seqtable *sp; 574 int nmatch; 575 int i; 576 577 _DIAGASSERT(ei != NULL); 578 _DIAGASSERT(psenc != NULL); 579 _DIAGASSERT(string != NULL); 580 /* result may be NULL */ 581 582 while (1) { 583 /* SI/SO */ 584 if (1 <= n && string[0] == '\017') { 585 psenc->gl = 0; 586 string++; 587 n--; 588 continue; 589 } 590 if (1 <= n && string[0] == '\016') { 591 psenc->gl = 1; 592 string++; 593 n--; 594 continue; 595 } 596 597 /* SS2/3R */ 598 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 599 psenc->singlegl = psenc->singlegr = 600 (string[0] - '\216') + 2; 601 string++; 602 n--; 603 continue; 604 } 605 606 /* eat the letter if this is not ESC */ 607 if (1 <= n && string[0] != '\033') 608 break; 609 610 /* look for a perfect match from escape sequences */ 611 for (sp = &seqtable[0]; sp->len; sp++) { 612 nmatch = seqmatch(string, n, sp); 613 if (sp->len == nmatch && n >= sp->len) 614 break; 615 } 616 617 if (!sp->len) 618 goto notseq; 619 620 if (sp->type != -1) { 621 if (sp->csoff == -1) 622 i = 0; 623 else { 624 switch (sp->type) { 625 case CS94: 626 case CS94MULTI: 627 i = string[sp->csoff] - '('; 628 break; 629 case CS96: 630 case CS96MULTI: 631 i = string[sp->csoff] - ','; 632 break; 633 default: 634 return (_ISO2022INVALID); 635 } 636 } 637 psenc->g[i].type = sp->type; 638 psenc->g[i].final = '\0'; 639 psenc->g[i].interm = '\0'; 640 psenc->g[i].vers = '\0'; 641 /* sp->finaloff must not be -1 */ 642 if (sp->finaloff != -1) 643 psenc->g[i].final = string[sp->finaloff]; 644 if (sp->intermoff != -1) 645 psenc->g[i].interm = string[sp->intermoff]; 646 if (sp->versoff != -1) 647 psenc->g[i].vers = string[sp->versoff]; 648 649 string += sp->len; 650 n -= sp->len; 651 continue; 652 } 653 654 /* LS2/3 */ 655 if (2 <= n && string[0] == '\033' 656 && string[1] && strchr("no", string[1])) { 657 psenc->gl = string[1] - 'n' + 2; 658 string += 2; 659 n -= 2; 660 continue; 661 } 662 663 /* LS1/2/3R */ 664 /* XXX: { for vi showmatch */ 665 if (2 <= n && string[0] == '\033' 666 && string[1] && strchr("~}|", string[1])) { 667 psenc->gr = 3 - (string[1] - '|'); 668 string += 2; 669 n -= 2; 670 continue; 671 } 672 673 /* SS2/3 */ 674 if (2 <= n && string[0] == '\033' 675 && string[1] && strchr("NO", string[1])) { 676 psenc->singlegl = (string[1] - 'N') + 2; 677 string += 2; 678 n -= 2; 679 continue; 680 } 681 682 notseq: 683 /* 684 * if we've got an unknown escape sequence, eat the ESC at the 685 * head. otherwise, wait till full escape sequence comes. 686 */ 687 for (sp = &seqtable[0]; sp->len; sp++) { 688 nmatch = seqmatch(string, n, sp); 689 if (!nmatch) 690 continue; 691 692 /* 693 * if we are in the middle of escape sequence, 694 * we still need to wait for more characters to come 695 */ 696 if (n < sp->len) { 697 if (nmatch == n) { 698 if (result) 699 *result = string; 700 return (_ISO2022INVALID); 701 } 702 } else { 703 if (nmatch == sp->len) { 704 /* this case should not happen */ 705 goto eat; 706 } 707 } 708 } 709 710 break; 711 } 712 713 eat: 714 /* no letter to eat */ 715 if (n < 1) { 716 if (result) 717 *result = string; 718 return (_ISO2022INVALID); 719 } 720 721 /* normal chars. always eat C0/C1 as is. */ 722 if (iscntl(*string & 0xff)) 723 cur = -1; 724 else if (*string & 0x80) { 725 cur = (psenc->singlegr == -1) 726 ? psenc->gr : psenc->singlegr; 727 } else { 728 cur = (psenc->singlegl == -1) 729 ? psenc->gl : psenc->singlegl; 730 } 731 732 if (cur == -1) { 733 asis: 734 wchar = *string++ & 0xff; 735 if (result) 736 *result = string; 737 /* reset single shift state */ 738 psenc->singlegr = psenc->singlegl = -1; 739 return wchar; 740 } 741 742 /* length error check */ 743 switch (psenc->g[cur].type) { 744 case CS94MULTI: 745 case CS96MULTI: 746 if (!isthree(psenc->g[cur].final)) { 747 if (2 <= n 748 && (string[0] & 0x80) == (string[1] & 0x80)) 749 break; 750 } else { 751 if (3 <= n 752 && (string[0] & 0x80) == (string[1] & 0x80) 753 && (string[0] & 0x80) == (string[2] & 0x80)) 754 break; 755 } 756 757 /* we still need to wait for more characters to come */ 758 if (result) 759 *result = string; 760 return (_ISO2022INVALID); 761 762 case CS94: 763 case CS96: 764 if (1 <= n) 765 break; 766 767 /* we still need to wait for more characters to come */ 768 if (result) 769 *result = string; 770 return (_ISO2022INVALID); 771 } 772 773 /* range check */ 774 switch (psenc->g[cur].type) { 775 case CS94: 776 if (!(is94(string[0] & 0x7f))) 777 goto asis; 778 case CS96: 779 if (!(is96(string[0] & 0x7f))) 780 goto asis; 781 break; 782 case CS94MULTI: 783 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 784 goto asis; 785 break; 786 case CS96MULTI: 787 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 788 goto asis; 789 break; 790 } 791 792 /* extract the character. */ 793 switch (psenc->g[cur].type) { 794 case CS94: 795 /* special case for ASCII. */ 796 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 797 wchar = *string++; 798 wchar &= 0x7f; 799 break; 800 } 801 wchar = psenc->g[cur].final; 802 wchar = (wchar << 8); 803 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 804 wchar = (wchar << 8); 805 wchar = (wchar << 8) | (*string++ & 0x7f); 806 break; 807 case CS96: 808 /* special case for ISO-8859-1. */ 809 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 810 wchar = *string++; 811 wchar &= 0x7f; 812 wchar |= 0x80; 813 break; 814 } 815 wchar = psenc->g[cur].final; 816 wchar = (wchar << 8); 817 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 818 wchar = (wchar << 8); 819 wchar = (wchar << 8) | (*string++ & 0x7f); 820 wchar |= 0x80; 821 break; 822 case CS94MULTI: 823 case CS96MULTI: 824 wchar = psenc->g[cur].final; 825 wchar = (wchar << 8); 826 if (isthree(psenc->g[cur].final)) 827 wchar |= (*string++ & 0x7f); 828 wchar = (wchar << 8) | (*string++ & 0x7f); 829 wchar = (wchar << 8) | (*string++ & 0x7f); 830 if (psenc->g[cur].type == CS96MULTI) 831 wchar |= 0x80; 832 break; 833 } 834 835 if (result) 836 *result = string; 837 /* reset single shift state */ 838 psenc->singlegr = psenc->singlegl = -1; 839 return wchar; 840 } 841 842 843 844 static int 845 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 846 wchar_t * __restrict pwc, 847 const char ** __restrict s, 848 size_t n, _ISO2022State * __restrict psenc, 849 size_t * __restrict nresult) 850 { 851 wchar_t wchar; 852 const char *s0, *p, *result; 853 int c; 854 int chlenbak; 855 856 _DIAGASSERT(nresult != 0); 857 _DIAGASSERT(ei != NULL); 858 _DIAGASSERT(psenc != NULL); 859 _DIAGASSERT(s != NULL); 860 861 if (*s == NULL) { 862 _citrus_ISO2022_init_state(ei, psenc); 863 *nresult = _ENCODING_IS_STATE_DEPENDENT; 864 return 0; 865 } 866 s0 = *s; 867 c = 0; 868 chlenbak = psenc->chlen; 869 870 /* 871 * if we have something in buffer, use that. 872 * otherwise, skip here 873 */ 874 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) { 875 /* illgeal state */ 876 _citrus_ISO2022_init_state(ei, psenc); 877 goto encoding_error; 878 } 879 if (psenc->chlen == 0) 880 goto emptybuf; 881 882 /* buffer is not empty */ 883 p = psenc->ch; 884 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) { 885 if (n > 0) { 886 psenc->ch[psenc->chlen++] = *s0++; 887 n--; 888 } 889 890 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 891 &result, psenc); 892 c += result - p; 893 if (wchar != _ISO2022INVALID) { 894 if (psenc->chlen > c) 895 memmove(psenc->ch, result, psenc->chlen - c); 896 if (psenc->chlen < c) 897 psenc->chlen = 0; 898 else 899 psenc->chlen -= c; 900 goto output; 901 } 902 903 if (n == 0) { 904 if ((result - p) == psenc->chlen) 905 /* complete shift sequence. */ 906 psenc->chlen = 0; 907 goto restart; 908 } 909 910 p = result; 911 } 912 913 /* escape sequence too long? */ 914 goto encoding_error; 915 916 emptybuf: 917 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 918 if (wchar != _ISO2022INVALID) { 919 c += result - s0; 920 psenc->chlen = 0; 921 s0 = result; 922 goto output; 923 } 924 if (result > s0) { 925 c += (result - s0); 926 n -= (result - s0); 927 s0 = result; 928 if (n>0) 929 goto emptybuf; 930 /* complete shift sequence. */ 931 goto restart; 932 } 933 n += c; 934 if (n < sizeof(psenc->ch)) { 935 memcpy(psenc->ch, s0 - c, n); 936 psenc->chlen = n; 937 s0 = result; 938 goto restart; 939 } 940 941 /* escape sequence too long? */ 942 943 encoding_error: 944 psenc->chlen = 0; 945 *nresult = (size_t)-1; 946 return (EILSEQ); 947 948 output: 949 *s = s0; 950 if (pwc) 951 *pwc = wchar; 952 953 if (!wchar) 954 *nresult = 0; 955 else 956 *nresult = c - chlenbak; 957 958 return (0); 959 960 restart: 961 *s = s0; 962 *nresult = (size_t)-2; 963 964 return (0); 965 } 966 967 static int 968 recommendation(_ISO2022EncodingInfo * __restrict ei, 969 _ISO2022Charset * __restrict cs) 970 { 971 int i, j; 972 _ISO2022Charset *recommend; 973 974 _DIAGASSERT(ei != NULL); 975 _DIAGASSERT(cs != NULL); 976 977 /* first, try a exact match. */ 978 for (i = 0; i < 4; i++) { 979 recommend = ei->recommend[i]; 980 for (j = 0; j < ei->recommendsize[i]; j++) { 981 if (cs->type != recommend[j].type) 982 continue; 983 if (cs->final != recommend[j].final) 984 continue; 985 if (cs->interm != recommend[j].interm) 986 continue; 987 988 return i; 989 } 990 } 991 992 /* then, try a wildcard match over final char. */ 993 for (i = 0; i < 4; i++) { 994 recommend = ei->recommend[i]; 995 for (j = 0; j < ei->recommendsize[i]; j++) { 996 if (cs->type != recommend[j].type) 997 continue; 998 if (cs->final && (cs->final != recommend[j].final)) 999 continue; 1000 if (cs->interm && (cs->interm != recommend[j].interm)) 1001 continue; 1002 1003 return i; 1004 } 1005 } 1006 1007 /* there's no recommendation. make a guess. */ 1008 if (ei->maxcharset == 0) { 1009 return 0; 1010 } else { 1011 switch (cs->type) { 1012 case CS94: 1013 case CS94MULTI: 1014 return 0; 1015 case CS96: 1016 case CS96MULTI: 1017 return 1; 1018 } 1019 } 1020 return 0; 1021 } 1022 1023 static int 1024 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1025 char * __restrict string, size_t n, 1026 char ** __restrict result, 1027 _ISO2022State * __restrict psenc, 1028 size_t * __restrict nresult) 1029 { 1030 int i = 0; 1031 size_t len; 1032 _ISO2022Charset cs; 1033 char *p; 1034 char tmp[MB_LEN_MAX]; 1035 int target; 1036 u_char mask; 1037 int bit8; 1038 1039 _DIAGASSERT(ei != NULL); 1040 _DIAGASSERT(string != NULL); 1041 /* result may be NULL */ 1042 _DIAGASSERT(psenc != NULL); 1043 _DIAGASSERT(nresult != NULL); 1044 1045 if (isc0(wc & 0xff)) { 1046 /* go back to INIT0 or ASCII on control chars */ 1047 cs = ei->initg[0].final ? ei->initg[0] : ascii; 1048 } else if (isc1(wc & 0xff)) { 1049 /* go back to INIT1 or ISO-8859-1 on control chars */ 1050 cs = ei->initg[1].final ? ei->initg[1] : iso88591; 1051 } else if (!(wc & ~0xff)) { 1052 if (wc & 0x80) { 1053 /* special treatment for ISO-8859-1 */ 1054 cs = iso88591; 1055 } else { 1056 /* special treatment for ASCII */ 1057 cs = ascii; 1058 } 1059 } else { 1060 cs.final = (wc >> 24) & 0x7f; 1061 if ((wc >> 16) & 0x80) 1062 cs.interm = (wc >> 16) & 0x7f; 1063 else 1064 cs.interm = '\0'; 1065 if (wc & 0x80) 1066 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1067 else 1068 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1069 } 1070 target = recommendation(ei, &cs); 1071 p = tmp; 1072 bit8 = ei->flags & F_8BIT; 1073 1074 /* designate the charset onto the target plane(G0/1/2/3). */ 1075 if (psenc->g[target].type == cs.type 1076 && psenc->g[target].final == cs.final 1077 && psenc->g[target].interm == cs.interm) 1078 goto planeok; 1079 1080 *p++ = '\033'; 1081 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1082 *p++ = '$'; 1083 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) 1084 && !cs.interm && !(ei->flags & F_NOOLD)) 1085 ; 1086 else if (cs.type == CS94 || cs.type == CS94MULTI) 1087 *p++ = "()*+"[target]; 1088 else 1089 *p++ = ",-./"[target]; 1090 if (cs.interm) 1091 *p++ = cs.interm; 1092 *p++ = cs.final; 1093 1094 psenc->g[target].type = cs.type; 1095 psenc->g[target].final = cs.final; 1096 psenc->g[target].interm = cs.interm; 1097 1098 planeok: 1099 /* invoke the plane onto GL or GR. */ 1100 if (psenc->gl == target) 1101 goto sideok; 1102 if (bit8 && psenc->gr == target) 1103 goto sideok; 1104 1105 if (target == 0 && (ei->flags & F_LS0)) { 1106 *p++ = '\017'; 1107 psenc->gl = 0; 1108 } else if (target == 1 && (ei->flags & F_LS1)) { 1109 *p++ = '\016'; 1110 psenc->gl = 1; 1111 } else if (target == 2 && (ei->flags & F_LS2)) { 1112 *p++ = '\033'; 1113 *p++ = 'n'; 1114 psenc->gl = 2; 1115 } else if (target == 3 && (ei->flags & F_LS3)) { 1116 *p++ = '\033'; 1117 *p++ = 'o'; 1118 psenc->gl = 3; 1119 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1120 *p++ = '\033'; 1121 *p++ = '~'; 1122 psenc->gr = 1; 1123 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1124 *p++ = '\033'; 1125 /*{*/ 1126 *p++ = '}'; 1127 psenc->gr = 2; 1128 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1129 *p++ = '\033'; 1130 *p++ = '|'; 1131 psenc->gr = 3; 1132 } else if (target == 2 && (ei->flags & F_SS2)) { 1133 *p++ = '\033'; 1134 *p++ = 'N'; 1135 psenc->singlegl = 2; 1136 } else if (target == 3 && (ei->flags & F_SS3)) { 1137 *p++ = '\033'; 1138 *p++ = 'O'; 1139 psenc->singlegl = 3; 1140 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1141 *p++ = '\216'; 1142 *p++ = 'N'; 1143 psenc->singlegl = psenc->singlegr = 2; 1144 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1145 *p++ = '\217'; 1146 *p++ = 'O'; 1147 psenc->singlegl = psenc->singlegr = 3; 1148 } else 1149 goto ilseq; 1150 1151 sideok: 1152 if (psenc->singlegl == target) 1153 mask = 0x00; 1154 else if (psenc->singlegr == target) 1155 mask = 0x80; 1156 else if (psenc->gl == target) 1157 mask = 0x00; 1158 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1159 mask = 0x80; 1160 else 1161 goto ilseq; 1162 1163 switch (cs.type) { 1164 case CS94: 1165 case CS96: 1166 i = 1; 1167 break; 1168 case CS94MULTI: 1169 case CS96MULTI: 1170 i = !iscntl(wc & 0xff) ? 1171 (isthree(cs.final) ? 3 : 2) : 1; 1172 break; 1173 } 1174 while (i-- > 0) 1175 *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1176 1177 /* reset single shift state */ 1178 psenc->singlegl = psenc->singlegr = -1; 1179 1180 len = (size_t)(p - tmp); 1181 if (n < len) { 1182 if (result) 1183 *result = (char *)0; 1184 *nresult = (size_t)-1; 1185 return E2BIG; 1186 } 1187 if (result) 1188 *result = string + len; 1189 memcpy(string, tmp, len); 1190 *nresult = len; 1191 1192 return 0; 1193 1194 ilseq: 1195 *nresult = (size_t)-1; 1196 return EILSEQ; 1197 } 1198 1199 static int 1200 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei, 1201 char * __restrict s, size_t n, 1202 _ISO2022State * __restrict psenc, 1203 size_t * __restrict nresult) 1204 { 1205 char buf[MB_LEN_MAX]; 1206 char *result; 1207 int ret; 1208 size_t len; 1209 1210 _DIAGASSERT(ei != NULL); 1211 _DIAGASSERT(nresult != 0); 1212 _DIAGASSERT(s != NULL); 1213 1214 /* XXX state will be modified after this operation... */ 1215 ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc, 1216 &len); 1217 if (ret) { 1218 *nresult = len; 1219 return ret; 1220 } 1221 1222 if (sizeof(buf) < len || n < len-1) { 1223 /* XXX should recover state? */ 1224 *nresult = (size_t)-1; 1225 return E2BIG; 1226 } 1227 1228 memcpy(s, buf, len-1); 1229 *nresult = len-1; 1230 return (0); 1231 } 1232 1233 static int 1234 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1235 char * __restrict s, size_t n, wchar_t wc, 1236 _ISO2022State * __restrict psenc, 1237 size_t * __restrict nresult) 1238 { 1239 char buf[MB_LEN_MAX]; 1240 char *result; 1241 int ret; 1242 size_t len; 1243 1244 _DIAGASSERT(ei != NULL); 1245 _DIAGASSERT(s != NULL); 1246 _DIAGASSERT(psenc != NULL); 1247 _DIAGASSERT(nresult != 0); 1248 1249 /* XXX state will be modified after this operation... */ 1250 ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc, 1251 &len); 1252 if (ret) { 1253 *nresult = len; 1254 return ret; 1255 } 1256 1257 if (sizeof(buf) < len || n < len) { 1258 /* XXX should recover state? */ 1259 *nresult = (size_t)-1; 1260 return E2BIG; 1261 } 1262 1263 memcpy(s, buf, len); 1264 *nresult = len; 1265 return (0); 1266 } 1267 1268 static __inline int 1269 /*ARGSUSED*/ 1270 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei, 1271 _csid_t * __restrict csid, 1272 _index_t * __restrict idx, wchar_t wc) 1273 { 1274 wchar_t m, nm; 1275 1276 _DIAGASSERT(csid != NULL && idx != NULL); 1277 1278 m = wc & 0x7FFF8080; 1279 nm = wc & 0x007F7F7F; 1280 if (m & 0x00800000) { 1281 nm &= 0x00007F7F; 1282 } else { 1283 m &= 0x7F008080; 1284 } 1285 if (nm & 0x007F0000) { 1286 /* ^3 mark */ 1287 m |= 0x007F0000; 1288 } else if (nm & 0x00007F00) { 1289 /* ^2 mark */ 1290 m |= 0x00007F00; 1291 } 1292 *csid = (_csid_t)m; 1293 *idx = (_index_t)nm; 1294 1295 return (0); 1296 } 1297 1298 static __inline int 1299 /*ARGSUSED*/ 1300 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei, 1301 wchar_t * __restrict wc, 1302 _csid_t csid, _index_t idx) 1303 { 1304 1305 _DIAGASSERT(ei != NULL && wc != NULL); 1306 1307 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 1308 1309 return (0); 1310 } 1311 1312 static __inline int 1313 /*ARGSUSED*/ 1314 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei, 1315 _ISO2022State * __restrict psenc, 1316 int * __restrict rstate) 1317 { 1318 1319 if (psenc->chlen == 0) { 1320 /* XXX: it should distinguish initial and stable. */ 1321 *rstate = _STDENC_SDGEN_STABLE; 1322 } else { 1323 if (psenc->ch[0] == '\033') 1324 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 1325 else 1326 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 1327 } 1328 1329 return 0; 1330 } 1331 1332 /* ---------------------------------------------------------------------- 1333 * public interface for ctype 1334 */ 1335 1336 _CITRUS_CTYPE_DECLS(ISO2022); 1337 _CITRUS_CTYPE_DEF_OPS(ISO2022); 1338 1339 #include "citrus_ctype_template.h" 1340 1341 /* ---------------------------------------------------------------------- 1342 * public interface for stdenc 1343 */ 1344 1345 _CITRUS_STDENC_DECLS(ISO2022); 1346 _CITRUS_STDENC_DEF_OPS(ISO2022); 1347 1348 #include "citrus_stdenc_template.h" 1349