/* $NetBSD: citrus_iso2022.c,v 1.16 2006/06/07 16:28:34 tnozaki Exp $ */

/*-
 * Copyright (c)1999, 2002 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 #if defined(LIBC_SCCS) && !defined(lint) 33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.16 2006/06/07 16:28:34 tnozaki Exp $"); 34 #endif /* LIBC_SCCS and not lint */ 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <stddef.h> 42 #include <locale.h> 43 #include <wchar.h> 44 #include <sys/types.h> 45 #include <limits.h> 46 47 #include "citrus_namespace.h" 48 #include "citrus_types.h" 49 #include "citrus_module.h" 50 #include "citrus_ctype.h" 51 #include "citrus_stdenc.h" 52 #include "citrus_iso2022.h" 53 54 55 /* ---------------------------------------------------------------------- 56 * private stuffs used by templates 57 */ 58 59 60 /* 61 * wchar_t mappings: 62 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 63 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 64 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 65 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 66 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 67 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 68 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 69 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 70 * 94x94 charset (ESC & V ESC $ ( F) 71 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 72 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 73 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 74 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit) 75 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 76 */ 77 78 typedef struct { 79 u_char type; 80 #define CS94 (0U) 81 #define CS96 (1U) 82 #define CS94MULTI (2U) 83 #define CS96MULTI (3U) 84 85 u_char final; 86 u_char interm; 87 u_char vers; 88 } _ISO2022Charset; 89 90 static const _ISO2022Charset ascii = { CS94, 'B', 
'\0', '\0' }; 91 static const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' }; 92 93 typedef struct { 94 _ISO2022Charset g[4]; 95 /* need 3 bits to hold -1, 0, ..., 3 */ 96 int gl:3, 97 gr:3, 98 singlegl:3, 99 singlegr:3; 100 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 101 int chlen; 102 int flags; 103 #define _ISO2022STATE_FLAG_INITIALIZED 1 104 } _ISO2022State; 105 106 typedef struct { 107 _ISO2022Charset *recommend[4]; 108 size_t recommendsize[4]; 109 _ISO2022Charset initg[4]; 110 int maxcharset; 111 int flags; 112 #define F_8BIT 0x0001 113 #define F_NOOLD 0x0002 114 #define F_SI 0x0010 /*0F*/ 115 #define F_SO 0x0020 /*0E*/ 116 #define F_LS0 0x0010 /*0F*/ 117 #define F_LS1 0x0020 /*0E*/ 118 #define F_LS2 0x0040 /*ESC n*/ 119 #define F_LS3 0x0080 /*ESC o*/ 120 #define F_LS1R 0x0100 /*ESC ~*/ 121 #define F_LS2R 0x0200 /*ESC }*/ 122 #define F_LS3R 0x0400 /*ESC |*/ 123 #define F_SS2 0x0800 /*ESC N*/ 124 #define F_SS3 0x1000 /*ESC O*/ 125 #define F_SS2R 0x2000 /*8E*/ 126 #define F_SS3R 0x4000 /*8F*/ 127 } _ISO2022EncodingInfo; 128 typedef struct { 129 _ISO2022EncodingInfo ei; 130 struct { 131 /* for future multi-locale facility */ 132 _ISO2022State s_mblen; 133 _ISO2022State s_mbrlen; 134 _ISO2022State s_mbrtowc; 135 _ISO2022State s_mbtowc; 136 _ISO2022State s_mbsrtowcs; 137 _ISO2022State s_wcrtomb; 138 _ISO2022State s_wcsrtombs; 139 _ISO2022State s_wctomb; 140 } states; 141 } _ISO2022CTypeInfo; 142 143 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 144 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 145 146 #define _FUNCNAME(m) _citrus_ISO2022_##m 147 #define _ENCODING_INFO _ISO2022EncodingInfo 148 #define _CTYPE_INFO _ISO2022CTypeInfo 149 #define _ENCODING_STATE _ISO2022State 150 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 151 #define _ENCODING_IS_STATE_DEPENDENT 1 152 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 153 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 154 155 156 #define _ISO2022INVALID (wchar_t)-1 157 158 static 
__inline int isc0(__uint8_t x) { return ((x & 0x1f) == x); } 159 static __inline int isc1(__uint8_t x) { return (0x80 <= x && x <= 0x9f); } 160 static __inline int iscntl(__uint8_t x) { return (isc0(x) || isc1(x) || x == 0x7f); } 161 static __inline int is94(__uint8_t x) { return (0x21 <= x && x <= 0x7e); } 162 static __inline int is96(__uint8_t x) { return (0x20 <= x && x <= 0x7f); } 163 static __inline int isecma(__uint8_t x) { return (0x30 <= x && x <= 0x7f); } 164 static __inline int isinterm(__uint8_t x) { return (0x20 <= x && x <= 0x2f); } 165 static __inline int isthree(__uint8_t x) { return (0x60 <= x && x <= 0x6f); } 166 167 static __inline int 168 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 169 { 170 171 _DIAGASSERT(p != NULL); 172 _DIAGASSERT(cs != NULL); 173 174 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 175 cs->final = (u_char)(p[3] & 0xff); 176 cs->interm = '\0'; 177 cs->vers = '\0'; 178 cs->type = CS94MULTI; 179 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 180 cs->final = (u_char)(p[3] & 0xff); 181 cs->interm = '\0'; 182 cs->vers = '\0'; 183 cs->type = CS96MULTI; 184 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 185 cs->final = (u_char)(p[2] & 0xff); 186 cs->interm = '\0'; 187 cs->vers = '\0'; 188 cs->type = CS94; 189 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 190 cs->final = (u_char )(p[2] & 0xff); 191 cs->interm = '\0'; 192 cs->vers = '\0'; 193 cs->type = CS96; 194 } else { 195 return 1; 196 } 197 198 return 0; 199 } 200 201 202 #define _NOTMATCH 0 203 #define _MATCH 1 204 #define _PARSEFAIL 2 205 206 static __inline int 207 get_recommend(_ISO2022EncodingInfo * __restrict ei, 208 const char * __restrict token) 209 { 210 int i; 211 _ISO2022Charset cs, *p; 212 213 if (!strchr("0123", token[0]) || token[1] != '=') 214 return (_NOTMATCH); 215 216 if (getcs(&token[2], &cs) == 0) 217 ; 218 else if (!strcmp(&token[2], "94")) { 219 cs.final = (u_char)(token[4]); 220 cs.interm = '\0'; 221 cs.vers = '\0'; 222 
cs.type = CS94; 223 } else if (!strcmp(&token[2], "96")) { 224 cs.final = (u_char)(token[4]); 225 cs.interm = '\0'; 226 cs.vers = '\0'; 227 cs.type = CS96; 228 } else if (!strcmp(&token[2], "94$")) { 229 cs.final = (u_char)(token[5]); 230 cs.interm = '\0'; 231 cs.vers = '\0'; 232 cs.type = CS94MULTI; 233 } else if (!strcmp(&token[2], "96$")) { 234 cs.final = (u_char)(token[5]); 235 cs.interm = '\0'; 236 cs.vers = '\0'; 237 cs.type = CS96MULTI; 238 } else { 239 return (_PARSEFAIL); 240 } 241 242 i = token[0] - '0'; 243 if (!ei->recommend[i]) { 244 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 245 } else { 246 p = realloc(ei->recommend[i], 247 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1)); 248 if (!p) 249 return (_PARSEFAIL); 250 ei->recommend[i] = p; 251 } 252 if (!ei->recommend[i]) 253 return (_PARSEFAIL); 254 ei->recommendsize[i]++; 255 256 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 257 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 258 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 259 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 260 261 return (_MATCH); 262 } 263 264 static __inline int 265 get_initg(_ISO2022EncodingInfo * __restrict ei, 266 const char * __restrict token) 267 { 268 _ISO2022Charset cs; 269 270 if (strncmp("INIT", &token[0], 4) || 271 !strchr("0123", token[4]) || 272 token[5] != '=') 273 return (_NOTMATCH); 274 275 if (getcs(&token[6], &cs) != 0) 276 return (_PARSEFAIL); 277 278 ei->initg[token[4] - '0'].type = cs.type; 279 ei->initg[token[4] - '0'].final = cs.final; 280 ei->initg[token[4] - '0'].interm = cs.interm; 281 ei->initg[token[4] - '0'].vers = cs.vers; 282 283 return (_MATCH); 284 } 285 286 static __inline int 287 get_max(_ISO2022EncodingInfo * __restrict ei, 288 const char * __restrict token) 289 { 290 if (!strcmp(token, "MAX1")) { 291 ei->maxcharset = 1; 292 } else if (!strcmp(token, "MAX2")) { 293 ei->maxcharset = 2; 294 } else if 
(!strcmp(token, "MAX3")) { 295 ei->maxcharset = 3; 296 } else 297 return (_NOTMATCH); 298 299 return (_MATCH); 300 } 301 302 303 static __inline int 304 get_flags(_ISO2022EncodingInfo * __restrict ei, 305 const char * __restrict token) 306 { 307 int i; 308 static struct { 309 const char *tag; 310 int flag; 311 } const tags[] = { 312 { "DUMMY", 0 }, 313 { "8BIT", F_8BIT }, 314 { "NOOLD", F_NOOLD }, 315 { "SI", F_SI }, 316 { "SO", F_SO }, 317 { "LS0", F_LS0 }, 318 { "LS1", F_LS1 }, 319 { "LS2", F_LS2 }, 320 { "LS3", F_LS3 }, 321 { "LS1R", F_LS1R }, 322 { "LS2R", F_LS2R }, 323 { "LS3R", F_LS3R }, 324 { "SS2", F_SS2 }, 325 { "SS3", F_SS3 }, 326 { "SS2R", F_SS2R }, 327 { "SS3R", F_SS3R }, 328 { NULL, 0 } 329 }; 330 331 for (i = 0; tags[i].tag; i++) { 332 if (!strcmp(token, tags[i].tag)) { 333 ei->flags |= tags[i].flag; 334 return (_MATCH); 335 } 336 } 337 338 return (_NOTMATCH); 339 } 340 341 342 static __inline int 343 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 344 const void * __restrict var, size_t lenvar) 345 { 346 char const *v, *e; 347 char buf[20]; 348 int i, len, ret; 349 350 _DIAGASSERT(ei != NULL); 351 352 353 /* 354 * parse VARIABLE section. 
355 */ 356 357 if (!var) 358 return (EFTYPE); 359 360 v = (const char *) var; 361 362 /* initialize structure */ 363 ei->maxcharset = 0; 364 for (i = 0; i < 4; i++) { 365 ei->recommend[i] = NULL; 366 ei->recommendsize[i] = 0; 367 } 368 ei->flags = 0; 369 370 while (*v) { 371 while (*v == ' ' || *v == '\t') 372 ++v; 373 374 /* find the token */ 375 e = v; 376 while (*e && *e != ' ' && *e != '\t') 377 ++e; 378 379 len = e-v; 380 if (len == 0) 381 break; 382 if (len>=sizeof(buf)) 383 goto parsefail; 384 snprintf(buf, sizeof(buf), "%.*s", len, v); 385 386 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 387 ; 388 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 389 ; 390 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 391 ; 392 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 393 ; 394 else 395 ret = _PARSEFAIL; 396 if (ret==_PARSEFAIL) 397 goto parsefail; 398 v = e; 399 400 } 401 402 return (0); 403 404 parsefail: 405 free(ei->recommend[0]); 406 free(ei->recommend[1]); 407 free(ei->recommend[2]); 408 free(ei->recommend[3]); 409 410 return (EFTYPE); 411 } 412 413 static __inline void 414 /*ARGSUSED*/ 415 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 416 _ISO2022State * __restrict s) 417 { 418 int i; 419 420 memset(s, 0, sizeof(*s)); 421 s->gl = 0; 422 s->gr = (ei->flags & F_8BIT) ? 
1 : -1; 423 424 for (i = 0; i < 4; i++) { 425 if (ei->initg[i].final) { 426 s->g[i].type = ei->initg[i].type; 427 s->g[i].final = ei->initg[i].final; 428 s->g[i].interm = ei->initg[i].interm; 429 } 430 } 431 s->singlegl = s->singlegr = -1; 432 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 433 } 434 435 static __inline void 436 /*ARGSUSED*/ 437 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei, 438 void * __restrict pspriv, 439 const _ISO2022State * __restrict s) 440 { 441 memcpy(pspriv, (const void *)s, sizeof(*s)); 442 } 443 444 static __inline void 445 /*ARGSUSED*/ 446 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei, 447 _ISO2022State * __restrict s, 448 const void * __restrict pspriv) 449 { 450 memcpy((void *)s, pspriv, sizeof(*s)); 451 } 452 453 static int 454 /*ARGSUSED*/ 455 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 456 const void * __restrict var, 457 size_t lenvar) 458 { 459 460 _DIAGASSERT(ei != NULL); 461 462 return _citrus_ISO2022_parse_variable(ei, var, lenvar); 463 } 464 465 static void 466 /*ARGSUSED*/ 467 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei) 468 { 469 } 470 471 #define ESC '\033' 472 #define ECMA -1 473 #define INTERM -2 474 #define OECMA -3 475 static const struct seqtable { 476 int type; 477 int csoff; 478 int finaloff; 479 int intermoff; 480 int versoff; 481 int len; 482 int chars[10]; 483 } seqtable[] = { 484 /* G0 94MULTI special */ 485 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 486 /* G0 94MULTI special with version identification */ 487 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 488 /* G? 94 */ 489 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 490 /* G? 94 with 2nd intermediate char */ 491 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 492 /* G? 96 */ 493 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 494 /* G? 
96 with 2nd intermediate char */ 495 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 496 /* G? 94MULTI */ 497 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 498 /* G? 96MULTI */ 499 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 500 /* G? 94MULTI with version specification */ 501 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 502 /* LS2/3 */ 503 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 504 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 505 /* LS1/2/3R */ 506 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 507 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 508 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 509 /* SS2/3 */ 510 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 511 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 512 /* end of records */ 513 { 0, } 514 }; 515 516 static int 517 seqmatch(const char * __restrict s, size_t n, 518 const struct seqtable * __restrict sp) 519 { 520 const int *p; 521 522 _DIAGASSERT(s != NULL); 523 _DIAGASSERT(sp != NULL); 524 525 p = sp->chars; 526 while (p - sp->chars < n && p - sp->chars < sp->len) { 527 switch (*p) { 528 case ECMA: 529 if (!isecma(*s)) 530 goto terminate; 531 break; 532 case OECMA: 533 if (*s && strchr("@AB", *s)) 534 break; 535 else 536 goto terminate; 537 case INTERM: 538 if (!isinterm(*s)) 539 goto terminate; 540 break; 541 case CS94: 542 if (*s && strchr("()*+", *s)) 543 break; 544 else 545 goto terminate; 546 case CS96: 547 if (*s && strchr(",-./", *s)) 548 break; 549 else 550 goto terminate; 551 default: 552 if (*s != *p) 553 goto terminate; 554 break; 555 } 556 557 p++; 558 s++; 559 } 560 561 terminate: 562 return p - sp->chars; 563 } 564 565 static wchar_t 566 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei, 567 const char * __restrict string, size_t n, 568 const char ** __restrict result, 569 _ISO2022State * __restrict psenc) 570 { 571 wchar_t wchar = 0; 572 int cur; 573 const struct seqtable *sp; 574 int nmatch; 575 int i; 576 577 _DIAGASSERT(ei != 
NULL); 578 _DIAGASSERT(psenc != NULL); 579 _DIAGASSERT(string != NULL); 580 /* result may be NULL */ 581 582 while (1) { 583 /* SI/SO */ 584 if (1 <= n && string[0] == '\017') { 585 psenc->gl = 0; 586 string++; 587 n--; 588 continue; 589 } 590 if (1 <= n && string[0] == '\016') { 591 psenc->gl = 1; 592 string++; 593 n--; 594 continue; 595 } 596 597 /* SS2/3R */ 598 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 599 psenc->singlegl = psenc->singlegr = 600 (string[0] - '\216') + 2; 601 string++; 602 n--; 603 continue; 604 } 605 606 /* eat the letter if this is not ESC */ 607 if (1 <= n && string[0] != '\033') 608 break; 609 610 /* look for a perfect match from escape sequences */ 611 for (sp = &seqtable[0]; sp->len; sp++) { 612 nmatch = seqmatch(string, n, sp); 613 if (sp->len == nmatch && n >= sp->len) 614 break; 615 } 616 617 if (!sp->len) 618 goto notseq; 619 620 if (sp->type != -1) { 621 if (sp->csoff == -1) 622 i = 0; 623 else { 624 switch (sp->type) { 625 case CS94: 626 case CS94MULTI: 627 i = string[sp->csoff] - '('; 628 break; 629 case CS96: 630 case CS96MULTI: 631 i = string[sp->csoff] - ','; 632 break; 633 default: 634 return (_ISO2022INVALID); 635 } 636 } 637 psenc->g[i].type = sp->type; 638 psenc->g[i].final = '\0'; 639 psenc->g[i].interm = '\0'; 640 psenc->g[i].vers = '\0'; 641 /* sp->finaloff must not be -1 */ 642 if (sp->finaloff != -1) 643 psenc->g[i].final = string[sp->finaloff]; 644 if (sp->intermoff != -1) 645 psenc->g[i].interm = string[sp->intermoff]; 646 if (sp->versoff != -1) 647 psenc->g[i].vers = string[sp->versoff]; 648 649 string += sp->len; 650 n -= sp->len; 651 continue; 652 } 653 654 /* LS2/3 */ 655 if (2 <= n && string[0] == '\033' 656 && string[1] && strchr("no", string[1])) { 657 psenc->gl = string[1] - 'n' + 2; 658 string += 2; 659 n -= 2; 660 continue; 661 } 662 663 /* LS1/2/3R */ 664 /* XXX: { for vi showmatch */ 665 if (2 <= n && string[0] == '\033' 666 && string[1] && strchr("~}|", string[1])) { 667 psenc->gr = 3 - 
(string[1] - '|'); 668 string += 2; 669 n -= 2; 670 continue; 671 } 672 673 /* SS2/3 */ 674 if (2 <= n && string[0] == '\033' 675 && string[1] && strchr("NO", string[1])) { 676 psenc->singlegl = (string[1] - 'N') + 2; 677 string += 2; 678 n -= 2; 679 continue; 680 } 681 682 notseq: 683 /* 684 * if we've got an unknown escape sequence, eat the ESC at the 685 * head. otherwise, wait till full escape sequence comes. 686 */ 687 for (sp = &seqtable[0]; sp->len; sp++) { 688 nmatch = seqmatch(string, n, sp); 689 if (!nmatch) 690 continue; 691 692 /* 693 * if we are in the middle of escape sequence, 694 * we still need to wait for more characters to come 695 */ 696 if (n < sp->len) { 697 if (nmatch == n) { 698 if (result) 699 *result = string; 700 return (_ISO2022INVALID); 701 } 702 } else { 703 if (nmatch == sp->len) { 704 /* this case should not happen */ 705 goto eat; 706 } 707 } 708 } 709 710 break; 711 } 712 713 eat: 714 /* no letter to eat */ 715 if (n < 1) { 716 if (result) 717 *result = string; 718 return (_ISO2022INVALID); 719 } 720 721 /* normal chars. always eat C0/C1 as is. */ 722 if (iscntl(*string & 0xff)) 723 cur = -1; 724 else if (*string & 0x80) { 725 cur = (psenc->singlegr == -1) 726 ? psenc->gr : psenc->singlegr; 727 } else { 728 cur = (psenc->singlegl == -1) 729 ? 
psenc->gl : psenc->singlegl; 730 } 731 732 if (cur == -1) { 733 asis: 734 wchar = *string++ & 0xff; 735 if (result) 736 *result = string; 737 /* reset single shift state */ 738 psenc->singlegr = psenc->singlegl = -1; 739 return wchar; 740 } 741 742 /* length error check */ 743 switch (psenc->g[cur].type) { 744 case CS94MULTI: 745 case CS96MULTI: 746 if (!isthree(psenc->g[cur].final)) { 747 if (2 <= n 748 && (string[0] & 0x80) == (string[1] & 0x80)) 749 break; 750 } else { 751 if (3 <= n 752 && (string[0] & 0x80) == (string[1] & 0x80) 753 && (string[0] & 0x80) == (string[2] & 0x80)) 754 break; 755 } 756 757 /* we still need to wait for more characters to come */ 758 if (result) 759 *result = string; 760 return (_ISO2022INVALID); 761 762 case CS94: 763 case CS96: 764 if (1 <= n) 765 break; 766 767 /* we still need to wait for more characters to come */ 768 if (result) 769 *result = string; 770 return (_ISO2022INVALID); 771 } 772 773 /* range check */ 774 switch (psenc->g[cur].type) { 775 case CS94: 776 if (!(is94(string[0] & 0x7f))) 777 goto asis; 778 case CS96: 779 if (!(is96(string[0] & 0x7f))) 780 goto asis; 781 break; 782 case CS94MULTI: 783 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 784 goto asis; 785 break; 786 case CS96MULTI: 787 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 788 goto asis; 789 break; 790 } 791 792 /* extract the character. */ 793 switch (psenc->g[cur].type) { 794 case CS94: 795 /* special case for ASCII. */ 796 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 797 wchar = *string++; 798 wchar &= 0x7f; 799 break; 800 } 801 wchar = psenc->g[cur].final; 802 wchar = (wchar << 8); 803 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 804 wchar = (wchar << 8); 805 wchar = (wchar << 8) | (*string++ & 0x7f); 806 break; 807 case CS96: 808 /* special case for ISO-8859-1. 
*/ 809 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 810 wchar = *string++; 811 wchar &= 0x7f; 812 wchar |= 0x80; 813 break; 814 } 815 wchar = psenc->g[cur].final; 816 wchar = (wchar << 8); 817 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 818 wchar = (wchar << 8); 819 wchar = (wchar << 8) | (*string++ & 0x7f); 820 wchar |= 0x80; 821 break; 822 case CS94MULTI: 823 case CS96MULTI: 824 wchar = psenc->g[cur].final; 825 wchar = (wchar << 8); 826 if (isthree(psenc->g[cur].final)) 827 wchar |= (*string++ & 0x7f); 828 wchar = (wchar << 8) | (*string++ & 0x7f); 829 wchar = (wchar << 8) | (*string++ & 0x7f); 830 if (psenc->g[cur].type == CS96MULTI) 831 wchar |= 0x80; 832 break; 833 } 834 835 if (result) 836 *result = string; 837 /* reset single shift state */ 838 psenc->singlegr = psenc->singlegl = -1; 839 return wchar; 840 } 841 842 843 844 static int 845 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 846 wchar_t * __restrict pwc, 847 const char ** __restrict s, 848 size_t n, _ISO2022State * __restrict psenc, 849 size_t * __restrict nresult) 850 { 851 wchar_t wchar; 852 const char *s0, *p, *result; 853 int c; 854 int chlenbak; 855 856 _DIAGASSERT(nresult != 0); 857 _DIAGASSERT(ei != NULL); 858 _DIAGASSERT(psenc != NULL); 859 _DIAGASSERT(s != NULL); 860 861 s0 = *s; 862 c = 0; 863 chlenbak = psenc->chlen; 864 865 /* 866 * if we have something in buffer, use that. 
867 * otherwise, skip here 868 */ 869 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) { 870 /* illgeal state */ 871 _citrus_ISO2022_init_state(ei, psenc); 872 goto encoding_error; 873 } 874 if (psenc->chlen == 0) 875 goto emptybuf; 876 877 /* buffer is not empty */ 878 p = psenc->ch; 879 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) { 880 if (n > 0) { 881 psenc->ch[psenc->chlen++] = *s0++; 882 n--; 883 } 884 885 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 886 &result, psenc); 887 c += result - p; 888 if (wchar != _ISO2022INVALID) { 889 if (psenc->chlen > c) 890 memmove(psenc->ch, result, psenc->chlen - c); 891 if (psenc->chlen < c) 892 psenc->chlen = 0; 893 else 894 psenc->chlen -= c; 895 goto output; 896 } 897 898 if (n == 0) { 899 if ((result - p) == psenc->chlen) 900 /* complete shift sequence. */ 901 psenc->chlen = 0; 902 goto restart; 903 } 904 905 p = result; 906 } 907 908 /* escape sequence too long? */ 909 goto encoding_error; 910 911 emptybuf: 912 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 913 if (wchar != _ISO2022INVALID) { 914 c += result - s0; 915 psenc->chlen = 0; 916 s0 = result; 917 goto output; 918 } 919 if (result > s0) { 920 c += (result - s0); 921 n -= (result - s0); 922 s0 = result; 923 if (n>0) 924 goto emptybuf; 925 /* complete shift sequence. */ 926 goto restart; 927 } 928 n += c; 929 if (n < sizeof(psenc->ch)) { 930 memcpy(psenc->ch, s0 - c, n); 931 psenc->chlen = n; 932 s0 = result; 933 goto restart; 934 } 935 936 /* escape sequence too long? 
*/ 937 938 encoding_error: 939 psenc->chlen = 0; 940 *nresult = (size_t)-1; 941 return (EILSEQ); 942 943 output: 944 *s = s0; 945 if (pwc) 946 *pwc = wchar; 947 948 if (!wchar) 949 *nresult = 0; 950 else 951 *nresult = c - chlenbak; 952 953 return (0); 954 955 restart: 956 *s = s0; 957 *nresult = (size_t)-2; 958 959 return (0); 960 } 961 962 static int 963 recommendation(_ISO2022EncodingInfo * __restrict ei, 964 _ISO2022Charset * __restrict cs) 965 { 966 int i, j; 967 _ISO2022Charset *recommend; 968 969 _DIAGASSERT(ei != NULL); 970 _DIAGASSERT(cs != NULL); 971 972 /* first, try a exact match. */ 973 for (i = 0; i < 4; i++) { 974 recommend = ei->recommend[i]; 975 for (j = 0; j < ei->recommendsize[i]; j++) { 976 if (cs->type != recommend[j].type) 977 continue; 978 if (cs->final != recommend[j].final) 979 continue; 980 if (cs->interm != recommend[j].interm) 981 continue; 982 983 return i; 984 } 985 } 986 987 /* then, try a wildcard match over final char. */ 988 for (i = 0; i < 4; i++) { 989 recommend = ei->recommend[i]; 990 for (j = 0; j < ei->recommendsize[i]; j++) { 991 if (cs->type != recommend[j].type) 992 continue; 993 if (cs->final && (cs->final != recommend[j].final)) 994 continue; 995 if (cs->interm && (cs->interm != recommend[j].interm)) 996 continue; 997 998 return i; 999 } 1000 } 1001 1002 /* there's no recommendation. make a guess. 
*/ 1003 if (ei->maxcharset == 0) { 1004 return 0; 1005 } else { 1006 switch (cs->type) { 1007 case CS94: 1008 case CS94MULTI: 1009 return 0; 1010 case CS96: 1011 case CS96MULTI: 1012 return 1; 1013 } 1014 } 1015 return 0; 1016 } 1017 1018 static int 1019 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1020 char * __restrict string, size_t n, 1021 char ** __restrict result, 1022 _ISO2022State * __restrict psenc) 1023 { 1024 int i = 0, len; 1025 _ISO2022Charset cs; 1026 char *p; 1027 char tmp[MB_LEN_MAX]; 1028 int target; 1029 u_char mask; 1030 int bit8; 1031 1032 _DIAGASSERT(ei != NULL); 1033 _DIAGASSERT(string != NULL); 1034 /* result may be NULL */ 1035 /* state appears to be unused */ 1036 1037 if (iscntl(wc & 0xff)) { 1038 /* go back to INIT0 or ASCII on control chars */ 1039 cs = ei->initg[0].final ? ei->initg[0] : ascii; 1040 } else if (!(wc & ~0xff)) { 1041 if (wc & 0x80) { 1042 /* special treatment for ISO-8859-1 */ 1043 cs = iso88591; 1044 } else { 1045 /* special treatment for ASCII */ 1046 cs = ascii; 1047 } 1048 } else { 1049 cs.final = (wc >> 24) & 0x7f; 1050 if ((wc >> 16) & 0x80) 1051 cs.interm = (wc >> 16) & 0x7f; 1052 else 1053 cs.interm = '\0'; 1054 if (wc & 0x80) 1055 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1056 else 1057 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1058 } 1059 target = recommendation(ei, &cs); 1060 p = tmp; 1061 bit8 = ei->flags & F_8BIT; 1062 1063 /* designate the charset onto the target plane(G0/1/2/3). 
*/ 1064 if (psenc->g[target].type == cs.type 1065 && psenc->g[target].final == cs.final 1066 && psenc->g[target].interm == cs.interm) 1067 goto planeok; 1068 1069 *p++ = '\033'; 1070 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1071 *p++ = '$'; 1072 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) 1073 && !cs.interm && !(ei->flags & F_NOOLD)) 1074 ; 1075 else if (cs.type == CS94 || cs.type == CS94MULTI) 1076 *p++ = "()*+"[target]; 1077 else 1078 *p++ = ",-./"[target]; 1079 if (cs.interm) 1080 *p++ = cs.interm; 1081 *p++ = cs.final; 1082 1083 psenc->g[target].type = cs.type; 1084 psenc->g[target].final = cs.final; 1085 psenc->g[target].interm = cs.interm; 1086 1087 planeok: 1088 /* invoke the plane onto GL or GR. */ 1089 if (psenc->gl == target) 1090 goto sideok; 1091 if (bit8 && psenc->gr == target) 1092 goto sideok; 1093 1094 if (target == 0 && (ei->flags & F_LS0)) { 1095 *p++ = '\017'; 1096 psenc->gl = 0; 1097 } else if (target == 1 && (ei->flags & F_LS1)) { 1098 *p++ = '\016'; 1099 psenc->gl = 1; 1100 } else if (target == 2 && (ei->flags & F_LS2)) { 1101 *p++ = '\033'; 1102 *p++ = 'n'; 1103 psenc->gl = 2; 1104 } else if (target == 3 && (ei->flags & F_LS3)) { 1105 *p++ = '\033'; 1106 *p++ = 'o'; 1107 psenc->gl = 3; 1108 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1109 *p++ = '\033'; 1110 *p++ = '~'; 1111 psenc->gr = 1; 1112 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1113 *p++ = '\033'; 1114 /*{*/ 1115 *p++ = '}'; 1116 psenc->gr = 2; 1117 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1118 *p++ = '\033'; 1119 *p++ = '|'; 1120 psenc->gr = 3; 1121 } else if (target == 2 && (ei->flags & F_SS2)) { 1122 *p++ = '\033'; 1123 *p++ = 'N'; 1124 psenc->singlegl = 2; 1125 } else if (target == 3 && (ei->flags & F_SS3)) { 1126 *p++ = '\033'; 1127 *p++ = 'O'; 1128 psenc->singlegl = 3; 1129 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1130 *p++ = '\216'; 1131 *p++ = 'N'; 1132 psenc->singlegl = 
psenc->singlegr = 2; 1133 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1134 *p++ = '\217'; 1135 *p++ = 'O'; 1136 psenc->singlegl = psenc->singlegr = 3; 1137 } else 1138 abort(); 1139 1140 sideok: 1141 if (psenc->singlegl == target) 1142 mask = 0x00; 1143 else if (psenc->singlegr == target) 1144 mask = 0x80; 1145 else if (psenc->gl == target) 1146 mask = 0x00; 1147 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1148 mask = 0x80; 1149 else 1150 abort(); 1151 1152 switch (cs.type) { 1153 case CS94: 1154 case CS96: 1155 i = 1; 1156 break; 1157 case CS94MULTI: 1158 case CS96MULTI: 1159 i = !iscntl(wc & 0xff) ? 1160 (isthree(cs.final) ? 3 : 2) : 1; 1161 break; 1162 } 1163 while (i-- > 0) 1164 *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1165 1166 /* reset single shift state */ 1167 psenc->singlegl = psenc->singlegr = -1; 1168 1169 len = p - tmp; 1170 if (n < len) { 1171 if (result) 1172 *result = (char *)0; 1173 } else { 1174 if (result) 1175 *result = string + len; 1176 memcpy(string, tmp, len); 1177 } 1178 return len; 1179 } 1180 1181 static int 1182 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei, 1183 char * __restrict s, size_t n, 1184 _ISO2022State * __restrict psenc, 1185 size_t * __restrict nresult) 1186 { 1187 char buf[MB_LEN_MAX]; 1188 char *result; 1189 int len, ret; 1190 1191 _DIAGASSERT(ei != NULL); 1192 _DIAGASSERT(nresult != 0); 1193 _DIAGASSERT(s != NULL); 1194 1195 /* XXX state will be modified after this operation... */ 1196 len = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc); 1197 if (len==0) { 1198 ret = EINVAL; 1199 goto err; 1200 } 1201 if (sizeof(buf) < len || n < len-1) { 1202 /* XXX should recover state? 
*/ 1203 ret = E2BIG; 1204 goto err; 1205 } 1206 1207 memcpy(s, buf, len-1); 1208 *nresult = (size_t)(len-1); 1209 return (0); 1210 1211 err: 1212 /* bound check failure */ 1213 *nresult = (size_t)-1; 1214 return ret; 1215 } 1216 1217 static int 1218 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1219 char * __restrict s, size_t n, wchar_t wc, 1220 _ISO2022State * __restrict psenc, 1221 size_t * __restrict nresult) 1222 { 1223 char buf[MB_LEN_MAX]; 1224 char *result; 1225 int len, ret; 1226 1227 _DIAGASSERT(ei != NULL); 1228 _DIAGASSERT(nresult != 0); 1229 _DIAGASSERT(s != NULL); 1230 1231 /* XXX state will be modified after this operation... */ 1232 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc); 1233 if (sizeof(buf) < len || n < len) { 1234 /* XXX should recover state? */ 1235 ret = E2BIG; 1236 goto err; 1237 } 1238 1239 memcpy(s, buf, len); 1240 *nresult = (size_t)len; 1241 return (0); 1242 1243 err: 1244 /* bound check failure */ 1245 *nresult = (size_t)-1; 1246 return ret; 1247 } 1248 1249 static __inline int 1250 /*ARGSUSED*/ 1251 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei, 1252 _csid_t * __restrict csid, 1253 _index_t * __restrict idx, wchar_t wc) 1254 { 1255 wchar_t m, nm; 1256 1257 _DIAGASSERT(csid != NULL && idx != NULL); 1258 1259 m = wc & 0x7FFF8080; 1260 nm = wc & 0x007F7F7F; 1261 if (m & 0x00800000) { 1262 nm &= 0x00007F7F; 1263 } else { 1264 m &= 0x7F008080; 1265 } 1266 if (nm & 0x007F0000) { 1267 /* ^3 mark */ 1268 m |= 0x007F0000; 1269 } else if (nm & 0x00007F00) { 1270 /* ^2 mark */ 1271 m |= 0x00007F00; 1272 } 1273 *csid = (_csid_t)m; 1274 *idx = (_index_t)nm; 1275 1276 return (0); 1277 } 1278 1279 static __inline int 1280 /*ARGSUSED*/ 1281 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei, 1282 wchar_t * __restrict wc, 1283 _csid_t csid, _index_t idx) 1284 { 1285 1286 _DIAGASSERT(ei != NULL && wc != NULL); 1287 1288 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 
1289 1290 return (0); 1291 } 1292 1293 static __inline int 1294 /*ARGSUSED*/ 1295 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei, 1296 _ISO2022State * __restrict psenc, 1297 int * __restrict rstate) 1298 { 1299 1300 if (psenc->chlen == 0) { 1301 /* XXX: it should distinguish initial and stable. */ 1302 *rstate = _STDENC_SDGEN_STABLE; 1303 } else { 1304 if (psenc->ch[0] == '\033') 1305 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 1306 else 1307 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 1308 } 1309 1310 return 0; 1311 } 1312 1313 /* ---------------------------------------------------------------------- 1314 * public interface for ctype 1315 */ 1316 1317 _CITRUS_CTYPE_DECLS(ISO2022); 1318 _CITRUS_CTYPE_DEF_OPS(ISO2022); 1319 1320 #include "citrus_ctype_template.h" 1321 1322 /* ---------------------------------------------------------------------- 1323 * public interface for stdenc 1324 */ 1325 1326 _CITRUS_STDENC_DECLS(ISO2022); 1327 _CITRUS_STDENC_DEF_OPS(ISO2022); 1328 1329 #include "citrus_stdenc_template.h" 1330