1 /* $NetBSD: citrus_iso2022.c,v 1.12 2004/12/21 11:25:43 yamt Exp $ */ 2 3 /*- 4 * Copyright (c)1999, 2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 #if defined(LIBC_SCCS) && !defined(lint) 33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.12 2004/12/21 11:25:43 yamt Exp $"); 34 #endif /* LIBC_SCCS and not lint */ 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <stddef.h> 42 #include <locale.h> 43 #include <wchar.h> 44 #include <sys/types.h> 45 #include <limits.h> 46 47 #include "citrus_namespace.h" 48 #include "citrus_types.h" 49 #include "citrus_module.h" 50 #include "citrus_ctype.h" 51 #include "citrus_stdenc.h" 52 #include "citrus_iso2022.h" 53 54 55 /* ---------------------------------------------------------------------- 56 * private stuffs used by templates 57 */ 58 59 60 /* 61 * wchar_t mappings: 62 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 63 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 64 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 65 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 66 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 67 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 68 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 69 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 70 * 94x94 charset (ESC & V ESC $ ( F) 71 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 72 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 73 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 74 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit) 75 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 76 */ 77 78 typedef struct { 79 u_char type; 80 #define CS94 (0U) 81 #define CS96 (1U) 82 #define CS94MULTI (2U) 83 #define CS96MULTI (3U) 84 85 u_char final; 86 u_char interm; 87 u_char vers; 88 } _ISO2022Charset; 89 90 typedef struct { 91 _ISO2022Charset g[4]; 92 /* need 3 bits to hold -1, 0, ..., 3 */ 93 int gl:3, 94 gr:3, 95 singlegl:3, 96 singlegr:3; 97 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 98 int chlen; 99 int flags; 100 #define _ISO2022STATE_FLAG_INITIALIZED 1 101 } _ISO2022State; 102 103 typedef struct { 104 _ISO2022Charset *recommend[4]; 105 size_t recommendsize[4]; 106 _ISO2022Charset initg[4]; 107 int maxcharset; 108 int flags; 109 #define F_8BIT 0x0001 110 #define F_NOOLD 0x0002 111 #define F_SI 0x0010 /*0F*/ 112 #define F_SO 0x0020 /*0E*/ 113 #define F_LS0 0x0010 /*0F*/ 114 #define F_LS1 0x0020 /*0E*/ 115 #define F_LS2 0x0040 /*ESC n*/ 116 #define F_LS3 0x0080 /*ESC o*/ 117 #define F_LS1R 0x0100 /*ESC ~*/ 118 #define F_LS2R 0x0200 /*ESC }*/ 119 #define F_LS3R 0x0400 /*ESC |*/ 120 #define F_SS2 0x0800 /*ESC N*/ 121 #define F_SS3 0x1000 /*ESC O*/ 122 #define F_SS2R 0x2000 /*8E*/ 123 #define F_SS3R 0x4000 /*8F*/ 124 } _ISO2022EncodingInfo; 125 typedef struct { 126 _ISO2022EncodingInfo ei; 127 struct { 128 /* for future multi-locale facility */ 129 _ISO2022State s_mblen; 130 _ISO2022State s_mbrlen; 131 _ISO2022State s_mbrtowc; 132 _ISO2022State s_mbtowc; 133 _ISO2022State s_mbsrtowcs; 134 _ISO2022State s_wcrtomb; 135 _ISO2022State s_wcsrtombs; 136 _ISO2022State s_wctomb; 137 } states; 138 } _ISO2022CTypeInfo; 139 140 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 141 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 142 143 #define _FUNCNAME(m) _citrus_ISO2022_##m 144 #define _ENCODING_INFO _ISO2022EncodingInfo 145 #define _CTYPE_INFO _ISO2022CTypeInfo 146 #define _ENCODING_STATE _ISO2022State 147 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 148 #define _ENCODING_IS_STATE_DEPENDENT 1 149 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 150 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 151 152 153 #define _ISO2022INVALID (wchar_t)-1 154 155 static __inline int isc0(__uint8_t x) { return ((x & 0x1f) == x); } 156 static __inline int isc1(__uint8_t x) { return (0x80 <= x && x <= 0x9f); } 157 static __inline int iscntl(__uint8_t x) { return (isc0(x) || isc1(x) || x == 0x7f); } 158 static __inline int is94(__uint8_t x) { return (0x21 <= x && x <= 0x7e); } 159 static __inline int is96(__uint8_t x) { return (0x20 <= x && x <= 0x7f); } 160 static __inline int isecma(__uint8_t x) { return (0x30 <= x && x <= 0x7f); } 161 static __inline int isinterm(__uint8_t x) { return (0x20 <= x && x <= 0x2f); } 162 static __inline int isthree(__uint8_t x) { return (0x60 <= x && x <= 0x6f); } 163 164 static __inline int 165 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 166 { 167 168 _DIAGASSERT(p != NULL); 169 _DIAGASSERT(cs != NULL); 170 171 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 172 cs->final = (u_char)(p[3] & 0xff); 173 cs->interm = '\0'; 174 cs->vers = '\0'; 175 cs->type = CS94MULTI; 176 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 177 cs->final = (u_char)(p[3] & 0xff); 178 cs->interm = '\0'; 179 cs->vers = '\0'; 180 cs->type = CS96MULTI; 181 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 182 cs->final = (u_char)(p[2] & 0xff); 183 cs->interm = '\0'; 184 cs->vers = '\0'; 185 cs->type = CS94; 186 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 187 cs->final = (u_char )(p[2] & 0xff); 188 cs->interm = '\0'; 189 cs->vers = '\0'; 190 cs->type = CS96; 191 } else { 192 return 1; 193 } 194 195 return 0; 196 } 197 198 199 #define _NOTMATCH 0 200 #define _MATCH 1 201 #define _PARSEFAIL 2 202 203 static __inline int 204 get_recommend(_ISO2022EncodingInfo * __restrict ei, 205 const char * __restrict token) 206 { 207 int i; 208 _ISO2022Charset cs, *p; 209 210 if (!strchr("0123", token[0]) || token[1] != '=') 211 return (_NOTMATCH); 212 213 if (getcs(&token[2], &cs) == 0) 214 ; 215 else if (!strcmp(&token[2], "94")) { 216 cs.final = (u_char)(token[4]); 217 cs.interm = '\0'; 218 cs.vers = '\0'; 219 cs.type = CS94; 220 } else if (!strcmp(&token[2], "96")) { 221 cs.final = (u_char)(token[4]); 222 cs.interm = '\0'; 223 cs.vers = '\0'; 224 cs.type = CS96; 225 } else if (!strcmp(&token[2], "94$")) { 226 cs.final = (u_char)(token[5]); 227 cs.interm = '\0'; 228 cs.vers = '\0'; 229 cs.type = CS94MULTI; 230 } else if (!strcmp(&token[2], "96$")) { 231 cs.final = (u_char)(token[5]); 232 cs.interm = '\0'; 233 cs.vers = '\0'; 234 cs.type = CS96MULTI; 235 } else { 236 return (_PARSEFAIL); 237 } 238 239 i = token[0] - '0'; 240 if (!ei->recommend[i]) { 241 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 242 } else { 243 p = realloc(ei->recommend[i], 244 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1)); 245 if (!p) 246 return (_PARSEFAIL); 247 ei->recommend[i] = p; 248 } 249 if (!ei->recommend[i]) 250 return (_PARSEFAIL); 251 ei->recommendsize[i]++; 252 253 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 254 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 255 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 256 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 257 258 return (_MATCH); 259 } 260 261 static __inline int 262 get_initg(_ISO2022EncodingInfo * __restrict ei, 263 const char * __restrict token) 264 { 265 _ISO2022Charset cs; 266 267 if (strncmp("INIT", &token[0], 4) || 268 !strchr("0123", token[4]) || 269 token[5] != '=') 270 return (_NOTMATCH); 271 272 if (getcs(&token[6], &cs) != 0) 273 return (_PARSEFAIL); 274 275 ei->initg[token[4] - '0'].type = cs.type; 276 ei->initg[token[4] - '0'].final = cs.final; 277 ei->initg[token[4] - '0'].interm = cs.interm; 278 ei->initg[token[4] - '0'].vers = cs.vers; 279 280 return (_MATCH); 281 } 282 283 static __inline int 284 get_max(_ISO2022EncodingInfo * __restrict ei, 285 const char * __restrict token) 286 { 287 if (!strcmp(token, "MAX1")) { 288 ei->maxcharset = 1; 289 } else if (!strcmp(token, "MAX2")) { 290 ei->maxcharset = 2; 291 } else if (!strcmp(token, "MAX3")) { 292 ei->maxcharset = 3; 293 } else 294 return (_NOTMATCH); 295 296 return (_MATCH); 297 } 298 299 300 static __inline int 301 get_flags(_ISO2022EncodingInfo * __restrict ei, 302 const char * __restrict token) 303 { 304 int i; 305 static struct { 306 const char *tag; 307 int flag; 308 } const tags[] = { 309 { "DUMMY", 0 }, 310 { "8BIT", F_8BIT }, 311 { "NOOLD", F_NOOLD }, 312 { "SI", F_SI }, 313 { "SO", F_SO }, 314 { "LS0", F_LS0 }, 315 { "LS1", F_LS1 }, 316 { "LS2", F_LS2 }, 317 { "LS3", F_LS3 }, 318 { "LS1R", F_LS1R }, 319 { "LS2R", F_LS2R }, 320 { "LS3R", F_LS3R }, 321 { "SS2", F_SS2 }, 322 { "SS3", F_SS3 }, 323 { "SS2R", F_SS2R }, 324 { "SS3R", F_SS3R }, 325 { NULL, 0 } 326 }; 327 328 for (i = 0; tags[i].tag; i++) { 329 if (!strcmp(token, tags[i].tag)) { 330 ei->flags |= tags[i].flag; 331 return (_MATCH); 332 } 333 } 334 335 return (_NOTMATCH); 336 } 337 338 339 static __inline int 340 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 341 const void * __restrict var, size_t lenvar) 342 { 343 char const *v, *e; 344 char buf[20]; 345 int i, len, ret; 346 347 _DIAGASSERT(ei != NULL); 348 349 350 /* 351 * parse VARIABLE section. 352 */ 353 354 if (!var) 355 return (EFTYPE); 356 357 v = (const char *) var; 358 359 /* initialize structure */ 360 ei->maxcharset = 0; 361 for (i = 0; i < 4; i++) { 362 ei->recommend[i] = NULL; 363 ei->recommendsize[i] = 0; 364 } 365 ei->flags = 0; 366 367 while (*v) { 368 while (*v == ' ' || *v == '\t') 369 ++v; 370 371 /* find the token */ 372 e = v; 373 while (*e && *e != ' ' && *e != '\t') 374 ++e; 375 376 len = e-v; 377 if (len == 0) 378 break; 379 if (len>=sizeof(buf)) 380 goto parsefail; 381 snprintf(buf, sizeof(buf), "%.*s", len, v); 382 383 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 384 ; 385 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 386 ; 387 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 388 ; 389 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 390 ; 391 else 392 ret = _PARSEFAIL; 393 if (ret==_PARSEFAIL) 394 goto parsefail; 395 v = e; 396 397 } 398 399 return (0); 400 401 parsefail: 402 free(ei->recommend[0]); 403 free(ei->recommend[1]); 404 free(ei->recommend[2]); 405 free(ei->recommend[3]); 406 407 return (EFTYPE); 408 } 409 410 static __inline void 411 /*ARGSUSED*/ 412 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 413 _ISO2022State * __restrict s) 414 { 415 int i; 416 417 memset(s, 0, sizeof(*s)); 418 s->gl = 0; 419 s->gr = (ei->flags & F_8BIT) ? 1 : -1; 420 421 for (i = 0; i < 4; i++) { 422 if (ei->initg[i].final) { 423 s->g[i].type = ei->initg[i].type; 424 s->g[i].final = ei->initg[i].final; 425 s->g[i].interm = ei->initg[i].interm; 426 } 427 } 428 s->singlegl = s->singlegr = -1; 429 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 430 } 431 432 static __inline void 433 /*ARGSUSED*/ 434 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei, 435 void * __restrict pspriv, 436 const _ISO2022State * __restrict s) 437 { 438 memcpy(pspriv, (const void *)s, sizeof(*s)); 439 } 440 441 static __inline void 442 /*ARGSUSED*/ 443 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei, 444 _ISO2022State * __restrict s, 445 const void * __restrict pspriv) 446 { 447 memcpy((void *)s, pspriv, sizeof(*s)); 448 } 449 450 static int 451 /*ARGSUSED*/ 452 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 453 const void * __restrict var, 454 size_t lenvar) 455 { 456 457 _DIAGASSERT(ei != NULL); 458 459 return _citrus_ISO2022_parse_variable(ei, var, lenvar); 460 } 461 462 static void 463 /*ARGSUSED*/ 464 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei) 465 { 466 } 467 468 #define ESC '\033' 469 #define ECMA -1 470 #define INTERM -2 471 #define OECMA -3 472 static const struct seqtable { 473 int type; 474 int csoff; 475 int finaloff; 476 int intermoff; 477 int versoff; 478 int len; 479 int chars[10]; 480 } seqtable[] = { 481 /* G0 94MULTI special */ 482 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 483 /* G0 94MULTI special with version identification */ 484 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 485 /* G? 94 */ 486 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 487 /* G? 94 with 2nd intermediate char */ 488 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 489 /* G? 96 */ 490 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 491 /* G? 96 with 2nd intermediate char */ 492 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 493 /* G? 94MULTI */ 494 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 495 /* G? 96MULTI */ 496 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 497 /* G? 94MULTI with version specification */ 498 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 499 /* LS2/3 */ 500 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 501 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 502 /* LS1/2/3R */ 503 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 504 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 505 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 506 /* SS2/3 */ 507 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 508 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 509 /* end of records */ 510 { 0, } 511 }; 512 513 static int 514 seqmatch(const char * __restrict s, size_t n, 515 const struct seqtable * __restrict sp) 516 { 517 const int *p; 518 519 _DIAGASSERT(s != NULL); 520 _DIAGASSERT(sp != NULL); 521 522 p = sp->chars; 523 while (p - sp->chars < n && p - sp->chars < sp->len) { 524 switch (*p) { 525 case ECMA: 526 if (!isecma(*s)) 527 goto terminate; 528 break; 529 case OECMA: 530 if (*s && strchr("@AB", *s)) 531 break; 532 else 533 goto terminate; 534 case INTERM: 535 if (!isinterm(*s)) 536 goto terminate; 537 break; 538 case CS94: 539 if (*s && strchr("()*+", *s)) 540 break; 541 else 542 goto terminate; 543 case CS96: 544 if (*s && strchr(",-./", *s)) 545 break; 546 else 547 goto terminate; 548 default: 549 if (*s != *p) 550 goto terminate; 551 break; 552 } 553 554 p++; 555 s++; 556 } 557 558 terminate: 559 return p - sp->chars; 560 } 561 562 static wchar_t 563 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei, 564 const char * __restrict string, size_t n, 565 const char ** __restrict result, 566 _ISO2022State * __restrict psenc) 567 { 568 wchar_t wchar = 0; 569 int cur; 570 const struct seqtable *sp; 571 int nmatch; 572 int i; 573 574 _DIAGASSERT(ei != NULL); 575 _DIAGASSERT(state != NULL); 576 _DIAGASSERT(string != NULL); 577 /* result may be NULL */ 578 579 while (1) { 580 /* SI/SO */ 581 if (1 <= n && string[0] == '\017') { 582 psenc->gl = 0; 583 string++; 584 n--; 585 continue; 586 } 587 if (1 <= n && string[0] == '\016') { 588 psenc->gl = 1; 589 string++; 590 n--; 591 continue; 592 } 593 594 /* SS2/3R */ 595 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 596 psenc->singlegl = psenc->singlegr = 597 (string[0] - '\216') + 2; 598 string++; 599 n--; 600 continue; 601 } 602 603 /* eat the letter if this is not ESC */ 604 if (1 <= n && string[0] != '\033') 605 break; 606 607 /* look for a perfect match from escape sequences */ 608 for (sp = &seqtable[0]; sp->len; sp++) { 609 nmatch = seqmatch(string, n, sp); 610 if (sp->len == nmatch && n >= sp->len) 611 break; 612 } 613 614 if (!sp->len) 615 goto notseq; 616 617 if (sp->type != -1) { 618 if (sp->csoff == -1) 619 i = 0; 620 else { 621 switch (sp->type) { 622 case CS94: 623 case CS94MULTI: 624 i = string[sp->csoff] - '('; 625 break; 626 case CS96: 627 case CS96MULTI: 628 i = string[sp->csoff] - ','; 629 break; 630 } 631 } 632 psenc->g[i].type = sp->type; 633 psenc->g[i].final = '\0'; 634 psenc->g[i].interm = '\0'; 635 psenc->g[i].vers = '\0'; 636 /* sp->finaloff must not be -1 */ 637 if (sp->finaloff != -1) 638 psenc->g[i].final = string[sp->finaloff]; 639 if (sp->intermoff != -1) 640 psenc->g[i].interm = string[sp->intermoff]; 641 if (sp->versoff != -1) 642 psenc->g[i].vers = string[sp->versoff]; 643 644 string += sp->len; 645 n -= sp->len; 646 continue; 647 } 648 649 /* LS2/3 */ 650 if (2 <= n && string[0] == '\033' 651 && string[1] && strchr("no", string[1])) { 652 psenc->gl = string[1] - 'n' + 2; 653 string += 2; 654 n -= 2; 655 continue; 656 } 657 658 /* LS1/2/3R */ 659 /* XXX: { for vi showmatch */ 660 if (2 <= n && string[0] == '\033' 661 && string[1] && strchr("~}|", string[1])) { 662 psenc->gr = 3 - (string[1] - '|'); 663 string += 2; 664 n -= 2; 665 continue; 666 } 667 668 /* SS2/3 */ 669 if (2 <= n && string[0] == '\033' 670 && string[1] && strchr("NO", string[1])) { 671 psenc->singlegl = (string[1] - 'N') + 2; 672 string += 2; 673 n -= 2; 674 continue; 675 } 676 677 notseq: 678 /* 679 * if we've got an unknown escape sequence, eat the ESC at the 680 * head. otherwise, wait till full escape sequence comes. 681 */ 682 for (sp = &seqtable[0]; sp->len; sp++) { 683 nmatch = seqmatch(string, n, sp); 684 if (!nmatch) 685 continue; 686 687 /* 688 * if we are in the middle of escape sequence, 689 * we still need to wait for more characters to come 690 */ 691 if (n < sp->len) { 692 if (nmatch == n) { 693 if (result) 694 *result = string; 695 return (_ISO2022INVALID); 696 } 697 } else { 698 if (nmatch == sp->len) { 699 /* this case should not happen */ 700 goto eat; 701 } 702 } 703 } 704 705 break; 706 } 707 708 eat: 709 /* no letter to eat */ 710 if (n < 1) { 711 if (result) 712 *result = string; 713 return (_ISO2022INVALID); 714 } 715 716 /* normal chars. always eat C0/C1 as is. */ 717 if (iscntl(*string & 0xff)) 718 cur = -1; 719 else if (*string & 0x80) { 720 cur = (psenc->singlegr == -1) 721 ? psenc->gr : psenc->singlegr; 722 } else { 723 cur = (psenc->singlegl == -1) 724 ? psenc->gl : psenc->singlegl; 725 } 726 727 if (cur == -1) { 728 asis: 729 wchar = *string++ & 0xff; 730 if (result) 731 *result = string; 732 /* reset single shift state */ 733 psenc->singlegr = psenc->singlegl = -1; 734 return wchar; 735 } 736 737 /* length error check */ 738 switch (psenc->g[cur].type) { 739 case CS94MULTI: 740 case CS96MULTI: 741 if (!isthree(psenc->g[cur].final)) { 742 if (2 <= n 743 && (string[0] & 0x80) == (string[1] & 0x80)) 744 break; 745 } else { 746 if (3 <= n 747 && (string[0] & 0x80) == (string[1] & 0x80) 748 && (string[0] & 0x80) == (string[2] & 0x80)) 749 break; 750 } 751 752 /* we still need to wait for more characters to come */ 753 if (result) 754 *result = string; 755 return (_ISO2022INVALID); 756 757 case CS94: 758 case CS96: 759 if (1 <= n) 760 break; 761 762 /* we still need to wait for more characters to come */ 763 if (result) 764 *result = string; 765 return (_ISO2022INVALID); 766 } 767 768 /* range check */ 769 switch (psenc->g[cur].type) { 770 case CS94: 771 if (!(is94(string[0] & 0x7f))) 772 goto asis; 773 case CS96: 774 if (!(is96(string[0] & 0x7f))) 775 goto asis; 776 break; 777 case CS94MULTI: 778 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 779 goto asis; 780 break; 781 case CS96MULTI: 782 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 783 goto asis; 784 break; 785 } 786 787 /* extract the character. */ 788 switch (psenc->g[cur].type) { 789 case CS94: 790 /* special case for ASCII. */ 791 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 792 wchar = *string++; 793 wchar &= 0x7f; 794 break; 795 } 796 wchar = psenc->g[cur].final; 797 wchar = (wchar << 8); 798 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 799 wchar = (wchar << 8); 800 wchar = (wchar << 8) | (*string++ & 0x7f); 801 break; 802 case CS96: 803 /* special case for ISO-8859-1. */ 804 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 805 wchar = *string++; 806 wchar &= 0x7f; 807 wchar |= 0x80; 808 break; 809 } 810 wchar = psenc->g[cur].final; 811 wchar = (wchar << 8); 812 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 813 wchar = (wchar << 8); 814 wchar = (wchar << 8) | (*string++ & 0x7f); 815 wchar |= 0x80; 816 break; 817 case CS94MULTI: 818 case CS96MULTI: 819 wchar = psenc->g[cur].final; 820 wchar = (wchar << 8); 821 if (isthree(psenc->g[cur].final)) 822 wchar |= (*string++ & 0x7f); 823 wchar = (wchar << 8) | (*string++ & 0x7f); 824 wchar = (wchar << 8) | (*string++ & 0x7f); 825 if (psenc->g[cur].type == CS96MULTI) 826 wchar |= 0x80; 827 break; 828 } 829 830 if (result) 831 *result = string; 832 /* reset single shift state */ 833 psenc->singlegr = psenc->singlegl = -1; 834 return wchar; 835 } 836 837 838 839 static int 840 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 841 wchar_t * __restrict pwc, 842 const char ** __restrict s, 843 size_t n, _ISO2022State * __restrict psenc, 844 size_t * __restrict nresult) 845 { 846 wchar_t wchar; 847 const char *s0, *p, *result; 848 int c; 849 int chlenbak; 850 851 _DIAGASSERT(nresult != 0); 852 _DIAGASSERT(ei != NULL); 853 _DIAGASSERT(psenc != NULL); 854 _DIAGASSERT(s != NULL); 855 856 s0 = *s; 857 c = 0; 858 chlenbak = psenc->chlen; 859 860 /* 861 * if we have something in buffer, use that. 862 * otherwise, skip here 863 */ 864 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) { 865 /* illgeal state */ 866 _citrus_ISO2022_init_state(ei, psenc); 867 goto encoding_error; 868 } 869 if (psenc->chlen == 0) 870 goto emptybuf; 871 872 /* buffer is not empty */ 873 p = psenc->ch; 874 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) { 875 if (n > 0) { 876 psenc->ch[psenc->chlen++] = *s0++; 877 n--; 878 } 879 880 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 881 &result, psenc); 882 if (wchar != _ISO2022INVALID) { 883 c += result - p; 884 if (psenc->chlen > c) 885 memmove(psenc->ch, result, psenc->chlen - c); 886 if (psenc->chlen < c) 887 psenc->chlen = 0; 888 else 889 psenc->chlen -= c; 890 goto output; 891 } 892 893 c += result - p; 894 p = result; 895 896 if (n == 0) 897 goto restart; 898 } 899 900 /* escape sequence too long? */ 901 goto encoding_error; 902 903 emptybuf: 904 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 905 if (wchar != _ISO2022INVALID) { 906 c += result - s0; 907 psenc->chlen = 0; 908 s0 = result; 909 goto output; 910 } 911 if (result > s0 && n > result - s0) { 912 c += (result - s0); 913 n -= (result - s0); 914 s0 = result; 915 goto emptybuf; 916 } 917 n += c; 918 if (n < sizeof(psenc->ch)) { 919 memcpy(psenc->ch, s0 - c, n); 920 psenc->chlen = n; 921 s0 = result; 922 goto restart; 923 } 924 925 /* escape sequence too long? */ 926 927 encoding_error: 928 psenc->chlen = 0; 929 *nresult = (size_t)-1; 930 return (EILSEQ); 931 932 output: 933 *s = s0; 934 if (pwc) 935 *pwc = wchar; 936 937 if (!wchar) 938 *nresult = 0; 939 else 940 *nresult = c - chlenbak; 941 942 return (0); 943 944 restart: 945 *s = s0; 946 *nresult = (size_t)-2; 947 948 return (0); 949 } 950 951 static int 952 recommendation(_ISO2022EncodingInfo * __restrict ei, 953 _ISO2022Charset * __restrict cs) 954 { 955 int i, j; 956 _ISO2022Charset *recommend; 957 958 _DIAGASSERT(ei != NULL); 959 _DIAGASSERT(cs != NULL); 960 961 /* first, try a exact match. */ 962 for (i = 0; i < 4; i++) { 963 recommend = ei->recommend[i]; 964 for (j = 0; j < ei->recommendsize[i]; j++) { 965 if (cs->type != recommend[j].type) 966 continue; 967 if (cs->final != recommend[j].final) 968 continue; 969 if (cs->interm != recommend[j].interm) 970 continue; 971 972 return i; 973 } 974 } 975 976 /* then, try a wildcard match over final char. */ 977 for (i = 0; i < 4; i++) { 978 recommend = ei->recommend[i]; 979 for (j = 0; j < ei->recommendsize[i]; j++) { 980 if (cs->type != recommend[j].type) 981 continue; 982 if (cs->final && (cs->final != recommend[j].final)) 983 continue; 984 if (cs->interm && (cs->interm != recommend[j].interm)) 985 continue; 986 987 return i; 988 } 989 } 990 991 /* there's no recommendation. make a guess. */ 992 if (ei->maxcharset == 0) { 993 return 0; 994 } else { 995 switch (cs->type) { 996 case CS94: 997 case CS94MULTI: 998 return 0; 999 case CS96: 1000 case CS96MULTI: 1001 return 1; 1002 } 1003 } 1004 return 0; 1005 } 1006 1007 static int 1008 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1009 char * __restrict string, size_t n, 1010 char ** __restrict result, 1011 _ISO2022State * __restrict psenc) 1012 { 1013 int i = 0, len; 1014 _ISO2022Charset cs; 1015 char *p; 1016 char tmp[MB_LEN_MAX]; 1017 int target; 1018 u_char mask; 1019 int bit8; 1020 1021 _DIAGASSERT(ei != NULL); 1022 _DIAGASSERT(string != NULL); 1023 /* result may be NULL */ 1024 /* state appears to be unused */ 1025 1026 if (iscntl(wc & 0xff)) { 1027 /* go back to ASCII on control chars */ 1028 cs.type = CS94; 1029 cs.final = 'B'; 1030 cs.interm = '\0'; 1031 } else if (!(wc & ~0xff)) { 1032 if (wc & 0x80) { 1033 /* special treatment for ISO-8859-1 */ 1034 cs.type = CS96; 1035 cs.final = 'A'; 1036 cs.interm = '\0'; 1037 } else { 1038 /* special treatment for ASCII */ 1039 cs.type = CS94; 1040 cs.final = 'B'; 1041 cs.interm = '\0'; 1042 } 1043 } else { 1044 cs.final = (wc >> 24) & 0x7f; 1045 if ((wc >> 16) & 0x80) 1046 cs.interm = (wc >> 16) & 0x7f; 1047 else 1048 cs.interm = '\0'; 1049 if (wc & 0x80) 1050 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1051 else 1052 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1053 } 1054 target = recommendation(ei, &cs); 1055 p = tmp; 1056 bit8 = ei->flags & F_8BIT; 1057 1058 /* designate the charset onto the target plane(G0/1/2/3). */ 1059 if (psenc->g[target].type == cs.type 1060 && psenc->g[target].final == cs.final 1061 && psenc->g[target].interm == cs.interm) 1062 goto planeok; 1063 1064 *p++ = '\033'; 1065 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1066 *p++ = '$'; 1067 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) 1068 && !cs.interm && !(ei->flags & F_NOOLD)) 1069 ; 1070 else if (cs.type == CS94 || cs.type == CS94MULTI) 1071 *p++ = "()*+"[target]; 1072 else 1073 *p++ = ",-./"[target]; 1074 if (cs.interm) 1075 *p++ = cs.interm; 1076 *p++ = cs.final; 1077 1078 psenc->g[target].type = cs.type; 1079 psenc->g[target].final = cs.final; 1080 psenc->g[target].interm = cs.interm; 1081 1082 planeok: 1083 /* invoke the plane onto GL or GR. */ 1084 if (psenc->gl == target) 1085 goto sideok; 1086 if (bit8 && psenc->gr == target) 1087 goto sideok; 1088 1089 if (target == 0 && (ei->flags & F_LS0)) { 1090 *p++ = '\017'; 1091 psenc->gl = 0; 1092 } else if (target == 1 && (ei->flags & F_LS1)) { 1093 *p++ = '\016'; 1094 psenc->gl = 1; 1095 } else if (target == 2 && (ei->flags & F_LS2)) { 1096 *p++ = '\033'; 1097 *p++ = 'n'; 1098 psenc->gl = 2; 1099 } else if (target == 3 && (ei->flags & F_LS3)) { 1100 *p++ = '\033'; 1101 *p++ = 'o'; 1102 psenc->gl = 3; 1103 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1104 *p++ = '\033'; 1105 *p++ = '~'; 1106 psenc->gr = 1; 1107 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1108 *p++ = '\033'; 1109 /*{*/ 1110 *p++ = '}'; 1111 psenc->gr = 2; 1112 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1113 *p++ = '\033'; 1114 *p++ = '|'; 1115 psenc->gr = 3; 1116 } else if (target == 2 && (ei->flags & F_SS2)) { 1117 *p++ = '\033'; 1118 *p++ = 'N'; 1119 psenc->singlegl = 2; 1120 } else if (target == 3 && (ei->flags & F_SS3)) { 1121 *p++ = '\033'; 1122 *p++ = 'O'; 1123 psenc->singlegl = 3; 1124 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1125 *p++ = '\216'; 1126 *p++ = 'N'; 1127 psenc->singlegl = psenc->singlegr = 2; 1128 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1129 *p++ = '\217'; 1130 *p++ = 'O'; 1131 psenc->singlegl = psenc->singlegr = 3; 1132 } else 1133 abort(); 1134 1135 sideok: 1136 if (psenc->singlegl == target) 1137 mask = 0x00; 1138 else if (psenc->singlegr == target) 1139 mask = 0x80; 1140 else if (psenc->gl == target) 1141 mask = 0x00; 1142 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1143 mask = 0x80; 1144 else 1145 abort(); 1146 1147 switch (cs.type) { 1148 case CS94: 1149 case CS96: 1150 i = 1; 1151 break; 1152 case CS94MULTI: 1153 case CS96MULTI: 1154 i = isthree(cs.final) ? 3 : 2; 1155 break; 1156 } 1157 while (i-- > 0) 1158 *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1159 1160 /* reset single shift state */ 1161 psenc->singlegl = psenc->singlegr = -1; 1162 1163 len = p - tmp; 1164 if (n < len) { 1165 if (result) 1166 *result = (char *)0; 1167 } else { 1168 if (result) 1169 *result = string + len; 1170 memcpy(string, tmp, len); 1171 } 1172 return len; 1173 } 1174 1175 static int 1176 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei, 1177 char * __restrict s, size_t n, 1178 _ISO2022State * __restrict psenc, 1179 size_t * __restrict nresult) 1180 { 1181 char buf[MB_LEN_MAX]; 1182 char *result; 1183 int len, ret; 1184 1185 _DIAGASSERT(ei != NULL); 1186 _DIAGASSERT(nresult != 0); 1187 _DIAGASSERT(s != NULL); 1188 1189 /* XXX state will be modified after this operation... */ 1190 len = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc); 1191 if (len==0) { 1192 ret = EINVAL; 1193 goto err; 1194 } 1195 if (sizeof(buf) < len || n < len-1) { 1196 /* XXX should recover state? */ 1197 ret = E2BIG; 1198 goto err; 1199 } 1200 1201 memcpy(s, buf, len-1); 1202 *nresult = (size_t)(len-1); 1203 return (0); 1204 1205 err: 1206 /* bound check failure */ 1207 *nresult = (size_t)-1; 1208 return ret; 1209 } 1210 1211 static int 1212 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1213 char * __restrict s, size_t n, wchar_t wc, 1214 _ISO2022State * __restrict psenc, 1215 size_t * __restrict nresult) 1216 { 1217 char buf[MB_LEN_MAX]; 1218 char *result; 1219 int len, ret; 1220 1221 _DIAGASSERT(ei != NULL); 1222 _DIAGASSERT(nresult != 0); 1223 _DIAGASSERT(s != NULL); 1224 1225 /* XXX state will be modified after this operation... */ 1226 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc); 1227 if (sizeof(buf) < len || n < len) { 1228 /* XXX should recover state? */ 1229 ret = E2BIG; 1230 goto err; 1231 } 1232 1233 memcpy(s, buf, len); 1234 *nresult = (size_t)len; 1235 return (0); 1236 1237 err: 1238 /* bound check failure */ 1239 *nresult = (size_t)-1; 1240 return ret; 1241 } 1242 1243 static __inline int 1244 /*ARGSUSED*/ 1245 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei, 1246 _csid_t * __restrict csid, 1247 _index_t * __restrict idx, wchar_t wc) 1248 { 1249 wchar_t m, nm; 1250 1251 _DIAGASSERT(csid != NULL && idx != NULL); 1252 1253 m = wc & 0x7FFF8080; 1254 nm = wc & 0x007F7F7F; 1255 if (m & 0x00800000) { 1256 nm &= 0x00007F7F; 1257 } else { 1258 m &= 0x7F008080; 1259 } 1260 if (nm & 0x007F0000) { 1261 /* ^3 mark */ 1262 m |= 0x007F0000; 1263 } else if (nm & 0x00007F00) { 1264 /* ^2 mark */ 1265 m |= 0x00007F00; 1266 } 1267 *csid = (_csid_t)m; 1268 *idx = (_index_t)nm; 1269 1270 return (0); 1271 } 1272 1273 static __inline int 1274 /*ARGSUSED*/ 1275 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei, 1276 wchar_t * __restrict wc, 1277 _csid_t csid, _index_t idx) 1278 { 1279 1280 _DIAGASSERT(ei != NULL && wc != NULL); 1281 1282 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 1283 1284 return (0); 1285 } 1286 1287 /* ---------------------------------------------------------------------- 1288 * public interface for ctype 1289 */ 1290 1291 _CITRUS_CTYPE_DECLS(ISO2022); 1292 _CITRUS_CTYPE_DEF_OPS(ISO2022); 1293 1294 #include "citrus_ctype_template.h" 1295 1296 /* ---------------------------------------------------------------------- 1297 * public interface for stdenc 1298 */ 1299 1300 _CITRUS_STDENC_DECLS(ISO2022); 1301 _CITRUS_STDENC_DEF_OPS(ISO2022); 1302 1303 #include "citrus_stdenc_template.h" 1304