1 /* $NetBSD: citrus_iso2022.c,v 1.7 2003/06/25 09:51:44 tshiozak Exp $ */ 2 3 /*- 4 * Copyright (c)1999, 2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 #if defined(LIBC_SCCS) && !defined(lint) 33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.7 2003/06/25 09:51:44 tshiozak Exp $"); 34 #endif /* LIBC_SCCS and not lint */ 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <stddef.h> 42 #include <locale.h> 43 #include <wchar.h> 44 #include <sys/types.h> 45 #include <limits.h> 46 47 #include "citrus_namespace.h" 48 #include "citrus_types.h" 49 #include "citrus_module.h" 50 #include "citrus_ctype.h" 51 #include "citrus_stdenc.h" 52 #include "citrus_iso2022.h" 53 54 55 /* ---------------------------------------------------------------------- 56 * private stuffs used by templates 57 */ 58 59 60 /* 61 * wchar_t mappings: 62 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 63 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 64 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 65 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 66 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 67 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 68 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 69 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 70 * 94x94 charset (ESC & V ESC $ ( F) 71 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 72 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 73 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 74 */ 75 76 typedef struct { 77 u_char type; 78 #define CS94 (0U) 79 #define CS96 (1U) 80 #define CS94MULTI (2U) 81 #define CS96MULTI (3U) 82 83 u_char final; 84 u_char interm; 85 u_char vers; 86 } _ISO2022Charset; 87 88 typedef struct { 89 _ISO2022Charset g[4]; 90 /* need 3 bits to hold -1, 0, ..., 3 */ 91 int gl:3, 92 gr:3, 93 singlegl:3, 94 singlegr:3; 95 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 96 int chlen; 97 int flags; 98 #define _ISO2022STATE_FLAG_INITIALIZED 1 99 } _ISO2022State; 100 101 typedef struct { 102 _ISO2022Charset *recommend[4]; 103 size_t recommendsize[4]; 104 _ISO2022Charset initg[4]; 105 int maxcharset; 106 int flags; 107 #define F_8BIT 0x0001 108 #define F_NOOLD 0x0002 109 #define F_SI 0x0010 /*0F*/ 110 #define F_SO 0x0020 /*0E*/ 111 #define F_LS0 0x0010 /*0F*/ 112 #define F_LS1 0x0020 /*0E*/ 113 #define F_LS2 0x0040 /*ESC n*/ 114 #define F_LS3 0x0080 /*ESC o*/ 115 #define F_LS1R 0x0100 /*ESC ~*/ 116 #define F_LS2R 0x0200 /*ESC }*/ 117 #define F_LS3R 0x0400 /*ESC |*/ 118 #define F_SS2 0x0800 /*ESC N*/ 119 #define F_SS3 0x1000 /*ESC O*/ 120 #define F_SS2R 0x2000 /*8E*/ 121 #define F_SS3R 0x4000 /*8F*/ 122 } _ISO2022EncodingInfo; 123 typedef struct { 124 _ISO2022EncodingInfo ei; 125 struct { 126 /* for future multi-locale facility */ 127 _ISO2022State s_mblen; 128 _ISO2022State s_mbrlen; 129 _ISO2022State s_mbrtowc; 130 _ISO2022State s_mbtowc; 131 _ISO2022State s_mbsrtowcs; 132 _ISO2022State s_wcrtomb; 133 _ISO2022State s_wcsrtombs; 134 _ISO2022State s_wctomb; 135 } states; 136 } _ISO2022CTypeInfo; 137 138 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 139 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 140 141 #define _FUNCNAME(m) _citrus_ISO2022_##m 142 #define _ENCODING_INFO _ISO2022EncodingInfo 143 #define _CTYPE_INFO _ISO2022CTypeInfo 144 #define _ENCODING_STATE _ISO2022State 145 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 146 #define _ENCODING_IS_STATE_DEPENDENT 1 147 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 148 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 149 150 151 #define _ISO2022INVALID (wchar_t)-1 152 153 static __inline int isc0(__uint8_t x) { return ((x & 0x1f) == x); } 154 static __inline int isc1(__uint8_t x) { return (0x80 <= x && x <= 0x9f); } 155 static __inline int iscntl(__uint8_t x) { return (isc0(x) || isc1(x) || x == 0x7f); } 156 static __inline int is94(__uint8_t x) { return (0x21 <= x && x <= 0x7e); } 157 static __inline int is96(__uint8_t x) { return (0x20 <= x && x <= 0x7f); } 158 static __inline int isecma(__uint8_t x) { return (0x30 <= x && x <= 0x7f); } 159 static __inline int isinterm(__uint8_t x) { return (0x20 <= x && x <= 0x2f); } 160 static __inline int isthree(__uint8_t x) { return (0x60 <= x && x <= 0x6f); } 161 162 static __inline int 163 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 164 { 165 166 _DIAGASSERT(p != NULL); 167 _DIAGASSERT(cs != NULL); 168 169 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 170 cs->final = (u_char)(p[3] & 0xff); 171 cs->interm = '\0'; 172 cs->vers = '\0'; 173 cs->type = CS94MULTI; 174 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 175 cs->final = (u_char)(p[3] & 0xff); 176 cs->interm = '\0'; 177 cs->vers = '\0'; 178 cs->type = CS96MULTI; 179 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 180 cs->final = (u_char)(p[2] & 0xff); 181 cs->interm = '\0'; 182 cs->vers = '\0'; 183 cs->type = CS94; 184 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 185 cs->final = (u_char )(p[2] & 0xff); 186 cs->interm = '\0'; 187 cs->vers = '\0'; 188 cs->type = CS96; 189 } else { 190 return 1; 191 } 192 193 return 0; 194 } 195 196 197 #define _NOTMATCH 0 198 #define _MATCH 1 199 #define _PARSEFAIL 2 200 201 static __inline int 202 get_recommend(_ISO2022EncodingInfo * __restrict ei, 203 const char * __restrict token) 204 { 205 int i; 206 _ISO2022Charset cs; 207 208 if (!strchr("0123", token[0]) || token[1] != '=') 209 return (_NOTMATCH); 210 211 if (getcs(&token[2], &cs) == 0) 212 ; 213 else if (!strcmp(&token[2], "94")) { 214 cs.final = (u_char)(token[4]); 215 cs.interm = '\0'; 216 cs.vers = '\0'; 217 cs.type = CS94; 218 } else if (!strcmp(&token[2], "96")) { 219 cs.final = (u_char)(token[4]); 220 cs.interm = '\0'; 221 cs.vers = '\0'; 222 cs.type = CS96; 223 } else if (!strcmp(&token[2], "94$")) { 224 cs.final = (u_char)(token[5]); 225 cs.interm = '\0'; 226 cs.vers = '\0'; 227 cs.type = CS94MULTI; 228 } else if (!strcmp(&token[2], "96$")) { 229 cs.final = (u_char)(token[5]); 230 cs.interm = '\0'; 231 cs.vers = '\0'; 232 cs.type = CS96MULTI; 233 } else { 234 return (_PARSEFAIL); 235 } 236 237 i = token[0] - '0'; 238 ei->recommendsize[i] += 1; 239 if (!ei->recommend[i]) { 240 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 241 } else { 242 ei->recommend[i] = 243 realloc(ei->recommend[i], 244 sizeof(_ISO2022Charset)* (ei->recommendsize[i])); 245 } 246 if (!ei->recommend[i]) 247 return (_PARSEFAIL); 248 249 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 250 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 251 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 252 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 253 254 return (_MATCH); 255 } 256 257 static __inline int 258 get_initg(_ISO2022EncodingInfo * __restrict ei, 259 const char * __restrict token) 260 { 261 _ISO2022Charset cs; 262 263 if (strncmp("INIT", &token[0], 4) || 264 !strchr("0123", token[4]) || 265 token[5] != '=') 266 return (_NOTMATCH); 267 268 if (getcs(&token[6], &cs) != 0) 269 return (_PARSEFAIL); 270 271 ei->initg[token[4] - '0'].type = cs.type; 272 ei->initg[token[4] - '0'].final = cs.final; 273 ei->initg[token[4] - '0'].interm = cs.interm; 274 ei->initg[token[4] - '0'].vers = cs.vers; 275 276 return (_MATCH); 277 } 278 279 static __inline int 280 get_max(_ISO2022EncodingInfo * __restrict ei, 281 const char * __restrict token) 282 { 283 if (!strcmp(token, "MAX1")) { 284 ei->maxcharset = 1; 285 } else if (!strcmp(token, "MAX2")) { 286 ei->maxcharset = 2; 287 } else if (!strcmp(token, "MAX3")) { 288 ei->maxcharset = 3; 289 } else 290 return (_NOTMATCH); 291 292 return (_MATCH); 293 } 294 295 296 static __inline int 297 get_flags(_ISO2022EncodingInfo * __restrict ei, 298 const char * __restrict token) 299 { 300 int i; 301 static struct { 302 const char *tag; 303 int flag; 304 } const tags[] = { 305 { "DUMMY", 0 }, 306 { "8BIT", F_8BIT }, 307 { "NOOLD", F_NOOLD }, 308 { "SI", F_SI }, 309 { "SO", F_SO }, 310 { "LS0", F_LS0 }, 311 { "LS1", F_LS1 }, 312 { "LS2", F_LS2 }, 313 { "LS3", F_LS3 }, 314 { "LS1R", F_LS1R }, 315 { "LS2R", F_LS2R }, 316 { "LS3R", F_LS3R }, 317 { "SS2", F_SS2 }, 318 { "SS3", F_SS3 }, 319 { "SS2R", F_SS2R }, 320 { "SS3R", F_SS3R }, 321 { NULL, 0 } 322 }; 323 324 for (i = 0; tags[i].tag; i++) { 325 if (!strcmp(token, tags[i].tag)) { 326 ei->flags |= tags[i].flag; 327 return (_MATCH); 328 } 329 } 330 331 return (_NOTMATCH); 332 } 333 334 335 static __inline int 336 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 337 const void * __restrict var, size_t lenvar) 338 { 339 char const *v, *e; 340 char buf[20]; 341 int i, len, ret; 342 343 _DIAGASSERT(ei != NULL); 344 345 346 /* 347 * parse VARIABLE section. 348 */ 349 350 if (!var) 351 return (EFTYPE); 352 353 v = (const char *) var; 354 355 /* initialize structure */ 356 ei->maxcharset = 0; 357 for (i = 0; i < 4; i++) { 358 ei->recommend[i] = NULL; 359 ei->recommendsize[i] = 0; 360 } 361 ei->flags = 0; 362 363 while (*v) { 364 while (*v == ' ' || *v == '\t') 365 ++v; 366 367 /* find the token */ 368 e = v; 369 while (*e && *e != ' ' && *e != '\t') 370 ++e; 371 372 len = e-v; 373 if (len == 0) 374 break; 375 if (len>=sizeof(buf)) 376 goto parsefail; 377 sprintf(buf, "%.*s", len, v); 378 379 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 380 ; 381 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 382 ; 383 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 384 ; 385 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 386 ; 387 else 388 ret = _PARSEFAIL; 389 if (ret==_PARSEFAIL) 390 goto parsefail; 391 v = e; 392 393 } 394 395 return (0); 396 397 parsefail: 398 free(ei->recommend[0]); 399 free(ei->recommend[1]); 400 free(ei->recommend[2]); 401 free(ei->recommend[3]); 402 403 return (EFTYPE); 404 } 405 406 static __inline void 407 /*ARGSUSED*/ 408 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 409 _ISO2022State * __restrict s) 410 { 411 int i; 412 413 memset(s, 0, sizeof(*s)); 414 s->gl = 0; 415 s->gr = (ei->flags & F_8BIT) ? 1 : -1; 416 417 for (i = 0; i < 4; i++) { 418 if (ei->initg[i].final) { 419 s->g[i].type = ei->initg[i].type; 420 s->g[i].final = ei->initg[i].final; 421 s->g[i].interm = ei->initg[i].interm; 422 } 423 } 424 s->singlegl = s->singlegr = -1; 425 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 426 } 427 428 static __inline void 429 /*ARGSUSED*/ 430 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei, 431 void * __restrict pspriv, 432 const _ISO2022State * __restrict s) 433 { 434 memcpy(pspriv, (const void *)s, sizeof(*s)); 435 } 436 437 static __inline void 438 /*ARGSUSED*/ 439 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei, 440 _ISO2022State * __restrict s, 441 const void * __restrict pspriv) 442 { 443 memcpy((void *)s, pspriv, sizeof(*s)); 444 } 445 446 static int 447 /*ARGSUSED*/ 448 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 449 const void * __restrict var, 450 size_t lenvar) 451 { 452 453 _DIAGASSERT(ei != NULL); 454 455 return _citrus_ISO2022_parse_variable(ei, var, lenvar); 456 } 457 458 static void 459 /*ARGSUSED*/ 460 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei) 461 { 462 } 463 464 #define ESC '\033' 465 #define ECMA -1 466 #define INTERM -2 467 #define OECMA -3 468 static struct seqtable { 469 int type; 470 int csoff; 471 int finaloff; 472 int intermoff; 473 int versoff; 474 int len; 475 int chars[10]; 476 } seqtable[] = { 477 /* G0 94MULTI special */ 478 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 479 /* G0 94MULTI special with version identification */ 480 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 481 /* G? 94 */ 482 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 483 /* G? 94 with 2nd intermediate char */ 484 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 485 /* G? 96 */ 486 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 487 /* G? 96 with 2nd intermediate char */ 488 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 489 /* G? 94MULTI */ 490 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 491 /* G? 96MULTI */ 492 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 493 /* G? 94MULTI with version specification */ 494 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 495 /* LS2/3 */ 496 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 497 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 498 /* LS1/2/3R */ 499 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 500 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 501 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 502 /* SS2/3 */ 503 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 504 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 505 /* end of records */ 506 { 0, } 507 }; 508 509 static int 510 seqmatch(const char * __restrict s, size_t n, 511 const struct seqtable * __restrict sp) 512 { 513 const int *p; 514 515 _DIAGASSERT(s != NULL); 516 _DIAGASSERT(sp != NULL); 517 518 p = sp->chars; 519 while (p - sp->chars < n && p - sp->chars < sp->len) { 520 switch (*p) { 521 case ECMA: 522 if (!isecma(*s)) 523 goto terminate; 524 break; 525 case OECMA: 526 if (*s && strchr("@AB", *s)) 527 break; 528 else 529 goto terminate; 530 case INTERM: 531 if (!isinterm(*s)) 532 goto terminate; 533 break; 534 case CS94: 535 if (*s && strchr("()*+", *s)) 536 break; 537 else 538 goto terminate; 539 case CS96: 540 if (*s && strchr(",-./", *s)) 541 break; 542 else 543 goto terminate; 544 default: 545 if (*s != *p) 546 goto terminate; 547 break; 548 } 549 550 p++; 551 s++; 552 } 553 554 terminate: 555 return p - sp->chars; 556 } 557 558 static wchar_t 559 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei, 560 const char * __restrict string, size_t n, 561 const char ** __restrict result, 562 _ISO2022State * __restrict psenc) 563 { 564 wchar_t wchar = 0; 565 int cur; 566 struct seqtable *sp; 567 int nmatch; 568 int i; 569 570 _DIAGASSERT(ei != NULL); 571 _DIAGASSERT(state != NULL); 572 _DIAGASSERT(string != NULL); 573 /* result may be NULL */ 574 575 while (1) { 576 /* SI/SO */ 577 if (1 <= n && string[0] == '\017') { 578 psenc->gl = 0; 579 string++; 580 n--; 581 continue; 582 } 583 if (1 <= n && string[0] == '\016') { 584 psenc->gl = 1; 585 string++; 586 n--; 587 continue; 588 } 589 590 /* SS2/3R */ 591 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 592 psenc->singlegl = psenc->singlegr = 593 (string[0] - '\216') + 2; 594 string++; 595 n--; 596 continue; 597 } 598 599 /* eat the letter if this is not ESC */ 600 if (1 <= n && string[0] != '\033') 601 break; 602 603 /* look for a perfect match from escape sequences */ 604 for (sp = &seqtable[0]; sp->len; sp++) { 605 nmatch = seqmatch(string, n, sp); 606 if (sp->len == nmatch && n >= sp->len) 607 break; 608 } 609 610 if (!sp->len) 611 goto notseq; 612 613 if (sp->type != -1) { 614 if (sp->csoff == -1) 615 i = 0; 616 else { 617 switch (sp->type) { 618 case CS94: 619 case CS94MULTI: 620 i = string[sp->csoff] - '('; 621 break; 622 case CS96: 623 case CS96MULTI: 624 i = string[sp->csoff] - ','; 625 break; 626 } 627 } 628 psenc->g[i].type = sp->type; 629 psenc->g[i].final = '\0'; 630 psenc->g[i].interm = '\0'; 631 psenc->g[i].vers = '\0'; 632 /* sp->finaloff must not be -1 */ 633 if (sp->finaloff != -1) 634 psenc->g[i].final = string[sp->finaloff]; 635 if (sp->intermoff != -1) 636 psenc->g[i].interm = string[sp->intermoff]; 637 if (sp->versoff != -1) 638 psenc->g[i].vers = string[sp->versoff]; 639 640 string += sp->len; 641 n -= sp->len; 642 continue; 643 } 644 645 /* LS2/3 */ 646 if (2 <= n && string[0] == '\033' 647 && string[1] && strchr("no", string[1])) { 648 psenc->gl = string[1] - 'n' + 2; 649 string += 2; 650 n -= 2; 651 continue; 652 } 653 654 /* LS1/2/3R */ 655 /* XXX: { for vi showmatch */ 656 if (2 <= n && string[0] == '\033' 657 && string[1] && strchr("~}|", string[1])) { 658 psenc->gr = 3 - (string[1] - '|'); 659 string += 2; 660 n -= 2; 661 continue; 662 } 663 664 /* SS2/3 */ 665 if (2 <= n && string[0] == '\033' 666 && string[1] && strchr("NO", string[1])) { 667 psenc->singlegl = (string[1] - 'N') + 2; 668 string += 2; 669 n -= 2; 670 continue; 671 } 672 673 notseq: 674 /* 675 * if we've got an unknown escape sequence, eat the ESC at the 676 * head. otherwise, wait till full escape sequence comes. 677 */ 678 for (sp = &seqtable[0]; sp->len; sp++) { 679 nmatch = seqmatch(string, n, sp); 680 if (!nmatch) 681 continue; 682 683 /* 684 * if we are in the middle of escape sequence, 685 * we still need to wait for more characters to come 686 */ 687 if (n < sp->len) { 688 if (nmatch == n) { 689 if (result) 690 *result = string; 691 return (_ISO2022INVALID); 692 } 693 } else { 694 if (nmatch == sp->len) { 695 /* this case should not happen */ 696 goto eat; 697 } 698 } 699 } 700 701 break; 702 } 703 704 eat: 705 /* no letter to eat */ 706 if (n < 1) { 707 if (result) 708 *result = string; 709 return (_ISO2022INVALID); 710 } 711 712 /* normal chars. always eat C0/C1 as is. */ 713 if (iscntl(*string & 0xff)) 714 cur = -1; 715 else if (*string & 0x80) { 716 cur = (psenc->singlegr == -1) 717 ? psenc->gr : psenc->singlegr; 718 } else { 719 cur = (psenc->singlegl == -1) 720 ? psenc->gl : psenc->singlegl; 721 } 722 723 if (cur == -1) { 724 asis: 725 wchar = *string++ & 0xff; 726 if (result) 727 *result = string; 728 /* reset single shift state */ 729 psenc->singlegr = psenc->singlegl = -1; 730 return wchar; 731 } 732 733 /* length error check */ 734 switch (psenc->g[cur].type) { 735 case CS94MULTI: 736 case CS96MULTI: 737 if (!isthree(psenc->g[cur].final)) { 738 if (2 <= n 739 && (string[0] & 0x80) == (string[1] & 0x80)) 740 break; 741 } else { 742 if (3 <= n 743 && (string[0] & 0x80) == (string[1] & 0x80) 744 && (string[0] & 0x80) == (string[2] & 0x80)) 745 break; 746 } 747 748 /* we still need to wait for more characters to come */ 749 if (result) 750 *result = string; 751 return (_ISO2022INVALID); 752 753 case CS94: 754 case CS96: 755 if (1 <= n) 756 break; 757 758 /* we still need to wait for more characters to come */ 759 if (result) 760 *result = string; 761 return (_ISO2022INVALID); 762 } 763 764 /* range check */ 765 switch (psenc->g[cur].type) { 766 case CS94: 767 if (!(is94(string[0] & 0x7f))) 768 goto asis; 769 case CS96: 770 if (!(is96(string[0] & 0x7f))) 771 goto asis; 772 break; 773 case CS94MULTI: 774 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 775 goto asis; 776 break; 777 case CS96MULTI: 778 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 779 goto asis; 780 break; 781 } 782 783 /* extract the character. */ 784 switch (psenc->g[cur].type) { 785 case CS94: 786 /* special case for ASCII. */ 787 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 788 wchar = *string++; 789 wchar &= 0x7f; 790 break; 791 } 792 wchar = psenc->g[cur].final; 793 wchar = (wchar << 8); 794 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 795 wchar = (wchar << 8); 796 wchar = (wchar << 8) | (*string++ & 0x7f); 797 break; 798 case CS96: 799 /* special case for ISO-8859-1. */ 800 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 801 wchar = *string++; 802 wchar &= 0x7f; 803 wchar |= 0x80; 804 break; 805 } 806 wchar = psenc->g[cur].final; 807 wchar = (wchar << 8); 808 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 809 wchar = (wchar << 8); 810 wchar = (wchar << 8) | (*string++ & 0x7f); 811 wchar |= 0x80; 812 break; 813 case CS94MULTI: 814 case CS96MULTI: 815 wchar = psenc->g[cur].final; 816 wchar = (wchar << 8); 817 if (isthree(psenc->g[cur].final)) 818 wchar |= (*string++ & 0x7f); 819 wchar = (wchar << 8) | (*string++ & 0x7f); 820 wchar = (wchar << 8) | (*string++ & 0x7f); 821 if (psenc->g[cur].type == CS96MULTI) 822 wchar |= 0x80; 823 break; 824 } 825 826 if (result) 827 *result = string; 828 /* reset single shift state */ 829 psenc->singlegr = psenc->singlegl = -1; 830 return wchar; 831 } 832 833 834 835 static int 836 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 837 wchar_t * __restrict pwc, 838 const char ** __restrict s, 839 size_t n, _ISO2022State * __restrict psenc, 840 size_t * __restrict nresult) 841 { 842 wchar_t wchar; 843 const char *s0, *p, *result; 844 int c; 845 int chlenbak; 846 847 _DIAGASSERT(nresult != 0); 848 _DIAGASSERT(ei != NULL); 849 _DIAGASSERT(psenc != NULL); 850 _DIAGASSERT(s != NULL); 851 852 s0 = *s; 853 c = 0; 854 chlenbak = psenc->chlen; 855 856 /* 857 * if we have something in buffer, use that. 858 * otherwise, skip here 859 */ 860 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) { 861 /* illgeal state */ 862 _citrus_ISO2022_init_state(ei, psenc); 863 goto encoding_error; 864 } 865 if (psenc->chlen == 0) 866 goto emptybuf; 867 868 /* buffer is not empty */ 869 p = psenc->ch; 870 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) { 871 if (n > 0) { 872 psenc->ch[psenc->chlen++] = *s0++; 873 n--; 874 } 875 876 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 877 &result, psenc); 878 if (wchar != _ISO2022INVALID) { 879 c += result - p; 880 if (psenc->chlen > c) 881 memmove(psenc->ch, result, psenc->chlen - c); 882 if (psenc->chlen < c) 883 psenc->chlen = 0; 884 else 885 psenc->chlen -= c; 886 goto output; 887 } 888 889 c += result - p; 890 p = result; 891 892 if (n == 0) 893 goto restart; 894 } 895 896 /* escape sequence too long? */ 897 goto encoding_error; 898 899 emptybuf: 900 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 901 if (wchar != _ISO2022INVALID) { 902 c += result - s0; 903 psenc->chlen = 0; 904 s0 = result; 905 goto output; 906 } 907 if (result > s0 && n > result - s0) { 908 c += (result - s0); 909 n -= (result - s0); 910 s0 = result; 911 goto emptybuf; 912 } 913 n += c; 914 if (n < sizeof(psenc->ch)) { 915 memcpy(psenc->ch, s0 - c, n); 916 psenc->chlen = n; 917 s0 = result; 918 goto restart; 919 } 920 921 /* escape sequence too long? */ 922 923 encoding_error: 924 psenc->chlen = 0; 925 *nresult = (size_t)-1; 926 return (EILSEQ); 927 928 output: 929 *s = s0; 930 if (pwc) 931 *pwc = wchar; 932 933 if (!wchar) 934 *nresult = 0; 935 else 936 *nresult = c - chlenbak; 937 938 return (0); 939 940 restart: 941 *s = s0; 942 *nresult = (size_t)-2; 943 944 return (0); 945 } 946 947 static int 948 recommendation(_ISO2022EncodingInfo * __restrict ei, 949 _ISO2022Charset * __restrict cs) 950 { 951 int i, j; 952 _ISO2022Charset *recommend; 953 954 _DIAGASSERT(ei != NULL); 955 _DIAGASSERT(cs != NULL); 956 957 /* first, try a exact match. */ 958 for (i = 0; i < 4; i++) { 959 recommend = ei->recommend[i]; 960 for (j = 0; j < ei->recommendsize[i]; j++) { 961 if (cs->type != recommend[j].type) 962 continue; 963 if (cs->final != recommend[j].final) 964 continue; 965 if (cs->interm != recommend[j].interm) 966 continue; 967 968 return i; 969 } 970 } 971 972 /* then, try a wildcard match over final char. */ 973 for (i = 0; i < 4; i++) { 974 recommend = ei->recommend[i]; 975 for (j = 0; j < ei->recommendsize[i]; j++) { 976 if (cs->type != recommend[j].type) 977 continue; 978 if (cs->final && (cs->final != recommend[j].final)) 979 continue; 980 if (cs->interm && (cs->interm != recommend[j].interm)) 981 continue; 982 983 return i; 984 } 985 } 986 987 /* there's no recommendation. make a guess. */ 988 if (ei->maxcharset == 0) { 989 return 0; 990 } else { 991 switch (cs->type) { 992 case CS94: 993 case CS94MULTI: 994 return 0; 995 case CS96: 996 case CS96MULTI: 997 return 1; 998 } 999 } 1000 return 0; 1001 } 1002 1003 static int 1004 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1005 char * __restrict string, size_t n, 1006 char ** __restrict result, 1007 _ISO2022State * __restrict psenc) 1008 { 1009 int i = 0, len; 1010 _ISO2022Charset cs; 1011 char *p; 1012 char tmp[MB_LEN_MAX]; 1013 int target; 1014 u_char mask; 1015 int bit8; 1016 1017 _DIAGASSERT(ei != NULL); 1018 _DIAGASSERT(string != NULL); 1019 /* result may be NULL */ 1020 /* state appears to be unused */ 1021 1022 if (iscntl(wc & 0xff)) { 1023 /* go back to ASCII on control chars */ 1024 cs.type = CS94; 1025 cs.final = 'B'; 1026 cs.interm = '\0'; 1027 } else if (!(wc & ~0xff)) { 1028 if (wc & 0x80) { 1029 /* special treatment for ISO-8859-1 */ 1030 cs.type = CS96; 1031 cs.final = 'A'; 1032 cs.interm = '\0'; 1033 } else { 1034 /* special treatment for ASCII */ 1035 cs.type = CS94; 1036 cs.final = 'B'; 1037 cs.interm = '\0'; 1038 } 1039 } else { 1040 cs.final = (wc >> 24) & 0x7f; 1041 if ((wc >> 16) & 0x80) 1042 cs.interm = (wc >> 16) & 0x7f; 1043 else 1044 cs.interm = '\0'; 1045 if (wc & 0x80) 1046 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1047 else 1048 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1049 } 1050 target = recommendation(ei, &cs); 1051 p = tmp; 1052 bit8 = ei->flags & F_8BIT; 1053 1054 /* designate the charset onto the target plane(G0/1/2/3). */ 1055 if (psenc->g[target].type == cs.type 1056 && psenc->g[target].final == cs.final 1057 && psenc->g[target].interm == cs.interm) 1058 goto planeok; 1059 1060 *p++ = '\033'; 1061 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1062 *p++ = '$'; 1063 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) 1064 && !cs.interm && !(ei->flags & F_NOOLD)) 1065 ; 1066 else if (cs.type == CS94 || cs.type == CS94MULTI) 1067 *p++ = "()*+"[target]; 1068 else 1069 *p++ = ",-./"[target]; 1070 if (cs.interm) 1071 *p++ = cs.interm; 1072 *p++ = cs.final; 1073 1074 psenc->g[target].type = cs.type; 1075 psenc->g[target].final = cs.final; 1076 psenc->g[target].interm = cs.interm; 1077 1078 planeok: 1079 /* invoke the plane onto GL or GR. */ 1080 if (psenc->gl == target) 1081 goto sideok; 1082 if (bit8 && psenc->gr == target) 1083 goto sideok; 1084 1085 if (target == 0 && (ei->flags & F_LS0)) { 1086 *p++ = '\017'; 1087 psenc->gl = 0; 1088 } else if (target == 1 && (ei->flags & F_LS1)) { 1089 *p++ = '\016'; 1090 psenc->gl = 1; 1091 } else if (target == 2 && (ei->flags & F_LS2)) { 1092 *p++ = '\033'; 1093 *p++ = 'n'; 1094 psenc->gl = 2; 1095 } else if (target == 3 && (ei->flags & F_LS3)) { 1096 *p++ = '\033'; 1097 *p++ = 'o'; 1098 psenc->gl = 3; 1099 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1100 *p++ = '\033'; 1101 *p++ = '~'; 1102 psenc->gr = 1; 1103 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1104 *p++ = '\033'; 1105 /*{*/ 1106 *p++ = '}'; 1107 psenc->gr = 2; 1108 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1109 *p++ = '\033'; 1110 *p++ = '|'; 1111 psenc->gr = 3; 1112 } else if (target == 2 && (ei->flags & F_SS2)) { 1113 *p++ = '\033'; 1114 *p++ = 'N'; 1115 psenc->singlegl = 2; 1116 } else if (target == 3 && (ei->flags & F_SS3)) { 1117 *p++ = '\033'; 1118 *p++ = 'O'; 1119 psenc->singlegl = 3; 1120 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1121 *p++ = '\216'; 1122 *p++ = 'N'; 1123 psenc->singlegl = psenc->singlegr = 2; 1124 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1125 *p++ = '\217'; 1126 *p++ = 'O'; 1127 psenc->singlegl = psenc->singlegr = 3; 1128 } else 1129 abort(); 1130 1131 sideok: 1132 if (psenc->singlegl == target) 1133 mask = 0x00; 1134 else if (psenc->singlegr == target) 1135 mask = 0x80; 1136 else if (psenc->gl == target) 1137 mask = 0x00; 1138 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1139 mask = 0x80; 1140 else 1141 abort(); 1142 1143 switch (cs.type) { 1144 case CS94: 1145 case CS96: 1146 i = 1; 1147 break; 1148 case CS94MULTI: 1149 case CS96MULTI: 1150 i = isthree(cs.final) ? 3 : 2; 1151 break; 1152 } 1153 if (wc != 0) 1154 while (i-- > 0) 1155 *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1156 1157 /* reset single shift state */ 1158 psenc->singlegl = psenc->singlegr = -1; 1159 1160 len = p - tmp; 1161 if (n < len) { 1162 if (result) 1163 *result = (char *)0; 1164 } else { 1165 if (result) 1166 *result = string + len; 1167 memcpy(string, tmp, len); 1168 } 1169 return len; 1170 } 1171 1172 static int 1173 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1174 char * __restrict s, size_t n, wchar_t wc, 1175 _ISO2022State * __restrict psenc, 1176 size_t * __restrict nresult) 1177 { 1178 char buf[MB_LEN_MAX]; 1179 char *result; 1180 int len, ret; 1181 1182 _DIAGASSERT(ei != NULL); 1183 _DIAGASSERT(nresult != 0); 1184 _DIAGASSERT(s != NULL); 1185 1186 /* XXX state will be modified after this operation... */ 1187 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc); 1188 if (sizeof(buf) < len || n < len) { 1189 /* XXX should recover state? */ 1190 ret = E2BIG; 1191 goto err; 1192 } 1193 1194 memcpy(s, buf, len); 1195 *nresult = (size_t)len; 1196 return (0); 1197 1198 err: 1199 /* bound check failure */ 1200 *nresult = (size_t)-1; 1201 return ret; 1202 } 1203 1204 static __inline int 1205 /*ARGSUSED*/ 1206 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei, 1207 _csid_t * __restrict csid, 1208 _index_t * __restrict idx, wchar_t wc) 1209 { 1210 wchar_t m, nm; 1211 1212 _DIAGASSERT(csid != NULL && idx != NULL); 1213 1214 m = wc & 0x7FFF8080; 1215 nm = wc & 0x007F7F7F; 1216 if (m & 0x00800000) { 1217 nm &= 0x00007F7F; 1218 } else { 1219 m &= 0x7F008080; 1220 } 1221 if (nm & 0x007F0000) { 1222 /* ^3 mark */ 1223 m |= 0x007F0000; 1224 } else if (nm & 0x00007F00) { 1225 /* ^2 mark */ 1226 m |= 0x00007F00; 1227 } 1228 *csid = (_csid_t)m; 1229 *idx = (_index_t)nm; 1230 1231 return (0); 1232 } 1233 1234 static __inline int 1235 /*ARGSUSED*/ 1236 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei, 1237 wchar_t * __restrict wc, 1238 _csid_t csid, _index_t idx) 1239 { 1240 1241 _DIAGASSERT(ei != NULL && wc != NULL); 1242 1243 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 1244 1245 return (0); 1246 } 1247 1248 /* ---------------------------------------------------------------------- 1249 * public interface for ctype 1250 */ 1251 1252 _CITRUS_CTYPE_DECLS(ISO2022); 1253 _CITRUS_CTYPE_DEF_OPS(ISO2022); 1254 1255 #include "citrus_ctype_template.h" 1256 1257 /* ---------------------------------------------------------------------- 1258 * public interface for stdenc 1259 */ 1260 1261 _CITRUS_STDENC_DECLS(ISO2022); 1262 _CITRUS_STDENC_DEF_OPS(ISO2022); 1263 1264 #include "citrus_stdenc_template.h" 1265