1 /* $NetBSD: citrus_iso2022.c,v 1.20 2010/12/07 22:01:45 joerg Exp $ */ 2 3 /*- 4 * Copyright (c)1999, 2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 #if defined(LIBC_SCCS) && !defined(lint) 33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.20 2010/12/07 22:01:45 joerg Exp $"); 34 #endif /* LIBC_SCCS and not lint */ 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <stddef.h> 42 #include <wchar.h> 43 #include <sys/types.h> 44 #include <limits.h> 45 46 #include "citrus_namespace.h" 47 #include "citrus_types.h" 48 #include "citrus_module.h" 49 #include "citrus_ctype.h" 50 #include "citrus_stdenc.h" 51 #include "citrus_iso2022.h" 52 53 54 /* ---------------------------------------------------------------------- 55 * private stuffs used by templates 56 */ 57 58 59 /* 60 * wchar_t mappings: 61 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 62 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 63 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 64 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 65 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 66 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 67 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 68 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 69 * 94x94 charset (ESC & V ESC $ ( F) 70 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 71 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 72 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 73 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit) 74 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 75 */ 76 77 typedef struct { 78 u_char type; 79 #define CS94 (0U) 80 #define CS96 (1U) 81 #define CS94MULTI (2U) 82 #define CS96MULTI (3U) 83 84 u_char final; 85 u_char interm; 86 u_char vers; 87 } _ISO2022Charset; 88 89 static const _ISO2022Charset ascii = { CS94, 'B', '\0', '\0' }; 90 static const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' }; 91 92 typedef struct { 93 _ISO2022Charset g[4]; 94 /* need 3 bits to hold -1, 0, ..., 3 */ 95 int gl:3, 96 gr:3, 97 singlegl:3, 98 singlegr:3; 99 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 100 int chlen; 101 int flags; 102 #define _ISO2022STATE_FLAG_INITIALIZED 1 103 } _ISO2022State; 104 105 typedef struct { 106 _ISO2022Charset *recommend[4]; 107 size_t recommendsize[4]; 108 _ISO2022Charset initg[4]; 109 int maxcharset; 110 int flags; 111 #define F_8BIT 0x0001 112 #define F_NOOLD 0x0002 113 #define F_SI 0x0010 /*0F*/ 114 #define F_SO 0x0020 /*0E*/ 115 #define F_LS0 0x0010 /*0F*/ 116 #define F_LS1 0x0020 /*0E*/ 117 #define F_LS2 0x0040 /*ESC n*/ 118 #define F_LS3 0x0080 /*ESC o*/ 119 #define F_LS1R 0x0100 /*ESC ~*/ 120 #define F_LS2R 0x0200 /*ESC }*/ 121 #define F_LS3R 0x0400 /*ESC |*/ 122 #define F_SS2 0x0800 /*ESC N*/ 123 #define F_SS3 0x1000 /*ESC O*/ 124 #define F_SS2R 0x2000 /*8E*/ 125 #define F_SS3R 0x4000 /*8F*/ 126 } _ISO2022EncodingInfo; 127 typedef struct { 128 _ISO2022EncodingInfo ei; 129 struct { 130 /* for future multi-locale facility */ 131 _ISO2022State s_mblen; 132 _ISO2022State s_mbrlen; 133 _ISO2022State s_mbrtowc; 134 _ISO2022State s_mbtowc; 135 _ISO2022State s_mbsrtowcs; 136 _ISO2022State s_wcrtomb; 137 _ISO2022State s_wcsrtombs; 138 _ISO2022State s_wctomb; 139 } states; 140 } _ISO2022CTypeInfo; 141 142 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 143 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 144 145 #define _FUNCNAME(m) _citrus_ISO2022_##m 146 #define _ENCODING_INFO _ISO2022EncodingInfo 147 #define _CTYPE_INFO _ISO2022CTypeInfo 148 #define _ENCODING_STATE _ISO2022State 149 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 150 #define _ENCODING_IS_STATE_DEPENDENT 1 151 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 152 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 153 154 155 #define _ISO2022INVALID (wchar_t)-1 156 157 static __inline int isc0(__uint8_t x) { return ((x & 0x1f) == x); } 158 static __inline int isc1(__uint8_t x) { return (0x80 <= x && x <= 0x9f); } 159 static __inline int iscntl(__uint8_t x) { return (isc0(x) || isc1(x) || x == 0x7f); } 160 static __inline int is94(__uint8_t x) { return (0x21 <= x && x <= 0x7e); } 161 static __inline int is96(__uint8_t x) { return (0x20 <= x && x <= 0x7f); } 162 static __inline int isecma(__uint8_t x) { return (0x30 <= x && x <= 0x7f); } 163 static __inline int isinterm(__uint8_t x) { return (0x20 <= x && x <= 0x2f); } 164 static __inline int isthree(__uint8_t x) { return (0x60 <= x && x <= 0x6f); } 165 166 static __inline int 167 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 168 { 169 170 _DIAGASSERT(p != NULL); 171 _DIAGASSERT(cs != NULL); 172 173 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 174 cs->final = (u_char)(p[3] & 0xff); 175 cs->interm = '\0'; 176 cs->vers = '\0'; 177 cs->type = CS94MULTI; 178 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 179 cs->final = (u_char)(p[3] & 0xff); 180 cs->interm = '\0'; 181 cs->vers = '\0'; 182 cs->type = CS96MULTI; 183 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 184 cs->final = (u_char)(p[2] & 0xff); 185 cs->interm = '\0'; 186 cs->vers = '\0'; 187 cs->type = CS94; 188 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 189 cs->final = (u_char )(p[2] & 0xff); 190 cs->interm = '\0'; 191 cs->vers = '\0'; 192 cs->type = CS96; 193 } else { 194 return 1; 195 } 196 197 return 0; 198 } 199 200 201 #define _NOTMATCH 0 202 #define _MATCH 1 203 #define _PARSEFAIL 2 204 205 static __inline int 206 get_recommend(_ISO2022EncodingInfo * __restrict ei, 207 const char * __restrict token) 208 { 209 int i; 210 _ISO2022Charset cs, *p; 211 212 if (!strchr("0123", token[0]) || token[1] != '=') 213 return (_NOTMATCH); 214 215 if (getcs(&token[2], &cs) == 0) 216 ; 217 else if (!strcmp(&token[2], "94")) { 218 cs.final = (u_char)(token[4]); 219 cs.interm = '\0'; 220 cs.vers = '\0'; 221 cs.type = CS94; 222 } else if (!strcmp(&token[2], "96")) { 223 cs.final = (u_char)(token[4]); 224 cs.interm = '\0'; 225 cs.vers = '\0'; 226 cs.type = CS96; 227 } else if (!strcmp(&token[2], "94$")) { 228 cs.final = (u_char)(token[5]); 229 cs.interm = '\0'; 230 cs.vers = '\0'; 231 cs.type = CS94MULTI; 232 } else if (!strcmp(&token[2], "96$")) { 233 cs.final = (u_char)(token[5]); 234 cs.interm = '\0'; 235 cs.vers = '\0'; 236 cs.type = CS96MULTI; 237 } else { 238 return (_PARSEFAIL); 239 } 240 241 i = token[0] - '0'; 242 if (!ei->recommend[i]) { 243 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 244 } else { 245 p = realloc(ei->recommend[i], 246 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1)); 247 if (!p) 248 return (_PARSEFAIL); 249 ei->recommend[i] = p; 250 } 251 if (!ei->recommend[i]) 252 return (_PARSEFAIL); 253 ei->recommendsize[i]++; 254 255 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 256 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 257 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 258 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 259 260 return (_MATCH); 261 } 262 263 static __inline int 264 get_initg(_ISO2022EncodingInfo * __restrict ei, 265 const char * __restrict token) 266 { 267 _ISO2022Charset cs; 268 269 if (strncmp("INIT", &token[0], 4) || 270 !strchr("0123", token[4]) || 271 token[5] != '=') 272 return (_NOTMATCH); 273 274 if (getcs(&token[6], &cs) != 0) 275 return (_PARSEFAIL); 276 277 ei->initg[token[4] - '0'].type = cs.type; 278 ei->initg[token[4] - '0'].final = cs.final; 279 ei->initg[token[4] - '0'].interm = cs.interm; 280 ei->initg[token[4] - '0'].vers = cs.vers; 281 282 return (_MATCH); 283 } 284 285 static __inline int 286 get_max(_ISO2022EncodingInfo * __restrict ei, 287 const char * __restrict token) 288 { 289 if (!strcmp(token, "MAX1")) { 290 ei->maxcharset = 1; 291 } else if (!strcmp(token, "MAX2")) { 292 ei->maxcharset = 2; 293 } else if (!strcmp(token, "MAX3")) { 294 ei->maxcharset = 3; 295 } else 296 return (_NOTMATCH); 297 298 return (_MATCH); 299 } 300 301 302 static __inline int 303 get_flags(_ISO2022EncodingInfo * __restrict ei, 304 const char * __restrict token) 305 { 306 int i; 307 static struct { 308 const char *tag; 309 int flag; 310 } const tags[] = { 311 { "DUMMY", 0 }, 312 { "8BIT", F_8BIT }, 313 { "NOOLD", F_NOOLD }, 314 { "SI", F_SI }, 315 { "SO", F_SO }, 316 { "LS0", F_LS0 }, 317 { "LS1", F_LS1 }, 318 { "LS2", F_LS2 }, 319 { "LS3", F_LS3 }, 320 { "LS1R", F_LS1R }, 321 { "LS2R", F_LS2R }, 322 { "LS3R", F_LS3R }, 323 { "SS2", F_SS2 }, 324 { "SS3", F_SS3 }, 325 { "SS2R", F_SS2R }, 326 { "SS3R", F_SS3R }, 327 { NULL, 0 } 328 }; 329 330 for (i = 0; tags[i].tag; i++) { 331 if (!strcmp(token, tags[i].tag)) { 332 ei->flags |= tags[i].flag; 333 return (_MATCH); 334 } 335 } 336 337 return (_NOTMATCH); 338 } 339 340 341 static __inline int 342 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 343 const void * __restrict var, size_t lenvar) 344 { 345 char const *v, *e; 346 char buf[20]; 347 int i, len, ret; 348 349 _DIAGASSERT(ei != NULL); 350 351 352 /* 353 * parse VARIABLE section. 354 */ 355 356 if (!var) 357 return (EFTYPE); 358 359 v = (const char *) var; 360 361 /* initialize structure */ 362 ei->maxcharset = 0; 363 for (i = 0; i < 4; i++) { 364 ei->recommend[i] = NULL; 365 ei->recommendsize[i] = 0; 366 } 367 ei->flags = 0; 368 369 while (*v) { 370 while (*v == ' ' || *v == '\t') 371 ++v; 372 373 /* find the token */ 374 e = v; 375 while (*e && *e != ' ' && *e != '\t') 376 ++e; 377 378 len = e-v; 379 if (len == 0) 380 break; 381 if (len>=sizeof(buf)) 382 goto parsefail; 383 snprintf(buf, sizeof(buf), "%.*s", len, v); 384 385 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 386 ; 387 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 388 ; 389 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 390 ; 391 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 392 ; 393 else 394 ret = _PARSEFAIL; 395 if (ret==_PARSEFAIL) 396 goto parsefail; 397 v = e; 398 399 } 400 401 return (0); 402 403 parsefail: 404 free(ei->recommend[0]); 405 free(ei->recommend[1]); 406 free(ei->recommend[2]); 407 free(ei->recommend[3]); 408 409 return (EFTYPE); 410 } 411 412 static __inline void 413 /*ARGSUSED*/ 414 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 415 _ISO2022State * __restrict s) 416 { 417 int i; 418 419 memset(s, 0, sizeof(*s)); 420 s->gl = 0; 421 s->gr = (ei->flags & F_8BIT) ? 1 : -1; 422 423 for (i = 0; i < 4; i++) { 424 if (ei->initg[i].final) { 425 s->g[i].type = ei->initg[i].type; 426 s->g[i].final = ei->initg[i].final; 427 s->g[i].interm = ei->initg[i].interm; 428 } 429 } 430 s->singlegl = s->singlegr = -1; 431 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 432 } 433 434 static __inline void 435 /*ARGSUSED*/ 436 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei, 437 void * __restrict pspriv, 438 const _ISO2022State * __restrict s) 439 { 440 memcpy(pspriv, (const void *)s, sizeof(*s)); 441 } 442 443 static __inline void 444 /*ARGSUSED*/ 445 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei, 446 _ISO2022State * __restrict s, 447 const void * __restrict pspriv) 448 { 449 memcpy((void *)s, pspriv, sizeof(*s)); 450 } 451 452 static int 453 /*ARGSUSED*/ 454 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 455 const void * __restrict var, 456 size_t lenvar) 457 { 458 459 _DIAGASSERT(ei != NULL); 460 461 return _citrus_ISO2022_parse_variable(ei, var, lenvar); 462 } 463 464 static void 465 /*ARGSUSED*/ 466 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei) 467 { 468 } 469 470 #define ESC '\033' 471 #define ECMA -1 472 #define INTERM -2 473 #define OECMA -3 474 static const struct seqtable { 475 int type; 476 int csoff; 477 int finaloff; 478 int intermoff; 479 int versoff; 480 int len; 481 int chars[10]; 482 } seqtable[] = { 483 /* G0 94MULTI special */ 484 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 485 /* G0 94MULTI special with version identification */ 486 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 487 /* G? 94 */ 488 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 489 /* G? 94 with 2nd intermediate char */ 490 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 491 /* G? 96 */ 492 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 493 /* G? 96 with 2nd intermediate char */ 494 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 495 /* G? 94MULTI */ 496 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 497 /* G? 96MULTI */ 498 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 499 /* G? 94MULTI with version specification */ 500 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 501 /* LS2/3 */ 502 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 503 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 504 /* LS1/2/3R */ 505 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 506 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 507 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 508 /* SS2/3 */ 509 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 510 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 511 /* end of records */ 512 { 0, } 513 }; 514 515 static int 516 seqmatch(const char * __restrict s, size_t n, 517 const struct seqtable * __restrict sp) 518 { 519 const int *p; 520 521 _DIAGASSERT(s != NULL); 522 _DIAGASSERT(sp != NULL); 523 524 p = sp->chars; 525 while (p - sp->chars < n && p - sp->chars < sp->len) { 526 switch (*p) { 527 case ECMA: 528 if (!isecma(*s)) 529 goto terminate; 530 break; 531 case OECMA: 532 if (*s && strchr("@AB", *s)) 533 break; 534 else 535 goto terminate; 536 case INTERM: 537 if (!isinterm(*s)) 538 goto terminate; 539 break; 540 case CS94: 541 if (*s && strchr("()*+", *s)) 542 break; 543 else 544 goto terminate; 545 case CS96: 546 if (*s && strchr(",-./", *s)) 547 break; 548 else 549 goto terminate; 550 default: 551 if (*s != *p) 552 goto terminate; 553 break; 554 } 555 556 p++; 557 s++; 558 } 559 560 terminate: 561 return p - sp->chars; 562 } 563 564 static wchar_t 565 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei, 566 const char * __restrict string, size_t n, 567 const char ** __restrict result, 568 _ISO2022State * __restrict psenc) 569 { 570 wchar_t wchar = 0; 571 int cur; 572 const struct seqtable *sp; 573 int nmatch; 574 int i; 575 576 _DIAGASSERT(ei != NULL); 577 _DIAGASSERT(psenc != NULL); 578 _DIAGASSERT(string != NULL); 579 /* result may be NULL */ 580 581 while (1) { 582 /* SI/SO */ 583 if (1 <= n && string[0] == '\017') { 584 psenc->gl = 0; 585 string++; 586 n--; 587 continue; 588 } 589 if (1 <= n && string[0] == '\016') { 590 psenc->gl = 1; 591 string++; 592 n--; 593 continue; 594 } 595 596 /* SS2/3R */ 597 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 598 psenc->singlegl = psenc->singlegr = 599 (string[0] - '\216') + 2; 600 string++; 601 n--; 602 continue; 603 } 604 605 /* eat the letter if this is not ESC */ 606 if (1 <= n && string[0] != '\033') 607 break; 608 609 /* look for a perfect match from escape sequences */ 610 for (sp = &seqtable[0]; sp->len; sp++) { 611 nmatch = seqmatch(string, n, sp); 612 if (sp->len == nmatch && n >= sp->len) 613 break; 614 } 615 616 if (!sp->len) 617 goto notseq; 618 619 if (sp->type != -1) { 620 if (sp->csoff == -1) 621 i = 0; 622 else { 623 switch (sp->type) { 624 case CS94: 625 case CS94MULTI: 626 i = string[sp->csoff] - '('; 627 break; 628 case CS96: 629 case CS96MULTI: 630 i = string[sp->csoff] - ','; 631 break; 632 default: 633 return (_ISO2022INVALID); 634 } 635 } 636 psenc->g[i].type = sp->type; 637 psenc->g[i].final = '\0'; 638 psenc->g[i].interm = '\0'; 639 psenc->g[i].vers = '\0'; 640 /* sp->finaloff must not be -1 */ 641 if (sp->finaloff != -1) 642 psenc->g[i].final = string[sp->finaloff]; 643 if (sp->intermoff != -1) 644 psenc->g[i].interm = string[sp->intermoff]; 645 if (sp->versoff != -1) 646 psenc->g[i].vers = string[sp->versoff]; 647 648 string += sp->len; 649 n -= sp->len; 650 continue; 651 } 652 653 /* LS2/3 */ 654 if (2 <= n && string[0] == '\033' 655 && string[1] && strchr("no", string[1])) { 656 psenc->gl = string[1] - 'n' + 2; 657 string += 2; 658 n -= 2; 659 continue; 660 } 661 662 /* LS1/2/3R */ 663 /* XXX: { for vi showmatch */ 664 if (2 <= n && string[0] == '\033' 665 && string[1] && strchr("~}|", string[1])) { 666 psenc->gr = 3 - (string[1] - '|'); 667 string += 2; 668 n -= 2; 669 continue; 670 } 671 672 /* SS2/3 */ 673 if (2 <= n && string[0] == '\033' 674 && string[1] && strchr("NO", string[1])) { 675 psenc->singlegl = (string[1] - 'N') + 2; 676 string += 2; 677 n -= 2; 678 continue; 679 } 680 681 notseq: 682 /* 683 * if we've got an unknown escape sequence, eat the ESC at the 684 * head. otherwise, wait till full escape sequence comes. 685 */ 686 for (sp = &seqtable[0]; sp->len; sp++) { 687 nmatch = seqmatch(string, n, sp); 688 if (!nmatch) 689 continue; 690 691 /* 692 * if we are in the middle of escape sequence, 693 * we still need to wait for more characters to come 694 */ 695 if (n < sp->len) { 696 if (nmatch == n) { 697 if (result) 698 *result = string; 699 return (_ISO2022INVALID); 700 } 701 } else { 702 if (nmatch == sp->len) { 703 /* this case should not happen */ 704 goto eat; 705 } 706 } 707 } 708 709 break; 710 } 711 712 eat: 713 /* no letter to eat */ 714 if (n < 1) { 715 if (result) 716 *result = string; 717 return (_ISO2022INVALID); 718 } 719 720 /* normal chars. always eat C0/C1 as is. */ 721 if (iscntl(*string & 0xff)) 722 cur = -1; 723 else if (*string & 0x80) { 724 cur = (psenc->singlegr == -1) 725 ? psenc->gr : psenc->singlegr; 726 } else { 727 cur = (psenc->singlegl == -1) 728 ? psenc->gl : psenc->singlegl; 729 } 730 731 if (cur == -1) { 732 asis: 733 wchar = *string++ & 0xff; 734 if (result) 735 *result = string; 736 /* reset single shift state */ 737 psenc->singlegr = psenc->singlegl = -1; 738 return wchar; 739 } 740 741 /* length error check */ 742 switch (psenc->g[cur].type) { 743 case CS94MULTI: 744 case CS96MULTI: 745 if (!isthree(psenc->g[cur].final)) { 746 if (2 <= n 747 && (string[0] & 0x80) == (string[1] & 0x80)) 748 break; 749 } else { 750 if (3 <= n 751 && (string[0] & 0x80) == (string[1] & 0x80) 752 && (string[0] & 0x80) == (string[2] & 0x80)) 753 break; 754 } 755 756 /* we still need to wait for more characters to come */ 757 if (result) 758 *result = string; 759 return (_ISO2022INVALID); 760 761 case CS94: 762 case CS96: 763 if (1 <= n) 764 break; 765 766 /* we still need to wait for more characters to come */ 767 if (result) 768 *result = string; 769 return (_ISO2022INVALID); 770 } 771 772 /* range check */ 773 switch (psenc->g[cur].type) { 774 case CS94: 775 if (!(is94(string[0] & 0x7f))) 776 goto asis; 777 case CS96: 778 if (!(is96(string[0] & 0x7f))) 779 goto asis; 780 break; 781 case CS94MULTI: 782 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 783 goto asis; 784 break; 785 case CS96MULTI: 786 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 787 goto asis; 788 break; 789 } 790 791 /* extract the character. */ 792 switch (psenc->g[cur].type) { 793 case CS94: 794 /* special case for ASCII. */ 795 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 796 wchar = *string++; 797 wchar &= 0x7f; 798 break; 799 } 800 wchar = psenc->g[cur].final; 801 wchar = (wchar << 8); 802 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 803 wchar = (wchar << 8); 804 wchar = (wchar << 8) | (*string++ & 0x7f); 805 break; 806 case CS96: 807 /* special case for ISO-8859-1. */ 808 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 809 wchar = *string++; 810 wchar &= 0x7f; 811 wchar |= 0x80; 812 break; 813 } 814 wchar = psenc->g[cur].final; 815 wchar = (wchar << 8); 816 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 817 wchar = (wchar << 8); 818 wchar = (wchar << 8) | (*string++ & 0x7f); 819 wchar |= 0x80; 820 break; 821 case CS94MULTI: 822 case CS96MULTI: 823 wchar = psenc->g[cur].final; 824 wchar = (wchar << 8); 825 if (isthree(psenc->g[cur].final)) 826 wchar |= (*string++ & 0x7f); 827 wchar = (wchar << 8) | (*string++ & 0x7f); 828 wchar = (wchar << 8) | (*string++ & 0x7f); 829 if (psenc->g[cur].type == CS96MULTI) 830 wchar |= 0x80; 831 break; 832 } 833 834 if (result) 835 *result = string; 836 /* reset single shift state */ 837 psenc->singlegr = psenc->singlegl = -1; 838 return wchar; 839 } 840 841 842 843 static int 844 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 845 wchar_t * __restrict pwc, 846 const char ** __restrict s, 847 size_t n, _ISO2022State * __restrict psenc, 848 size_t * __restrict nresult) 849 { 850 wchar_t wchar; 851 const char *s0, *p, *result; 852 int c; 853 int chlenbak; 854 855 _DIAGASSERT(nresult != 0); 856 _DIAGASSERT(ei != NULL); 857 _DIAGASSERT(psenc != NULL); 858 _DIAGASSERT(s != NULL); 859 860 if (*s == NULL) { 861 _citrus_ISO2022_init_state(ei, psenc); 862 *nresult = _ENCODING_IS_STATE_DEPENDENT; 863 return 0; 864 } 865 s0 = *s; 866 c = 0; 867 chlenbak = psenc->chlen; 868 869 /* 870 * if we have something in buffer, use that. 871 * otherwise, skip here 872 */ 873 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) { 874 /* illgeal state */ 875 _citrus_ISO2022_init_state(ei, psenc); 876 goto encoding_error; 877 } 878 if (psenc->chlen == 0) 879 goto emptybuf; 880 881 /* buffer is not empty */ 882 p = psenc->ch; 883 while (psenc->chlen < sizeof(psenc->ch)) { 884 if (n > 0) { 885 psenc->ch[psenc->chlen++] = *s0++; 886 n--; 887 } 888 889 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 890 &result, psenc); 891 c += result - p; 892 if (wchar != _ISO2022INVALID) { 893 if (psenc->chlen > c) 894 memmove(psenc->ch, result, psenc->chlen - c); 895 if (psenc->chlen < c) 896 psenc->chlen = 0; 897 else 898 psenc->chlen -= c; 899 goto output; 900 } 901 902 if (n == 0) { 903 if ((result - p) == psenc->chlen) 904 /* complete shift sequence. */ 905 psenc->chlen = 0; 906 goto restart; 907 } 908 909 p = result; 910 } 911 912 /* escape sequence too long? */ 913 goto encoding_error; 914 915 emptybuf: 916 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 917 if (wchar != _ISO2022INVALID) { 918 c += result - s0; 919 psenc->chlen = 0; 920 s0 = result; 921 goto output; 922 } 923 if (result > s0) { 924 c += (result - s0); 925 n -= (result - s0); 926 s0 = result; 927 if (n>0) 928 goto emptybuf; 929 /* complete shift sequence. */ 930 goto restart; 931 } 932 n += c; 933 if (n < sizeof(psenc->ch)) { 934 memcpy(psenc->ch, s0 - c, n); 935 psenc->chlen = n; 936 s0 = result; 937 goto restart; 938 } 939 940 /* escape sequence too long? */ 941 942 encoding_error: 943 psenc->chlen = 0; 944 *nresult = (size_t)-1; 945 return (EILSEQ); 946 947 output: 948 *s = s0; 949 if (pwc) 950 *pwc = wchar; 951 952 if (!wchar) 953 *nresult = 0; 954 else 955 *nresult = c - chlenbak; 956 957 return (0); 958 959 restart: 960 *s = s0; 961 *nresult = (size_t)-2; 962 963 return (0); 964 } 965 966 static int 967 recommendation(_ISO2022EncodingInfo * __restrict ei, 968 _ISO2022Charset * __restrict cs) 969 { 970 int i, j; 971 _ISO2022Charset *recommend; 972 973 _DIAGASSERT(ei != NULL); 974 _DIAGASSERT(cs != NULL); 975 976 /* first, try a exact match. */ 977 for (i = 0; i < 4; i++) { 978 recommend = ei->recommend[i]; 979 for (j = 0; j < ei->recommendsize[i]; j++) { 980 if (cs->type != recommend[j].type) 981 continue; 982 if (cs->final != recommend[j].final) 983 continue; 984 if (cs->interm != recommend[j].interm) 985 continue; 986 987 return i; 988 } 989 } 990 991 /* then, try a wildcard match over final char. */ 992 for (i = 0; i < 4; i++) { 993 recommend = ei->recommend[i]; 994 for (j = 0; j < ei->recommendsize[i]; j++) { 995 if (cs->type != recommend[j].type) 996 continue; 997 if (cs->final && (cs->final != recommend[j].final)) 998 continue; 999 if (cs->interm && (cs->interm != recommend[j].interm)) 1000 continue; 1001 1002 return i; 1003 } 1004 } 1005 1006 /* there's no recommendation. make a guess. */ 1007 if (ei->maxcharset == 0) { 1008 return 0; 1009 } else { 1010 switch (cs->type) { 1011 case CS94: 1012 case CS94MULTI: 1013 return 0; 1014 case CS96: 1015 case CS96MULTI: 1016 return 1; 1017 } 1018 } 1019 return 0; 1020 } 1021 1022 static int 1023 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1024 char * __restrict string, size_t n, 1025 char ** __restrict result, 1026 _ISO2022State * __restrict psenc, 1027 size_t * __restrict nresult) 1028 { 1029 int i = 0; 1030 size_t len; 1031 _ISO2022Charset cs; 1032 char *p; 1033 char tmp[MB_LEN_MAX]; 1034 int target; 1035 u_char mask; 1036 int bit8; 1037 1038 _DIAGASSERT(ei != NULL); 1039 _DIAGASSERT(string != NULL); 1040 /* result may be NULL */ 1041 _DIAGASSERT(psenc != NULL); 1042 _DIAGASSERT(nresult != NULL); 1043 1044 if (isc0(wc & 0xff)) { 1045 /* go back to INIT0 or ASCII on control chars */ 1046 cs = ei->initg[0].final ? ei->initg[0] : ascii; 1047 } else if (isc1(wc & 0xff)) { 1048 /* go back to INIT1 or ISO-8859-1 on control chars */ 1049 cs = ei->initg[1].final ? ei->initg[1] : iso88591; 1050 } else if (!(wc & ~0xff)) { 1051 if (wc & 0x80) { 1052 /* special treatment for ISO-8859-1 */ 1053 cs = iso88591; 1054 } else { 1055 /* special treatment for ASCII */ 1056 cs = ascii; 1057 } 1058 } else { 1059 cs.final = (wc >> 24) & 0x7f; 1060 if ((wc >> 16) & 0x80) 1061 cs.interm = (wc >> 16) & 0x7f; 1062 else 1063 cs.interm = '\0'; 1064 if (wc & 0x80) 1065 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1066 else 1067 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1068 } 1069 target = recommendation(ei, &cs); 1070 p = tmp; 1071 bit8 = ei->flags & F_8BIT; 1072 1073 /* designate the charset onto the target plane(G0/1/2/3). */ 1074 if (psenc->g[target].type == cs.type 1075 && psenc->g[target].final == cs.final 1076 && psenc->g[target].interm == cs.interm) 1077 goto planeok; 1078 1079 *p++ = '\033'; 1080 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1081 *p++ = '$'; 1082 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) 1083 && !cs.interm && !(ei->flags & F_NOOLD)) 1084 ; 1085 else if (cs.type == CS94 || cs.type == CS94MULTI) 1086 *p++ = "()*+"[target]; 1087 else 1088 *p++ = ",-./"[target]; 1089 if (cs.interm) 1090 *p++ = cs.interm; 1091 *p++ = cs.final; 1092 1093 psenc->g[target].type = cs.type; 1094 psenc->g[target].final = cs.final; 1095 psenc->g[target].interm = cs.interm; 1096 1097 planeok: 1098 /* invoke the plane onto GL or GR. */ 1099 if (psenc->gl == target) 1100 goto sideok; 1101 if (bit8 && psenc->gr == target) 1102 goto sideok; 1103 1104 if (target == 0 && (ei->flags & F_LS0)) { 1105 *p++ = '\017'; 1106 psenc->gl = 0; 1107 } else if (target == 1 && (ei->flags & F_LS1)) { 1108 *p++ = '\016'; 1109 psenc->gl = 1; 1110 } else if (target == 2 && (ei->flags & F_LS2)) { 1111 *p++ = '\033'; 1112 *p++ = 'n'; 1113 psenc->gl = 2; 1114 } else if (target == 3 && (ei->flags & F_LS3)) { 1115 *p++ = '\033'; 1116 *p++ = 'o'; 1117 psenc->gl = 3; 1118 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1119 *p++ = '\033'; 1120 *p++ = '~'; 1121 psenc->gr = 1; 1122 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1123 *p++ = '\033'; 1124 /*{*/ 1125 *p++ = '}'; 1126 psenc->gr = 2; 1127 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1128 *p++ = '\033'; 1129 *p++ = '|'; 1130 psenc->gr = 3; 1131 } else if (target == 2 && (ei->flags & F_SS2)) { 1132 *p++ = '\033'; 1133 *p++ = 'N'; 1134 psenc->singlegl = 2; 1135 } else if (target == 3 && (ei->flags & F_SS3)) { 1136 *p++ = '\033'; 1137 *p++ = 'O'; 1138 psenc->singlegl = 3; 1139 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1140 *p++ = '\216'; 1141 *p++ = 'N'; 1142 psenc->singlegl = psenc->singlegr = 2; 1143 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1144 *p++ = '\217'; 1145 *p++ = 'O'; 1146 psenc->singlegl = psenc->singlegr = 3; 1147 } else 1148 goto ilseq; 1149 1150 sideok: 1151 if (psenc->singlegl == target) 1152 mask = 0x00; 1153 else if (psenc->singlegr == target) 1154 mask = 0x80; 1155 else if (psenc->gl == target) 1156 mask = 0x00; 1157 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1158 mask = 0x80; 1159 else 1160 goto ilseq; 1161 1162 switch (cs.type) { 1163 case CS94: 1164 case CS96: 1165 i = 1; 1166 break; 1167 case CS94MULTI: 1168 case CS96MULTI: 1169 i = !iscntl(wc & 0xff) ? 1170 (isthree(cs.final) ? 3 : 2) : 1; 1171 break; 1172 } 1173 while (i-- > 0) 1174 *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1175 1176 /* reset single shift state */ 1177 psenc->singlegl = psenc->singlegr = -1; 1178 1179 len = (size_t)(p - tmp); 1180 if (n < len) { 1181 if (result) 1182 *result = (char *)0; 1183 *nresult = (size_t)-1; 1184 return E2BIG; 1185 } 1186 if (result) 1187 *result = string + len; 1188 memcpy(string, tmp, len); 1189 *nresult = len; 1190 1191 return 0; 1192 1193 ilseq: 1194 *nresult = (size_t)-1; 1195 return EILSEQ; 1196 } 1197 1198 static int 1199 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei, 1200 char * __restrict s, size_t n, 1201 _ISO2022State * __restrict psenc, 1202 size_t * __restrict nresult) 1203 { 1204 char buf[MB_LEN_MAX]; 1205 char *result; 1206 int ret; 1207 size_t len; 1208 1209 _DIAGASSERT(ei != NULL); 1210 _DIAGASSERT(nresult != 0); 1211 _DIAGASSERT(s != NULL); 1212 1213 /* XXX state will be modified after this operation... */ 1214 ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc, 1215 &len); 1216 if (ret) { 1217 *nresult = len; 1218 return ret; 1219 } 1220 1221 if (sizeof(buf) < len || n < len-1) { 1222 /* XXX should recover state? */ 1223 *nresult = (size_t)-1; 1224 return E2BIG; 1225 } 1226 1227 memcpy(s, buf, len-1); 1228 *nresult = len-1; 1229 return (0); 1230 } 1231 1232 static int 1233 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1234 char * __restrict s, size_t n, wchar_t wc, 1235 _ISO2022State * __restrict psenc, 1236 size_t * __restrict nresult) 1237 { 1238 char buf[MB_LEN_MAX]; 1239 char *result; 1240 int ret; 1241 size_t len; 1242 1243 _DIAGASSERT(ei != NULL); 1244 _DIAGASSERT(s != NULL); 1245 _DIAGASSERT(psenc != NULL); 1246 _DIAGASSERT(nresult != 0); 1247 1248 /* XXX state will be modified after this operation... */ 1249 ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc, 1250 &len); 1251 if (ret) { 1252 *nresult = len; 1253 return ret; 1254 } 1255 1256 if (sizeof(buf) < len || n < len) { 1257 /* XXX should recover state? */ 1258 *nresult = (size_t)-1; 1259 return E2BIG; 1260 } 1261 1262 memcpy(s, buf, len); 1263 *nresult = len; 1264 return (0); 1265 } 1266 1267 static __inline int 1268 /*ARGSUSED*/ 1269 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei, 1270 _csid_t * __restrict csid, 1271 _index_t * __restrict idx, wchar_t wc) 1272 { 1273 wchar_t m, nm; 1274 1275 _DIAGASSERT(csid != NULL && idx != NULL); 1276 1277 m = wc & 0x7FFF8080; 1278 nm = wc & 0x007F7F7F; 1279 if (m & 0x00800000) { 1280 nm &= 0x00007F7F; 1281 } else { 1282 m &= 0x7F008080; 1283 } 1284 if (nm & 0x007F0000) { 1285 /* ^3 mark */ 1286 m |= 0x007F0000; 1287 } else if (nm & 0x00007F00) { 1288 /* ^2 mark */ 1289 m |= 0x00007F00; 1290 } 1291 *csid = (_csid_t)m; 1292 *idx = (_index_t)nm; 1293 1294 return (0); 1295 } 1296 1297 static __inline int 1298 /*ARGSUSED*/ 1299 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei, 1300 wchar_t * __restrict wc, 1301 _csid_t csid, _index_t idx) 1302 { 1303 1304 _DIAGASSERT(ei != NULL && wc != NULL); 1305 1306 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 1307 1308 return (0); 1309 } 1310 1311 static __inline int 1312 /*ARGSUSED*/ 1313 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei, 1314 _ISO2022State * __restrict psenc, 1315 int * __restrict rstate) 1316 { 1317 1318 if (psenc->chlen == 0) { 1319 /* XXX: it should distinguish initial and stable. */ 1320 *rstate = _STDENC_SDGEN_STABLE; 1321 } else { 1322 if (psenc->ch[0] == '\033') 1323 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 1324 else 1325 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 1326 } 1327 1328 return 0; 1329 } 1330 1331 /* ---------------------------------------------------------------------- 1332 * public interface for ctype 1333 */ 1334 1335 _CITRUS_CTYPE_DECLS(ISO2022); 1336 _CITRUS_CTYPE_DEF_OPS(ISO2022); 1337 1338 #include "citrus_ctype_template.h" 1339 1340 /* ---------------------------------------------------------------------- 1341 * public interface for stdenc 1342 */ 1343 1344 _CITRUS_STDENC_DECLS(ISO2022); 1345 _CITRUS_STDENC_DEF_OPS(ISO2022); 1346 1347 #include "citrus_stdenc_template.h" 1348