1 /* $NetBSD: citrus_iso2022.c,v 1.14 2005/10/29 18:02:04 tshiozak Exp $ */ 2 3 /*- 4 * Copyright (c)1999, 2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 #if defined(LIBC_SCCS) && !defined(lint) 33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.14 2005/10/29 18:02:04 tshiozak Exp $"); 34 #endif /* LIBC_SCCS and not lint */ 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <stddef.h> 42 #include <locale.h> 43 #include <wchar.h> 44 #include <sys/types.h> 45 #include <limits.h> 46 47 #include "citrus_namespace.h" 48 #include "citrus_types.h" 49 #include "citrus_module.h" 50 #include "citrus_ctype.h" 51 #include "citrus_stdenc.h" 52 #include "citrus_iso2022.h" 53 54 55 /* ---------------------------------------------------------------------- 56 * private stuffs used by templates 57 */ 58 59 60 /* 61 * wchar_t mappings: 62 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 63 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 64 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 65 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 66 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 67 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 68 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 69 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 70 * 94x94 charset (ESC & V ESC $ ( F) 71 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 72 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 73 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 74 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit) 75 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 76 */ 77 78 typedef struct { 79 u_char type; 80 #define CS94 (0U) 81 #define CS96 (1U) 82 #define CS94MULTI (2U) 83 #define CS96MULTI (3U) 84 85 u_char final; 86 u_char interm; 87 u_char vers; 88 } _ISO2022Charset; 89 90 typedef struct { 91 _ISO2022Charset g[4]; 92 /* need 3 bits to hold -1, 0, ..., 3 */ 93 int gl:3, 94 gr:3, 95 singlegl:3, 96 singlegr:3; 97 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 98 int chlen; 99 int flags; 100 #define _ISO2022STATE_FLAG_INITIALIZED 1 101 } _ISO2022State; 102 103 typedef struct { 104 _ISO2022Charset *recommend[4]; 105 size_t recommendsize[4]; 106 _ISO2022Charset initg[4]; 107 int maxcharset; 108 int flags; 109 #define F_8BIT 0x0001 110 #define F_NOOLD 0x0002 111 #define F_SI 0x0010 /*0F*/ 112 #define F_SO 0x0020 /*0E*/ 113 #define F_LS0 0x0010 /*0F*/ 114 #define F_LS1 0x0020 /*0E*/ 115 #define F_LS2 0x0040 /*ESC n*/ 116 #define F_LS3 0x0080 /*ESC o*/ 117 #define F_LS1R 0x0100 /*ESC ~*/ 118 #define F_LS2R 0x0200 /*ESC }*/ 119 #define F_LS3R 0x0400 /*ESC |*/ 120 #define F_SS2 0x0800 /*ESC N*/ 121 #define F_SS3 0x1000 /*ESC O*/ 122 #define F_SS2R 0x2000 /*8E*/ 123 #define F_SS3R 0x4000 /*8F*/ 124 } _ISO2022EncodingInfo; 125 typedef struct { 126 _ISO2022EncodingInfo ei; 127 struct { 128 /* for future multi-locale facility */ 129 _ISO2022State s_mblen; 130 _ISO2022State s_mbrlen; 131 _ISO2022State s_mbrtowc; 132 _ISO2022State s_mbtowc; 133 _ISO2022State s_mbsrtowcs; 134 _ISO2022State s_wcrtomb; 135 _ISO2022State s_wcsrtombs; 136 _ISO2022State s_wctomb; 137 } states; 138 } _ISO2022CTypeInfo; 139 140 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 141 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 142 143 #define _FUNCNAME(m) _citrus_ISO2022_##m 144 #define _ENCODING_INFO _ISO2022EncodingInfo 145 #define _CTYPE_INFO _ISO2022CTypeInfo 146 #define _ENCODING_STATE _ISO2022State 147 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 148 #define _ENCODING_IS_STATE_DEPENDENT 1 149 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 150 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 151 152 153 #define _ISO2022INVALID (wchar_t)-1 154 155 static __inline int isc0(__uint8_t x) { return ((x & 0x1f) == x); } 156 static __inline int isc1(__uint8_t x) { return (0x80 <= x && x <= 0x9f); } 157 static __inline int iscntl(__uint8_t x) { return (isc0(x) || isc1(x) || x == 0x7f); } 158 static __inline int is94(__uint8_t x) { return (0x21 <= x && x <= 0x7e); } 159 static __inline int is96(__uint8_t x) { return (0x20 <= x && x <= 0x7f); } 160 static __inline int isecma(__uint8_t x) { return (0x30 <= x && x <= 0x7f); } 161 static __inline int isinterm(__uint8_t x) { return (0x20 <= x && x <= 0x2f); } 162 static __inline int isthree(__uint8_t x) { return (0x60 <= x && x <= 0x6f); } 163 164 static __inline int 165 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 166 { 167 168 _DIAGASSERT(p != NULL); 169 _DIAGASSERT(cs != NULL); 170 171 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 172 cs->final = (u_char)(p[3] & 0xff); 173 cs->interm = '\0'; 174 cs->vers = '\0'; 175 cs->type = CS94MULTI; 176 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 177 cs->final = (u_char)(p[3] & 0xff); 178 cs->interm = '\0'; 179 cs->vers = '\0'; 180 cs->type = CS96MULTI; 181 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 182 cs->final = (u_char)(p[2] & 0xff); 183 cs->interm = '\0'; 184 cs->vers = '\0'; 185 cs->type = CS94; 186 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 187 cs->final = (u_char )(p[2] & 0xff); 188 cs->interm = '\0'; 189 cs->vers = '\0'; 190 cs->type = CS96; 191 } else { 192 return 1; 193 } 194 195 return 0; 196 } 197 198 199 #define _NOTMATCH 0 200 #define _MATCH 1 201 #define _PARSEFAIL 2 202 203 static __inline int 204 get_recommend(_ISO2022EncodingInfo * __restrict ei, 205 const char * __restrict token) 206 { 207 int i; 208 _ISO2022Charset cs, *p; 209 210 if (!strchr("0123", token[0]) || token[1] != '=') 211 return (_NOTMATCH); 212 213 if (getcs(&token[2], &cs) == 0) 214 ; 215 else if (!strcmp(&token[2], "94")) { 216 cs.final = (u_char)(token[4]); 217 cs.interm = '\0'; 218 cs.vers = '\0'; 219 cs.type = CS94; 220 } else if (!strcmp(&token[2], "96")) { 221 cs.final = (u_char)(token[4]); 222 cs.interm = '\0'; 223 cs.vers = '\0'; 224 cs.type = CS96; 225 } else if (!strcmp(&token[2], "94$")) { 226 cs.final = (u_char)(token[5]); 227 cs.interm = '\0'; 228 cs.vers = '\0'; 229 cs.type = CS94MULTI; 230 } else if (!strcmp(&token[2], "96$")) { 231 cs.final = (u_char)(token[5]); 232 cs.interm = '\0'; 233 cs.vers = '\0'; 234 cs.type = CS96MULTI; 235 } else { 236 return (_PARSEFAIL); 237 } 238 239 i = token[0] - '0'; 240 if (!ei->recommend[i]) { 241 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 242 } else { 243 p = realloc(ei->recommend[i], 244 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1)); 245 if (!p) 246 return (_PARSEFAIL); 247 ei->recommend[i] = p; 248 } 249 if (!ei->recommend[i]) 250 return (_PARSEFAIL); 251 ei->recommendsize[i]++; 252 253 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 254 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 255 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 256 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 257 258 return (_MATCH); 259 } 260 261 static __inline int 262 get_initg(_ISO2022EncodingInfo * __restrict ei, 263 const char * __restrict token) 264 { 265 _ISO2022Charset cs; 266 267 if (strncmp("INIT", &token[0], 4) || 268 !strchr("0123", token[4]) || 269 token[5] != '=') 270 return (_NOTMATCH); 271 272 if (getcs(&token[6], &cs) != 0) 273 return (_PARSEFAIL); 274 275 ei->initg[token[4] - '0'].type = cs.type; 276 ei->initg[token[4] - '0'].final = cs.final; 277 ei->initg[token[4] - '0'].interm = cs.interm; 278 ei->initg[token[4] - '0'].vers = cs.vers; 279 280 return (_MATCH); 281 } 282 283 static __inline int 284 get_max(_ISO2022EncodingInfo * __restrict ei, 285 const char * __restrict token) 286 { 287 if (!strcmp(token, "MAX1")) { 288 ei->maxcharset = 1; 289 } else if (!strcmp(token, "MAX2")) { 290 ei->maxcharset = 2; 291 } else if (!strcmp(token, "MAX3")) { 292 ei->maxcharset = 3; 293 } else 294 return (_NOTMATCH); 295 296 return (_MATCH); 297 } 298 299 300 static __inline int 301 get_flags(_ISO2022EncodingInfo * __restrict ei, 302 const char * __restrict token) 303 { 304 int i; 305 static struct { 306 const char *tag; 307 int flag; 308 } const tags[] = { 309 { "DUMMY", 0 }, 310 { "8BIT", F_8BIT }, 311 { "NOOLD", F_NOOLD }, 312 { "SI", F_SI }, 313 { "SO", F_SO }, 314 { "LS0", F_LS0 }, 315 { "LS1", F_LS1 }, 316 { "LS2", F_LS2 }, 317 { "LS3", F_LS3 }, 318 { "LS1R", F_LS1R }, 319 { "LS2R", F_LS2R }, 320 { "LS3R", F_LS3R }, 321 { "SS2", F_SS2 }, 322 { "SS3", F_SS3 }, 323 { "SS2R", F_SS2R }, 324 { "SS3R", F_SS3R }, 325 { NULL, 0 } 326 }; 327 328 for (i = 0; tags[i].tag; i++) { 329 if (!strcmp(token, tags[i].tag)) { 330 ei->flags |= tags[i].flag; 331 return (_MATCH); 332 } 333 } 334 335 return (_NOTMATCH); 336 } 337 338 339 static __inline int 340 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 341 const void * __restrict var, size_t lenvar) 342 { 343 char const *v, *e; 344 char buf[20]; 345 int i, len, ret; 346 347 _DIAGASSERT(ei != NULL); 348 349 350 /* 351 * parse VARIABLE section. 352 */ 353 354 if (!var) 355 return (EFTYPE); 356 357 v = (const char *) var; 358 359 /* initialize structure */ 360 ei->maxcharset = 0; 361 for (i = 0; i < 4; i++) { 362 ei->recommend[i] = NULL; 363 ei->recommendsize[i] = 0; 364 } 365 ei->flags = 0; 366 367 while (*v) { 368 while (*v == ' ' || *v == '\t') 369 ++v; 370 371 /* find the token */ 372 e = v; 373 while (*e && *e != ' ' && *e != '\t') 374 ++e; 375 376 len = e-v; 377 if (len == 0) 378 break; 379 if (len>=sizeof(buf)) 380 goto parsefail; 381 snprintf(buf, sizeof(buf), "%.*s", len, v); 382 383 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 384 ; 385 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 386 ; 387 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 388 ; 389 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 390 ; 391 else 392 ret = _PARSEFAIL; 393 if (ret==_PARSEFAIL) 394 goto parsefail; 395 v = e; 396 397 } 398 399 return (0); 400 401 parsefail: 402 free(ei->recommend[0]); 403 free(ei->recommend[1]); 404 free(ei->recommend[2]); 405 free(ei->recommend[3]); 406 407 return (EFTYPE); 408 } 409 410 static __inline void 411 /*ARGSUSED*/ 412 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 413 _ISO2022State * __restrict s) 414 { 415 int i; 416 417 memset(s, 0, sizeof(*s)); 418 s->gl = 0; 419 s->gr = (ei->flags & F_8BIT) ? 1 : -1; 420 421 for (i = 0; i < 4; i++) { 422 if (ei->initg[i].final) { 423 s->g[i].type = ei->initg[i].type; 424 s->g[i].final = ei->initg[i].final; 425 s->g[i].interm = ei->initg[i].interm; 426 } 427 } 428 s->singlegl = s->singlegr = -1; 429 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 430 } 431 432 static __inline void 433 /*ARGSUSED*/ 434 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei, 435 void * __restrict pspriv, 436 const _ISO2022State * __restrict s) 437 { 438 memcpy(pspriv, (const void *)s, sizeof(*s)); 439 } 440 441 static __inline void 442 /*ARGSUSED*/ 443 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei, 444 _ISO2022State * __restrict s, 445 const void * __restrict pspriv) 446 { 447 memcpy((void *)s, pspriv, sizeof(*s)); 448 } 449 450 static int 451 /*ARGSUSED*/ 452 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 453 const void * __restrict var, 454 size_t lenvar) 455 { 456 457 _DIAGASSERT(ei != NULL); 458 459 return _citrus_ISO2022_parse_variable(ei, var, lenvar); 460 } 461 462 static void 463 /*ARGSUSED*/ 464 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei) 465 { 466 } 467 468 #define ESC '\033' 469 #define ECMA -1 470 #define INTERM -2 471 #define OECMA -3 472 static const struct seqtable { 473 int type; 474 int csoff; 475 int finaloff; 476 int intermoff; 477 int versoff; 478 int len; 479 int chars[10]; 480 } seqtable[] = { 481 /* G0 94MULTI special */ 482 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 483 /* G0 94MULTI special with version identification */ 484 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 485 /* G? 94 */ 486 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 487 /* G? 94 with 2nd intermediate char */ 488 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 489 /* G? 96 */ 490 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 491 /* G? 96 with 2nd intermediate char */ 492 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 493 /* G? 94MULTI */ 494 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 495 /* G? 96MULTI */ 496 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 497 /* G? 94MULTI with version specification */ 498 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 499 /* LS2/3 */ 500 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 501 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 502 /* LS1/2/3R */ 503 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 504 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 505 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 506 /* SS2/3 */ 507 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 508 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 509 /* end of records */ 510 { 0, } 511 }; 512 513 static int 514 seqmatch(const char * __restrict s, size_t n, 515 const struct seqtable * __restrict sp) 516 { 517 const int *p; 518 519 _DIAGASSERT(s != NULL); 520 _DIAGASSERT(sp != NULL); 521 522 p = sp->chars; 523 while (p - sp->chars < n && p - sp->chars < sp->len) { 524 switch (*p) { 525 case ECMA: 526 if (!isecma(*s)) 527 goto terminate; 528 break; 529 case OECMA: 530 if (*s && strchr("@AB", *s)) 531 break; 532 else 533 goto terminate; 534 case INTERM: 535 if (!isinterm(*s)) 536 goto terminate; 537 break; 538 case CS94: 539 if (*s && strchr("()*+", *s)) 540 break; 541 else 542 goto terminate; 543 case CS96: 544 if (*s && strchr(",-./", *s)) 545 break; 546 else 547 goto terminate; 548 default: 549 if (*s != *p) 550 goto terminate; 551 break; 552 } 553 554 p++; 555 s++; 556 } 557 558 terminate: 559 return p - sp->chars; 560 } 561 562 static wchar_t 563 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei, 564 const char * __restrict string, size_t n, 565 const char ** __restrict result, 566 _ISO2022State * __restrict psenc) 567 { 568 wchar_t wchar = 0; 569 int cur; 570 const struct seqtable *sp; 571 int nmatch; 572 int i; 573 574 _DIAGASSERT(ei != NULL); 575 _DIAGASSERT(psenc != NULL); 576 _DIAGASSERT(string != NULL); 577 /* result may be NULL */ 578 579 while (1) { 580 /* SI/SO */ 581 if (1 <= n && string[0] == '\017') { 582 psenc->gl = 0; 583 string++; 584 n--; 585 continue; 586 } 587 if (1 <= n && string[0] == '\016') { 588 psenc->gl = 1; 589 string++; 590 n--; 591 continue; 592 } 593 594 /* SS2/3R */ 595 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 596 psenc->singlegl = psenc->singlegr = 597 (string[0] - '\216') + 2; 598 string++; 599 n--; 600 continue; 601 } 602 603 /* eat the letter if this is not ESC */ 604 if (1 <= n && string[0] != '\033') 605 break; 606 607 /* look for a perfect match from escape sequences */ 608 for (sp = &seqtable[0]; sp->len; sp++) { 609 nmatch = seqmatch(string, n, sp); 610 if (sp->len == nmatch && n >= sp->len) 611 break; 612 } 613 614 if (!sp->len) 615 goto notseq; 616 617 if (sp->type != -1) { 618 if (sp->csoff == -1) 619 i = 0; 620 else { 621 switch (sp->type) { 622 case CS94: 623 case CS94MULTI: 624 i = string[sp->csoff] - '('; 625 break; 626 case CS96: 627 case CS96MULTI: 628 i = string[sp->csoff] - ','; 629 break; 630 } 631 } 632 psenc->g[i].type = sp->type; 633 psenc->g[i].final = '\0'; 634 psenc->g[i].interm = '\0'; 635 psenc->g[i].vers = '\0'; 636 /* sp->finaloff must not be -1 */ 637 if (sp->finaloff != -1) 638 psenc->g[i].final = string[sp->finaloff]; 639 if (sp->intermoff != -1) 640 psenc->g[i].interm = string[sp->intermoff]; 641 if (sp->versoff != -1) 642 psenc->g[i].vers = string[sp->versoff]; 643 644 string += sp->len; 645 n -= sp->len; 646 continue; 647 } 648 649 /* LS2/3 */ 650 if (2 <= n && string[0] == '\033' 651 && string[1] && strchr("no", string[1])) { 652 psenc->gl = string[1] - 'n' + 2; 653 string += 2; 654 n -= 2; 655 continue; 656 } 657 658 /* LS1/2/3R */ 659 /* XXX: { for vi showmatch */ 660 if (2 <= n && string[0] == '\033' 661 && string[1] && strchr("~}|", string[1])) { 662 psenc->gr = 3 - (string[1] - '|'); 663 string += 2; 664 n -= 2; 665 continue; 666 } 667 668 /* SS2/3 */ 669 if (2 <= n && string[0] == '\033' 670 && string[1] && strchr("NO", string[1])) { 671 psenc->singlegl = (string[1] - 'N') + 2; 672 string += 2; 673 n -= 2; 674 continue; 675 } 676 677 notseq: 678 /* 679 * if we've got an unknown escape sequence, eat the ESC at the 680 * head. otherwise, wait till full escape sequence comes. 681 */ 682 for (sp = &seqtable[0]; sp->len; sp++) { 683 nmatch = seqmatch(string, n, sp); 684 if (!nmatch) 685 continue; 686 687 /* 688 * if we are in the middle of escape sequence, 689 * we still need to wait for more characters to come 690 */ 691 if (n < sp->len) { 692 if (nmatch == n) { 693 if (result) 694 *result = string; 695 return (_ISO2022INVALID); 696 } 697 } else { 698 if (nmatch == sp->len) { 699 /* this case should not happen */ 700 goto eat; 701 } 702 } 703 } 704 705 break; 706 } 707 708 eat: 709 /* no letter to eat */ 710 if (n < 1) { 711 if (result) 712 *result = string; 713 return (_ISO2022INVALID); 714 } 715 716 /* normal chars. always eat C0/C1 as is. */ 717 if (iscntl(*string & 0xff)) 718 cur = -1; 719 else if (*string & 0x80) { 720 cur = (psenc->singlegr == -1) 721 ? psenc->gr : psenc->singlegr; 722 } else { 723 cur = (psenc->singlegl == -1) 724 ? psenc->gl : psenc->singlegl; 725 } 726 727 if (cur == -1) { 728 asis: 729 wchar = *string++ & 0xff; 730 if (result) 731 *result = string; 732 /* reset single shift state */ 733 psenc->singlegr = psenc->singlegl = -1; 734 return wchar; 735 } 736 737 /* length error check */ 738 switch (psenc->g[cur].type) { 739 case CS94MULTI: 740 case CS96MULTI: 741 if (!isthree(psenc->g[cur].final)) { 742 if (2 <= n 743 && (string[0] & 0x80) == (string[1] & 0x80)) 744 break; 745 } else { 746 if (3 <= n 747 && (string[0] & 0x80) == (string[1] & 0x80) 748 && (string[0] & 0x80) == (string[2] & 0x80)) 749 break; 750 } 751 752 /* we still need to wait for more characters to come */ 753 if (result) 754 *result = string; 755 return (_ISO2022INVALID); 756 757 case CS94: 758 case CS96: 759 if (1 <= n) 760 break; 761 762 /* we still need to wait for more characters to come */ 763 if (result) 764 *result = string; 765 return (_ISO2022INVALID); 766 } 767 768 /* range check */ 769 switch (psenc->g[cur].type) { 770 case CS94: 771 if (!(is94(string[0] & 0x7f))) 772 goto asis; 773 case CS96: 774 if (!(is96(string[0] & 0x7f))) 775 goto asis; 776 break; 777 case CS94MULTI: 778 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 779 goto asis; 780 break; 781 case CS96MULTI: 782 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 783 goto asis; 784 break; 785 } 786 787 /* extract the character. */ 788 switch (psenc->g[cur].type) { 789 case CS94: 790 /* special case for ASCII. */ 791 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 792 wchar = *string++; 793 wchar &= 0x7f; 794 break; 795 } 796 wchar = psenc->g[cur].final; 797 wchar = (wchar << 8); 798 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 799 wchar = (wchar << 8); 800 wchar = (wchar << 8) | (*string++ & 0x7f); 801 break; 802 case CS96: 803 /* special case for ISO-8859-1. */ 804 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 805 wchar = *string++; 806 wchar &= 0x7f; 807 wchar |= 0x80; 808 break; 809 } 810 wchar = psenc->g[cur].final; 811 wchar = (wchar << 8); 812 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 813 wchar = (wchar << 8); 814 wchar = (wchar << 8) | (*string++ & 0x7f); 815 wchar |= 0x80; 816 break; 817 case CS94MULTI: 818 case CS96MULTI: 819 wchar = psenc->g[cur].final; 820 wchar = (wchar << 8); 821 if (isthree(psenc->g[cur].final)) 822 wchar |= (*string++ & 0x7f); 823 wchar = (wchar << 8) | (*string++ & 0x7f); 824 wchar = (wchar << 8) | (*string++ & 0x7f); 825 if (psenc->g[cur].type == CS96MULTI) 826 wchar |= 0x80; 827 break; 828 } 829 830 if (result) 831 *result = string; 832 /* reset single shift state */ 833 psenc->singlegr = psenc->singlegl = -1; 834 return wchar; 835 } 836 837 838 839 static int 840 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 841 wchar_t * __restrict pwc, 842 const char ** __restrict s, 843 size_t n, _ISO2022State * __restrict psenc, 844 size_t * __restrict nresult) 845 { 846 wchar_t wchar; 847 const char *s0, *p, *result; 848 int c; 849 int chlenbak; 850 851 _DIAGASSERT(nresult != 0); 852 _DIAGASSERT(ei != NULL); 853 _DIAGASSERT(psenc != NULL); 854 _DIAGASSERT(s != NULL); 855 856 s0 = *s; 857 c = 0; 858 chlenbak = psenc->chlen; 859 860 /* 861 * if we have something in buffer, use that. 862 * otherwise, skip here 863 */ 864 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) { 865 /* illgeal state */ 866 _citrus_ISO2022_init_state(ei, psenc); 867 goto encoding_error; 868 } 869 if (psenc->chlen == 0) 870 goto emptybuf; 871 872 /* buffer is not empty */ 873 p = psenc->ch; 874 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) { 875 if (n > 0) { 876 psenc->ch[psenc->chlen++] = *s0++; 877 n--; 878 } 879 880 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 881 &result, psenc); 882 c += result - p; 883 if (wchar != _ISO2022INVALID) { 884 if (psenc->chlen > c) 885 memmove(psenc->ch, result, psenc->chlen - c); 886 if (psenc->chlen < c) 887 psenc->chlen = 0; 888 else 889 psenc->chlen -= c; 890 goto output; 891 } 892 893 if (n == 0) { 894 if ((result - p) == psenc->chlen) 895 /* complete shift sequence. */ 896 psenc->chlen = 0; 897 goto restart; 898 } 899 900 p = result; 901 } 902 903 /* escape sequence too long? */ 904 goto encoding_error; 905 906 emptybuf: 907 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 908 if (wchar != _ISO2022INVALID) { 909 c += result - s0; 910 psenc->chlen = 0; 911 s0 = result; 912 goto output; 913 } 914 if (result > s0) { 915 c += (result - s0); 916 n -= (result - s0); 917 s0 = result; 918 if (n>0) 919 goto emptybuf; 920 /* complete shift sequence. */ 921 goto restart; 922 } 923 n += c; 924 if (n < sizeof(psenc->ch)) { 925 memcpy(psenc->ch, s0 - c, n); 926 psenc->chlen = n; 927 s0 = result; 928 goto restart; 929 } 930 931 /* escape sequence too long? */ 932 933 encoding_error: 934 psenc->chlen = 0; 935 *nresult = (size_t)-1; 936 return (EILSEQ); 937 938 output: 939 *s = s0; 940 if (pwc) 941 *pwc = wchar; 942 943 if (!wchar) 944 *nresult = 0; 945 else 946 *nresult = c - chlenbak; 947 948 return (0); 949 950 restart: 951 *s = s0; 952 *nresult = (size_t)-2; 953 954 return (0); 955 } 956 957 static int 958 recommendation(_ISO2022EncodingInfo * __restrict ei, 959 _ISO2022Charset * __restrict cs) 960 { 961 int i, j; 962 _ISO2022Charset *recommend; 963 964 _DIAGASSERT(ei != NULL); 965 _DIAGASSERT(cs != NULL); 966 967 /* first, try a exact match. */ 968 for (i = 0; i < 4; i++) { 969 recommend = ei->recommend[i]; 970 for (j = 0; j < ei->recommendsize[i]; j++) { 971 if (cs->type != recommend[j].type) 972 continue; 973 if (cs->final != recommend[j].final) 974 continue; 975 if (cs->interm != recommend[j].interm) 976 continue; 977 978 return i; 979 } 980 } 981 982 /* then, try a wildcard match over final char. */ 983 for (i = 0; i < 4; i++) { 984 recommend = ei->recommend[i]; 985 for (j = 0; j < ei->recommendsize[i]; j++) { 986 if (cs->type != recommend[j].type) 987 continue; 988 if (cs->final && (cs->final != recommend[j].final)) 989 continue; 990 if (cs->interm && (cs->interm != recommend[j].interm)) 991 continue; 992 993 return i; 994 } 995 } 996 997 /* there's no recommendation. make a guess. */ 998 if (ei->maxcharset == 0) { 999 return 0; 1000 } else { 1001 switch (cs->type) { 1002 case CS94: 1003 case CS94MULTI: 1004 return 0; 1005 case CS96: 1006 case CS96MULTI: 1007 return 1; 1008 } 1009 } 1010 return 0; 1011 } 1012 1013 static int 1014 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1015 char * __restrict string, size_t n, 1016 char ** __restrict result, 1017 _ISO2022State * __restrict psenc) 1018 { 1019 int i = 0, len; 1020 _ISO2022Charset cs; 1021 char *p; 1022 char tmp[MB_LEN_MAX]; 1023 int target; 1024 u_char mask; 1025 int bit8; 1026 1027 _DIAGASSERT(ei != NULL); 1028 _DIAGASSERT(string != NULL); 1029 /* result may be NULL */ 1030 /* state appears to be unused */ 1031 1032 if (iscntl(wc & 0xff)) { 1033 /* go back to ASCII on control chars */ 1034 cs.type = CS94; 1035 cs.final = 'B'; 1036 cs.interm = '\0'; 1037 } else if (!(wc & ~0xff)) { 1038 if (wc & 0x80) { 1039 /* special treatment for ISO-8859-1 */ 1040 cs.type = CS96; 1041 cs.final = 'A'; 1042 cs.interm = '\0'; 1043 } else { 1044 /* special treatment for ASCII */ 1045 cs.type = CS94; 1046 cs.final = 'B'; 1047 cs.interm = '\0'; 1048 } 1049 } else { 1050 cs.final = (wc >> 24) & 0x7f; 1051 if ((wc >> 16) & 0x80) 1052 cs.interm = (wc >> 16) & 0x7f; 1053 else 1054 cs.interm = '\0'; 1055 if (wc & 0x80) 1056 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1057 else 1058 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1059 } 1060 target = recommendation(ei, &cs); 1061 p = tmp; 1062 bit8 = ei->flags & F_8BIT; 1063 1064 /* designate the charset onto the target plane(G0/1/2/3). */ 1065 if (psenc->g[target].type == cs.type 1066 && psenc->g[target].final == cs.final 1067 && psenc->g[target].interm == cs.interm) 1068 goto planeok; 1069 1070 *p++ = '\033'; 1071 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1072 *p++ = '$'; 1073 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) 1074 && !cs.interm && !(ei->flags & F_NOOLD)) 1075 ; 1076 else if (cs.type == CS94 || cs.type == CS94MULTI) 1077 *p++ = "()*+"[target]; 1078 else 1079 *p++ = ",-./"[target]; 1080 if (cs.interm) 1081 *p++ = cs.interm; 1082 *p++ = cs.final; 1083 1084 psenc->g[target].type = cs.type; 1085 psenc->g[target].final = cs.final; 1086 psenc->g[target].interm = cs.interm; 1087 1088 planeok: 1089 /* invoke the plane onto GL or GR. */ 1090 if (psenc->gl == target) 1091 goto sideok; 1092 if (bit8 && psenc->gr == target) 1093 goto sideok; 1094 1095 if (target == 0 && (ei->flags & F_LS0)) { 1096 *p++ = '\017'; 1097 psenc->gl = 0; 1098 } else if (target == 1 && (ei->flags & F_LS1)) { 1099 *p++ = '\016'; 1100 psenc->gl = 1; 1101 } else if (target == 2 && (ei->flags & F_LS2)) { 1102 *p++ = '\033'; 1103 *p++ = 'n'; 1104 psenc->gl = 2; 1105 } else if (target == 3 && (ei->flags & F_LS3)) { 1106 *p++ = '\033'; 1107 *p++ = 'o'; 1108 psenc->gl = 3; 1109 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1110 *p++ = '\033'; 1111 *p++ = '~'; 1112 psenc->gr = 1; 1113 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1114 *p++ = '\033'; 1115 /*{*/ 1116 *p++ = '}'; 1117 psenc->gr = 2; 1118 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1119 *p++ = '\033'; 1120 *p++ = '|'; 1121 psenc->gr = 3; 1122 } else if (target == 2 && (ei->flags & F_SS2)) { 1123 *p++ = '\033'; 1124 *p++ = 'N'; 1125 psenc->singlegl = 2; 1126 } else if (target == 3 && (ei->flags & F_SS3)) { 1127 *p++ = '\033'; 1128 *p++ = 'O'; 1129 psenc->singlegl = 3; 1130 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1131 *p++ = '\216'; 1132 *p++ = 'N'; 1133 psenc->singlegl = psenc->singlegr = 2; 1134 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1135 *p++ = '\217'; 1136 *p++ = 'O'; 1137 psenc->singlegl = psenc->singlegr = 3; 1138 } else 1139 abort(); 1140 1141 sideok: 1142 if (psenc->singlegl == target) 1143 mask = 0x00; 1144 else if (psenc->singlegr == target) 1145 mask = 0x80; 1146 else if (psenc->gl == target) 1147 mask = 0x00; 1148 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1149 mask = 0x80; 1150 else 1151 abort(); 1152 1153 switch (cs.type) { 1154 case CS94: 1155 case CS96: 1156 i = 1; 1157 break; 1158 case CS94MULTI: 1159 case CS96MULTI: 1160 i = isthree(cs.final) ? 3 : 2; 1161 break; 1162 } 1163 while (i-- > 0) 1164 *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1165 1166 /* reset single shift state */ 1167 psenc->singlegl = psenc->singlegr = -1; 1168 1169 len = p - tmp; 1170 if (n < len) { 1171 if (result) 1172 *result = (char *)0; 1173 } else { 1174 if (result) 1175 *result = string + len; 1176 memcpy(string, tmp, len); 1177 } 1178 return len; 1179 } 1180 1181 static int 1182 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei, 1183 char * __restrict s, size_t n, 1184 _ISO2022State * __restrict psenc, 1185 size_t * __restrict nresult) 1186 { 1187 char buf[MB_LEN_MAX]; 1188 char *result; 1189 int len, ret; 1190 1191 _DIAGASSERT(ei != NULL); 1192 _DIAGASSERT(nresult != 0); 1193 _DIAGASSERT(s != NULL); 1194 1195 /* XXX state will be modified after this operation... */ 1196 len = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc); 1197 if (len==0) { 1198 ret = EINVAL; 1199 goto err; 1200 } 1201 if (sizeof(buf) < len || n < len-1) { 1202 /* XXX should recover state? */ 1203 ret = E2BIG; 1204 goto err; 1205 } 1206 1207 memcpy(s, buf, len-1); 1208 *nresult = (size_t)(len-1); 1209 return (0); 1210 1211 err: 1212 /* bound check failure */ 1213 *nresult = (size_t)-1; 1214 return ret; 1215 } 1216 1217 static int 1218 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1219 char * __restrict s, size_t n, wchar_t wc, 1220 _ISO2022State * __restrict psenc, 1221 size_t * __restrict nresult) 1222 { 1223 char buf[MB_LEN_MAX]; 1224 char *result; 1225 int len, ret; 1226 1227 _DIAGASSERT(ei != NULL); 1228 _DIAGASSERT(nresult != 0); 1229 _DIAGASSERT(s != NULL); 1230 1231 /* XXX state will be modified after this operation... */ 1232 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc); 1233 if (sizeof(buf) < len || n < len) { 1234 /* XXX should recover state? */ 1235 ret = E2BIG; 1236 goto err; 1237 } 1238 1239 memcpy(s, buf, len); 1240 *nresult = (size_t)len; 1241 return (0); 1242 1243 err: 1244 /* bound check failure */ 1245 *nresult = (size_t)-1; 1246 return ret; 1247 } 1248 1249 static __inline int 1250 /*ARGSUSED*/ 1251 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei, 1252 _csid_t * __restrict csid, 1253 _index_t * __restrict idx, wchar_t wc) 1254 { 1255 wchar_t m, nm; 1256 1257 _DIAGASSERT(csid != NULL && idx != NULL); 1258 1259 m = wc & 0x7FFF8080; 1260 nm = wc & 0x007F7F7F; 1261 if (m & 0x00800000) { 1262 nm &= 0x00007F7F; 1263 } else { 1264 m &= 0x7F008080; 1265 } 1266 if (nm & 0x007F0000) { 1267 /* ^3 mark */ 1268 m |= 0x007F0000; 1269 } else if (nm & 0x00007F00) { 1270 /* ^2 mark */ 1271 m |= 0x00007F00; 1272 } 1273 *csid = (_csid_t)m; 1274 *idx = (_index_t)nm; 1275 1276 return (0); 1277 } 1278 1279 static __inline int 1280 /*ARGSUSED*/ 1281 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei, 1282 wchar_t * __restrict wc, 1283 _csid_t csid, _index_t idx) 1284 { 1285 1286 _DIAGASSERT(ei != NULL && wc != NULL); 1287 1288 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 1289 1290 return (0); 1291 } 1292 1293 static __inline int 1294 /*ARGSUSED*/ 1295 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei, 1296 _ISO2022State * __restrict psenc, 1297 int * __restrict rstate) 1298 { 1299 1300 if (psenc->chlen == 0) { 1301 /* XXX: it should distinguish initial and stable. */ 1302 *rstate = _STDENC_SDGEN_STABLE; 1303 } else { 1304 if (psenc->ch[0] == '\033') 1305 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 1306 else 1307 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 1308 } 1309 1310 return 0; 1311 } 1312 1313 /* ---------------------------------------------------------------------- 1314 * public interface for ctype 1315 */ 1316 1317 _CITRUS_CTYPE_DECLS(ISO2022); 1318 _CITRUS_CTYPE_DEF_OPS(ISO2022); 1319 1320 #include "citrus_ctype_template.h" 1321 1322 /* ---------------------------------------------------------------------- 1323 * public interface for stdenc 1324 */ 1325 1326 _CITRUS_STDENC_DECLS(ISO2022); 1327 _CITRUS_STDENC_DEF_OPS(ISO2022); 1328 1329 #include "citrus_stdenc_template.h" 1330