1 /* $OpenBSD: wsemul_subr.c,v 1.2 2023/03/06 17:14:44 miod Exp $ */ 2 3 /* 4 * Copyright (c) 2007, 2013 Miodrag Vallat. 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice, this permission notice, and the disclaimer below 9 * appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 /* 21 * Part of the UTF-8 state machine logic borrowed from citrus_utf8.c 22 * under the following licence: 23 */ 24 /*- 25 * Copyright (c) 2002-2004 Tim J. Robbins 26 * All rights reserved. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 40 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 47 * SUCH DAMAGE. 48 */ 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/errno.h> 53 54 #include <dev/wscons/wscons_features.h> 55 #include <dev/wscons/wsconsio.h> 56 #include <dev/wscons/wsdisplayvar.h> 57 #include <dev/wscons/wsemulvar.h> 58 #include <dev/wscons/wsksymdef.h> 59 60 int wsemul_local_translate(u_int32_t, kbd_t, u_char *); 61 62 /* 63 * Get characters from an input stream and update the input state. 64 * Processing stops when the stream is empty, or a complete character 65 * sequence has been recognized, in which case it returns zero. 66 */ 67 int 68 wsemul_getchar(const u_char **inbuf, u_int *inlen, 69 struct wsemul_inputstate *state, int allow_utf8) 70 { 71 u_int len = *inlen; 72 const u_char *buf = *inbuf; 73 #ifdef HAVE_UTF8_SUPPORT 74 int rc; 75 u_int32_t tmpchar, lbound; 76 u_int mbleft; 77 #endif 78 79 if (len == 0) 80 return EAGAIN; 81 82 #ifndef HAVE_UTF8_SUPPORT 83 state->inchar = *buf++; 84 state->mbleft = 0; 85 len--; 86 *inlen = len; 87 *inbuf = buf; 88 return 0; 89 #else 90 /* 91 * If we do not allow multibyte sequences, process as quickly 92 * as possible. 93 */ 94 if (!allow_utf8) { 95 state->inchar = *buf++; 96 state->mbleft = 0; 97 len--; 98 *inlen = len; 99 *inbuf = buf; 100 return 0; 101 } 102 103 rc = EAGAIN; 104 tmpchar = state->inchar; 105 lbound = state->lbound; 106 mbleft = state->mbleft; 107 108 while (len != 0) { 109 u_int32_t frag = (u_int32_t)*buf++; 110 len--; 111 112 /* 113 * If we are in the middle of a multibyte sequence, try 114 * to complete it. 115 */ 116 117 if (mbleft != 0) { 118 if ((frag & 0xc0) != 0x80) 119 goto invalid; 120 121 tmpchar = (tmpchar << 6) | (frag & 0x3f); 122 mbleft--; 123 if (mbleft == 0) { 124 if (tmpchar < lbound) 125 goto invalid; 126 if (tmpchar >= 0xd800 && tmpchar < 0xe000) 127 goto invalid; 128 if (tmpchar >= 0x110000) 129 goto invalid; 130 rc = 0; 131 break; 132 } 133 continue; 134 } 135 136 /* 137 * Otherwise let's decide if this is the start of a new 138 * multibyte sequence, or a 7-bit character. 139 */ 140 141 if ((frag & 0x80) == 0) { 142 tmpchar = frag; 143 rc = 0; 144 break; 145 } 146 147 if ((frag & 0xe0) == 0xc0) { 148 frag &= 0x1f; 149 mbleft = 1; 150 lbound = 0x80; 151 } else if ((frag & 0xf0) == 0xe0) { 152 frag &= 0x0f; 153 mbleft = 2; 154 lbound = 0x800; 155 } else if ((frag & 0xf8) == 0xf0) { 156 frag &= 0x07; 157 mbleft = 3; 158 lbound = 0x10000; 159 } else { 160 goto invalid; 161 } 162 163 tmpchar = frag; 164 state->lbound = lbound; 165 continue; 166 167 invalid: 168 /* Abort the ill-formed sequence and continue */ 169 mbleft = 0; 170 tmpchar = 0; 171 rc = EILSEQ; 172 } 173 174 state->inchar = tmpchar; 175 state->mbleft = mbleft; 176 *inlen = len; 177 *inbuf = buf; 178 return rc; 179 #endif 180 } 181 182 /* 183 * Unicode Cyrillic to KOI8 translation table (starts at U+0400), 184 * from RFC 2319. 185 */ 186 const u_int8_t cyrillic_to_koi8[] = { 187 0x00, /* IE grave */ /* 0400 */ 188 0xb3, /* IO */ 189 0x00, /* DJE */ 190 0x00, /* GJE */ 191 0xb4, /* UKR IE */ 192 0x00, /* DZE */ 193 0xb6, /* BYE/UKR I */ 194 0xb7, /* YI */ 195 0x00, /* JE */ 196 0x00, /* LJE */ 197 0x00, /* NJE */ 198 0x00, /* TSHE */ 199 0x00, /* KJE */ 200 0x00, /* I grave */ 201 0x00, /* short U */ 202 0x00, /* DZHE */ 203 0xe1, /* A */ /* 0410 */ 204 0xe2, /* BE */ 205 0xf7, /* VE */ 206 0xe7, /* GHE */ 207 0xe4, /* DE */ 208 0xe5, /* IE */ 209 0xf6, /* ZHE */ 210 0xfa, /* ZE */ 211 0xe9, /* I */ 212 0xea, /* short I */ 213 0xeb, /* KA */ 214 0xec, /* EL */ 215 0xed, /* EM */ 216 0xee, /* EN */ 217 0xef, /* O */ 218 0xf0, /* PE */ 219 0xf2, /* ER */ /* 0420 */ 220 0xf3, /* ES */ 221 0xf4, /* TE */ 222 0xf5, /* U */ 223 0xe6, /* EF */ 224 0xe8, /* HA */ 225 0xe3, /* TSE */ 226 0xfe, /* CHE */ 227 0xfb, /* SHA */ 228 0xfd, /* SHCHA */ 229 0xff, /* HARD SIGN */ 230 0xf9, /* YERU */ 231 0xf8, /* SOFT SIGN */ 232 0xfc, /* E */ 233 0xe0, /* YU */ 234 0xf1, /* YA */ 235 0xc1, /* a */ /* 0430 */ 236 0xc2, /* be */ 237 0xd7, /* ve */ 238 0xc7, /* ghe */ 239 0xc4, /* de */ 240 0xc5, /* ie */ 241 0xd6, /* zhe */ 242 0xda, /* ze */ 243 0xc9, /* i */ 244 0xca, /* short i */ 245 0xcb, /* ka */ 246 0xcc, /* el */ 247 0xcd, /* em */ 248 0xce, /* en */ 249 0xcf, /* o */ 250 0xd0, /* pe */ 251 0xd2, /* er */ /* 0440 */ 252 0xd3, /* es */ 253 0xd4, /* te */ 254 0xd5, /* u */ 255 0xc6, /* ef */ 256 0xc8, /* ha */ 257 0xc3, /* tse */ 258 0xde, /* che */ 259 0xdb, /* sha */ 260 0xdd, /* shcha */ 261 0xdf, /* hard sign */ 262 0xd9, /* yeru */ 263 0xd8, /* soft sign */ 264 0xdc, /* e */ 265 0xc0, /* yu */ 266 0xd1, /* ya */ 267 0x00, /* ie grave */ /* 0450 */ 268 0xa3, /* io */ 269 0x00, /* dje */ 270 0x00, /* GJE */ 271 0xa4, /* UKR ie */ 272 0x00, /* DZE */ 273 0xa6, /* BYE/UKR I */ 274 0xa7, /* YI */ 275 0x00, /* JE */ 276 0x00, /* LJE */ 277 0x00, /* NJE */ 278 0x00, /* TSHE */ 279 0x00, /* KJE */ 280 0x00, /* I grave */ 281 0x00, /* short U */ 282 0x00 /* DZHE */ 283 }; 284 285 /* 286 * Europe to Latin-2 translation table (starts at U+0100). 287 */ 288 const u_int8_t unicode_to_latin2[] = { 289 0x00, /* A macron */ /* 0100 */ 290 0x00, /* a macron */ 291 0xc3, /* A breve */ 292 0xe3, /* a breve */ 293 0xa1, /* A ogonek */ 294 0xb1, /* a ogonek */ 295 0xc6, /* C acute */ 296 0xe6, /* c acute */ 297 0x00, /* C circumflex */ 298 0x00, /* c circumflex */ 299 0x00, /* C abovering */ 300 0x00, /* c abovering */ 301 0xc8, /* C caron */ 302 0xe8, /* c caron */ 303 0xcf, /* D caron */ 304 0xef, /* d caron */ 305 0xd0, /* D stroke */ /* 0110 */ 306 0xf0, /* d stroke */ 307 0x00, /* E macron */ 308 0x00, /* e macron */ 309 0x00, /* E breve */ 310 0x00, /* e breve */ 311 0x00, /* E abovering */ 312 0x00, /* e abovering */ 313 0xca, /* E ogonek */ 314 0xea, /* e ogonek */ 315 0xcc, /* E caron */ 316 0xec, /* e caron */ 317 0x00, /* G circumflex */ 318 0x00, /* g circumflex */ 319 0x00, /* G breve */ 320 0x00, /* g breve */ 321 0x00, /* G abovering */ /* 0120 */ 322 0x00, /* g abovering */ 323 0x00, /* G cedilla */ 324 0x00, /* g cedilla */ 325 0x00, /* H circumflex */ 326 0x00, /* h circumflex */ 327 0x00, /* H stroke */ 328 0x00, /* h stroke */ 329 0x00, /* I tilde */ 330 0x00, /* i tilde */ 331 0x00, /* I macron */ 332 0x00, /* i macron */ 333 0x00, /* I breve */ 334 0x00, /* i breve */ 335 0x00, /* I ogonek */ 336 0x00, /* i ogonek */ 337 0x00, /* dotted I */ /* 0130 */ 338 0x00, /* non-dotted i */ 339 0x00, /* ligature IJ */ 340 0x00, /* ligature ij */ 341 0x00, /* J circumflex */ 342 0x00, /* j circumflex */ 343 0x00, /* K cedilla */ 344 0x00, /* k cedilla */ 345 0x00, /* kra */ 346 0xc5, /* L acute */ 347 0xe5, /* l acute */ 348 0x00, /* L cedilla */ 349 0x00, /* l cedilla */ 350 0xa5, /* L caron */ 351 0xb5, /* l caron */ 352 0x00, /* L middle dot */ 353 0x00, /* l middle dot */ /* 0140 */ 354 0xa3, /* L stroke */ 355 0xb3, /* l stroke */ 356 0xd1, /* N acute */ 357 0xf1, /* n acute */ 358 0x00, /* N cedilla */ 359 0x00, /* n cedilla */ 360 0xd2, /* N caron */ 361 0xf2, /* n caron */ 362 0x00, /* N preceded by apostrophe */ 363 0x00, /* ENG */ 364 0x00, /* eng */ 365 0x00, /* O macron */ 366 0x00, /* o macron */ 367 0x00, /* O breve */ 368 0x00, /* o breve */ 369 0xd5, /* O double acute */ /* 0150 */ 370 0xf5, /* o double acute */ 371 0x00, /* ligature OE */ 372 0x00, /* ligature oe */ 373 0xc0, /* R acute */ 374 0xe0, /* r acute */ 375 0x00, /* R cedilla */ 376 0x00, /* r cedilla */ 377 0xd8, /* R caron */ 378 0xf8, /* r caron */ 379 0xa6, /* S acute */ 380 0xb6, /* s acute */ 381 0x00, /* S circumflex */ 382 0x00, /* s circumflex */ 383 0xaa, /* S cedilla */ 384 0xba, /* s cedilla */ 385 0xa9, /* S caron */ /* 0160 */ 386 0xb9, /* s caron */ 387 0xde, /* T cedilla */ 388 0xfe, /* t cedilla */ 389 0xab, /* T caron */ 390 0xbb, /* t caron */ 391 0x00, /* T stroke */ 392 0x00, /* t stroke */ 393 0x00, /* U tilde */ 394 0x00, /* u tilde */ 395 0x00, /* U macron */ 396 0x00, /* u macron */ 397 0x00, /* U breve */ 398 0x00, /* u breve */ 399 0xd9, /* U abovering */ 400 0xf9, /* u abovering */ 401 0xdb, /* U double acute */ /* 0170 */ 402 0xfb, /* u double acute */ 403 0x00, /* U ogonek */ 404 0x00, /* u ogonek */ 405 0x00, /* W circumflex */ 406 0x00, /* w circumflex */ 407 0x00, /* Y circumflex */ 408 0x00, /* y circumflex */ 409 0x00, /* Y diaeresis */ 410 0xac, /* Z acute */ 411 0xbc, /* z acute */ 412 0xaf, /* Z abovering */ 413 0xbf, /* z abovering */ 414 0xae, /* Z caron */ 415 0xbe, /* z caron */ 416 0x00 /* long s */ 417 }; 418 419 /* 420 * Baltic to Latin-7 translation table. 421 */ 422 const u_int8_t unicode_to_latin7[] = { 423 0xc2, /* A macron */ /* 0100 */ 424 0xe2, /* a macron */ 425 0x00, /* A breve */ 426 0x00, /* a breve */ 427 0xc0, /* A ogonek */ 428 0xe0, /* a ogonek */ 429 0xc3, /* C acute */ 430 0xe3, /* c acute */ 431 0x00, /* C circumflex */ 432 0x00, /* c circumflex */ 433 0x00, /* C abovering */ 434 0x00, /* c abovering */ 435 0xc8, /* C caron */ 436 0xe8, /* c caron */ 437 0x00, /* D caron */ 438 0x00, /* d caron */ 439 0x00, /* D stroke */ /* 0110 */ 440 0x00, /* d stroke */ 441 0xc7, /* E macron */ 442 0xe7, /* e macron */ 443 0x00, /* E breve */ 444 0x00, /* e breve */ 445 0xcb, /* E abovering */ 446 0xeb, /* e abovering */ 447 0xc6, /* E ogonek */ 448 0xe6, /* e ogonek */ 449 0x00, /* E caron */ 450 0x00, /* e caron */ 451 0x00, /* G circumflex */ 452 0x00, /* g circumflex */ 453 0x00, /* G breve */ 454 0x00, /* g breve */ 455 0x00, /* G abovering */ /* 0120 */ 456 0x00, /* g abovering */ 457 0xcc, /* G cedilla */ 458 0xec, /* g cedilla */ 459 0x00, /* H circumflex */ 460 0x00, /* h circumflex */ 461 0x00, /* H stroke */ 462 0x00, /* h stroke */ 463 0x00, /* I tilde */ 464 0x00, /* i tilde */ 465 0xce, /* I macron */ 466 0xee, /* i macron */ 467 0x00, /* I breve */ 468 0x00, /* i breve */ 469 0xc1, /* I ogonek */ 470 0xe1, /* i ogonek */ 471 0x00, /* dotted I */ /* 0130 */ 472 0x00, /* non-dotted I */ 473 0x00, /* ligature IJ */ 474 0x00, /* ligature ij */ 475 0x00, /* J circumflex */ 476 0x00, /* j circumflex */ 477 0xcd, /* K cedilla */ 478 0xed, /* k cedilla */ 479 0x00, /* kra */ 480 0x00, /* L acute */ 481 0x00, /* l acute */ 482 0xcf, /* L cedilla */ 483 0xef, /* l cedilla */ 484 0x00, /* L caron */ 485 0x00, /* l caron */ 486 0x00, /* L middle dot */ 487 0x00, /* l middle dot */ /* 0140 */ 488 0xd9, /* L stroke */ 489 0xf9, /* l stroke */ 490 0xd1, /* N acute */ 491 0xf1, /* n acute */ 492 0xd2, /* N cedilla */ 493 0xf2, /* n cedilla */ 494 0x00, /* N caron */ 495 0x00, /* n caron */ 496 0x00, /* N preceded by apostrophe */ 497 0x00, /* ENG */ 498 0x00, /* eng */ 499 0xd4, /* O macron */ 500 0xf4, /* o macron */ 501 0x00, /* O breve */ 502 0x00, /* o breve */ 503 0x00, /* O double acute */ /* 0150 */ 504 0x00, /* o double acute */ 505 0x00, /* ligature OE */ 506 0x00, /* ligature oe */ 507 0x00, /* R acute */ 508 0x00, /* r acute */ 509 0xaa, /* R cedilla */ 510 0xba, /* r cedilla */ 511 0x00, /* R caron */ 512 0x00, /* r caron */ 513 0xda, /* S acute */ 514 0xfa, /* s acute */ 515 0x00, /* S circumflex */ 516 0x00, /* s circumflex */ 517 0x00, /* S cedilla */ 518 0x00, /* s cedilla */ 519 0xd0, /* S caron */ /* 0160 */ 520 0xf0, /* s caron */ 521 0x00, /* T cedilla */ 522 0x00, /* t cedilla */ 523 0x00, /* T caron */ 524 0x00, /* t caron */ 525 0x00, /* T stroke */ 526 0x00, /* t stroke */ 527 0x00, /* U tilde */ 528 0x00, /* u tilde */ 529 0xdb, /* U macron */ 530 0xfb, /* u macron */ 531 0x00, /* U breve */ 532 0x00, /* u breve */ 533 0x00, /* U abovering */ 534 0x00, /* u abovering */ 535 0x00, /* U double acute */ /* 0170 */ 536 0x00, /* u double acute */ 537 0xd8, /* U ogonek */ 538 0xf8, /* u ogonek */ 539 0x00, /* W circumflex */ 540 0x00, /* w circumflex */ 541 0x00, /* Y circumflex */ 542 0x00, /* y circumflex */ 543 0x00, /* Y diaeresis */ 544 0xca, /* Z acute */ 545 0xea, /* z acute */ 546 0xdd, /* Z abovering */ 547 0xfd, /* z abovering */ 548 0xde, /* Z caron */ 549 0xfe, /* z caron */ 550 0x00 /* long s */ 551 }; 552 553 /* 554 * Keysym to local 8-bit charset sequence translation function. 555 * The out buffer is at least one character long. 556 * The keyboard layout is used as a hint to decide which latin charset to 557 * assume. 558 */ 559 int 560 wsemul_local_translate(u_int32_t unisym, kbd_t layout, u_char *out) 561 { 562 switch (unisym >> 7) { 563 case 0x0080 >> 7: 564 switch (KB_ENCODING(layout)) { 565 case KB_LT: 566 case KB_LV: 567 switch (unisym) { 568 case KS_L7_AE: 569 unisym = 0xaf; 570 break; 571 case KS_L7_Ostroke: 572 unisym = 0xa8; 573 break; 574 case KS_L7_ae: 575 unisym = 0xbf; 576 break; 577 case KS_L7_ostroke: 578 unisym = 0xb8; 579 break; 580 } 581 } 582 break; 583 584 case 0x0100 >> 7: 585 switch (KB_ENCODING(layout)) { 586 case KB_LT: 587 case KB_LV: 588 if (unisym < 0x100 + nitems(unicode_to_latin7) && 589 unicode_to_latin7[unisym - 0x100] != 0) 590 unisym = unicode_to_latin7[unisym - 0x100]; 591 break; 592 case KB_TR: 593 switch (unisym) { 594 case KS_L5_Gbreve: 595 unisym = 0xd0; 596 break; 597 case KS_L5_gbreve: 598 unisym = 0xf0; 599 break; 600 case KS_L5_Idotabove: 601 unisym = 0xdd; 602 break; 603 case KS_L5_idotless: 604 unisym = 0xfd; 605 break; 606 case KS_L5_Scedilla: 607 unisym = 0xde; 608 break; 609 case KS_L5_scedilla: 610 unisym = 0xfe; 611 break; 612 } 613 break; 614 case KB_PL: 615 case KB_SI: 616 if (unisym < 0x100 + nitems(unicode_to_latin2) && 617 unicode_to_latin2[unisym - 0x100] != 0) 618 unisym = unicode_to_latin2[unisym - 0x100]; 619 break; 620 } 621 break; 622 623 case 0x0280 >> 7: 624 switch (KB_ENCODING(layout)) { 625 case KB_PL: 626 case KB_SI: 627 switch (unisym) { 628 case KS_L2_caron: 629 unisym = 0xb7; 630 break; 631 case KS_L2_breve: 632 unisym = 0xa2; 633 break; 634 case KS_L2_dotabove: 635 unisym = 0xff; 636 break; 637 case KS_L2_ogonek: 638 unisym = 0xb2; 639 break; 640 case KS_L2_dblacute: 641 unisym = 0xbd; 642 break; 643 } 644 break; 645 } 646 break; 647 648 case 0x0400 >> 7: 649 if (unisym < 0x400 + 650 sizeof(cyrillic_to_koi8) / sizeof(cyrillic_to_koi8[0]) && 651 cyrillic_to_koi8[unisym - 0x400] != 0) 652 unisym = cyrillic_to_koi8[unisym - 0x400]; 653 break; 654 case 0x0480 >> 7: 655 if (unisym == KS_Cyrillic_GHEUKR) 656 unisym = 0xbd; /* ukrainian GHE */ 657 else if (unisym == KS_Cyrillic_gheukr) 658 unisym = 0xad; /* ukrainian ghe */ 659 break; 660 661 case 0x2000 >> 7: 662 switch (KB_ENCODING(layout)) { 663 case KB_LT: 664 case KB_LV: 665 switch (unisym) { 666 case KS_L7_rightsnglquot: 667 unisym = 0xff; 668 break; 669 case KS_L7_leftdblquot: 670 unisym = 0xb4; 671 break; 672 case KS_L7_rightdblquot: 673 unisym = 0xa1; 674 break; 675 case KS_L7_dbllow9quot: 676 unisym = 0xa5; 677 break; 678 } 679 } 680 break; 681 682 } 683 684 out[0] = unisym & 0xff; 685 return (1); 686 } 687 688 /* 689 * Keysym to UTF-8 sequence translation function. 690 * The out buffer is at least 4 characters long. 691 */ 692 int 693 wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out, 694 int allow_utf8) 695 { 696 #ifndef HAVE_UTF8_SUPPORT 697 return (wsemul_local_translate(unisym, layout, out)); 698 #else 699 u_int pos, length, headpat; 700 701 if (!allow_utf8) 702 return wsemul_local_translate(unisym, layout, out); 703 704 if (unisym < 0x80) { 705 /* Fast path for plain ASCII characters. */ 706 *out = (u_char)unisym; 707 return 1; 708 } 709 710 if (unisym < 0x800) { 711 headpat = 0xc0; 712 length = 2; 713 } else if (unisym < 0x10000) { 714 if (unisym >= 0xd800 && unisym < 0xe000) 715 return 0; 716 headpat = 0xe0; 717 length = 3; 718 } else { 719 if (unisym >= 0x110000) 720 return 0; 721 headpat = 0xf0; 722 length = 4; 723 } 724 725 for (pos = length - 1; pos > 0; pos--) { 726 out[pos] = 0x80 | (unisym & 0x3f); 727 unisym >>= 6; 728 } 729 out[0] = headpat | unisym; 730 731 return length; 732 #endif 733 } 734