1 /* $OpenBSD$ */ 2 3 /* 4 * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER 15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 21 #include <ctype.h> 22 #include <errno.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <wchar.h> 26 27 #include "compat.h" 28 #include "tmux.h" 29 30 static const wchar_t utf8_force_wide[] = { 31 0x0261D, 32 0x026F9, 33 0x0270A, 34 0x0270B, 35 0x0270C, 36 0x0270D, 37 0x1F1E6, 38 0x1F1E7, 39 0x1F1E8, 40 0x1F1E9, 41 0x1F1EA, 42 0x1F1EB, 43 0x1F1EC, 44 0x1F1ED, 45 0x1F1EE, 46 0x1F1EF, 47 0x1F1F0, 48 0x1F1F1, 49 0x1F1F2, 50 0x1F1F3, 51 0x1F1F4, 52 0x1F1F5, 53 0x1F1F6, 54 0x1F1F7, 55 0x1F1F8, 56 0x1F1F9, 57 0x1F1FA, 58 0x1F1FB, 59 0x1F1FC, 60 0x1F1FD, 61 0x1F1FE, 62 0x1F1FF, 63 0x1F385, 64 0x1F3C2, 65 0x1F3C3, 66 0x1F3C4, 67 0x1F3C7, 68 0x1F3CA, 69 0x1F3CB, 70 0x1F3CC, 71 0x1F3FB, 72 0x1F3FC, 73 0x1F3FD, 74 0x1F3FE, 75 0x1F3FF, 76 0x1F442, 77 0x1F443, 78 0x1F446, 79 0x1F447, 80 0x1F448, 81 0x1F449, 82 0x1F44A, 83 0x1F44B, 84 0x1F44C, 85 0x1F44D, 86 0x1F44E, 87 0x1F44F, 88 0x1F450, 89 0x1F466, 90 0x1F467, 91 0x1F468, 92 0x1F469, 93 0x1F46B, 94 0x1F46C, 95 0x1F46D, 96 0x1F46E, 97 0x1F470, 98 0x1F471, 99 0x1F472, 100 0x1F473, 101 0x1F474, 102 0x1F475, 103 0x1F476, 104 0x1F477, 105 0x1F478, 106 0x1F47C, 107 0x1F481, 108 0x1F482, 109 0x1F483, 110 0x1F485, 111 0x1F486, 112 0x1F487, 113 0x1F48F, 114 0x1F491, 115 0x1F4AA, 116 0x1F574, 117 0x1F575, 118 0x1F57A, 119 0x1F590, 120 0x1F595, 121 0x1F596, 122 0x1F645, 123 0x1F646, 124 0x1F647, 125 0x1F64B, 126 0x1F64C, 127 0x1F64D, 128 0x1F64E, 129 0x1F64F, 130 0x1F6A3, 131 0x1F6B4, 132 0x1F6B5, 133 0x1F6B6, 134 0x1F6C0, 135 0x1F6CC, 136 0x1F90C, 137 0x1F90F, 138 0x1F918, 139 0x1F919, 140 0x1F91A, 141 0x1F91B, 142 0x1F91C, 143 0x1F91D, 144 0x1F91E, 145 0x1F91F, 146 0x1F926, 147 0x1F930, 148 0x1F931, 149 0x1F932, 150 0x1F933, 151 0x1F934, 152 0x1F935, 153 0x1F936, 154 0x1F937, 155 0x1F938, 156 0x1F939, 157 0x1F93D, 158 0x1F93E, 159 0x1F977, 160 0x1F9B5, 161 0x1F9B6, 162 0x1F9B8, 163 0x1F9B9, 164 0x1F9BB, 165 0x1F9CD, 166 0x1F9CE, 167 0x1F9CF, 168 0x1F9D1, 169 0x1F9D2, 170 0x1F9D3, 171 0x1F9D4, 172 0x1F9D5, 173 0x1F9D6, 174 0x1F9D7, 175 0x1F9D8, 176 0x1F9D9, 177 0x1F9DA, 178 0x1F9DB, 179 0x1F9DC, 180 0x1F9DD, 181 0x1FAC3, 182 0x1FAC4, 183 0x1FAC5, 184 0x1FAF0, 185 0x1FAF1, 186 0x1FAF2, 187 0x1FAF3, 188 0x1FAF4, 189 0x1FAF5, 190 0x1FAF6, 191 0x1FAF7, 192 0x1FAF8 193 }; 194 195 struct utf8_item { 196 RB_ENTRY(utf8_item) index_entry; 197 u_int index; 198 199 RB_ENTRY(utf8_item) data_entry; 200 char data[UTF8_SIZE]; 201 u_char size; 202 }; 203 204 static int 205 utf8_data_cmp(struct utf8_item *ui1, struct utf8_item *ui2) 206 { 207 if (ui1->size < ui2->size) 208 return (-1); 209 if (ui1->size > ui2->size) 210 return (1); 211 return (memcmp(ui1->data, ui2->data, ui1->size)); 212 } 213 RB_HEAD(utf8_data_tree, utf8_item); 214 RB_GENERATE_STATIC(utf8_data_tree, utf8_item, data_entry, utf8_data_cmp); 215 static struct utf8_data_tree utf8_data_tree = RB_INITIALIZER(utf8_data_tree); 216 217 static int 218 utf8_index_cmp(struct utf8_item *ui1, struct utf8_item *ui2) 219 { 220 if (ui1->index < ui2->index) 221 return (-1); 222 if (ui1->index > ui2->index) 223 return (1); 224 return (0); 225 } 226 RB_HEAD(utf8_index_tree, utf8_item); 227 RB_GENERATE_STATIC(utf8_index_tree, utf8_item, index_entry, utf8_index_cmp); 228 static struct utf8_index_tree utf8_index_tree = RB_INITIALIZER(utf8_index_tree); 229 230 static u_int utf8_next_index; 231 232 #define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f) 233 #define UTF8_GET_WIDTH(uc) (((uc) >> 29) - 1) 234 235 #define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24) 236 #define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29) 237 238 /* Get a UTF-8 item from data. */ 239 static struct utf8_item * 240 utf8_item_by_data(const u_char *data, size_t size) 241 { 242 struct utf8_item ui; 243 244 memcpy(ui.data, data, size); 245 ui.size = size; 246 247 return (RB_FIND(utf8_data_tree, &utf8_data_tree, &ui)); 248 } 249 250 /* Get a UTF-8 item from data. */ 251 static struct utf8_item * 252 utf8_item_by_index(u_int index) 253 { 254 struct utf8_item ui; 255 256 ui.index = index; 257 258 return (RB_FIND(utf8_index_tree, &utf8_index_tree, &ui)); 259 } 260 261 /* Add a UTF-8 item. */ 262 static int 263 utf8_put_item(const u_char *data, size_t size, u_int *index) 264 { 265 struct utf8_item *ui; 266 267 ui = utf8_item_by_data((const unsigned char *)data, size); 268 if (ui != NULL) { 269 *index = ui->index; 270 log_debug("%s: found %.*s = %u", __func__, (int)size, data, 271 *index); 272 return (0); 273 } 274 275 if (utf8_next_index == 0xffffff + 1) 276 return (-1); 277 278 ui = xcalloc(1, sizeof *ui); 279 ui->index = utf8_next_index++; 280 RB_INSERT(utf8_index_tree, &utf8_index_tree, ui); 281 282 memcpy(ui->data, data, size); 283 ui->size = size; 284 RB_INSERT(utf8_data_tree, &utf8_data_tree, ui); 285 286 *index = ui->index; 287 log_debug("%s: added %.*s = %u", __func__, (int)size, data, *index); 288 return (0); 289 } 290 291 static int 292 utf8_table_cmp(const void *vp1, const void *vp2) 293 { 294 const wchar_t *wc1 = vp1, *wc2 = vp2; 295 296 if (*wc1 < *wc2) 297 return (-1); 298 if (*wc1 > *wc2) 299 return (1); 300 return (0); 301 } 302 303 /* Check if character in table. */ 304 int 305 utf8_in_table(wchar_t find, const wchar_t *table, u_int count) 306 { 307 wchar_t *found; 308 309 found = bsearch(&find, table, count, sizeof *table, utf8_table_cmp); 310 return (found != NULL); 311 } 312 313 /* Get UTF-8 character from data. */ 314 enum utf8_state 315 utf8_from_data(const struct utf8_data *ud, utf8_char *uc) 316 { 317 u_int index; 318 319 if (ud->width > 2) 320 fatalx("invalid UTF-8 width: %u", ud->width); 321 322 if (ud->size > UTF8_SIZE) 323 goto fail; 324 if (ud->size <= 3) { 325 index = (((utf8_char)ud->data[2] << 16)| 326 ((utf8_char)ud->data[1] << 8)| 327 ((utf8_char)ud->data[0])); 328 } else if (utf8_put_item(ud->data, ud->size, &index) != 0) 329 goto fail; 330 *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index; 331 log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size, 332 (int)ud->size, ud->data, *uc); 333 return (UTF8_DONE); 334 335 fail: 336 if (ud->width == 0) 337 *uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0); 338 else if (ud->width == 1) 339 *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20; 340 else 341 *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x2020; 342 return (UTF8_ERROR); 343 } 344 345 /* Get UTF-8 data from character. */ 346 void 347 utf8_to_data(utf8_char uc, struct utf8_data *ud) 348 { 349 struct utf8_item *ui; 350 u_int index; 351 352 memset(ud, 0, sizeof *ud); 353 ud->size = ud->have = UTF8_GET_SIZE(uc); 354 ud->width = UTF8_GET_WIDTH(uc); 355 356 if (ud->size <= 3) { 357 ud->data[2] = (uc >> 16); 358 ud->data[1] = ((uc >> 8) & 0xff); 359 ud->data[0] = (uc & 0xff); 360 } else { 361 index = (uc & 0xffffff); 362 if ((ui = utf8_item_by_index(index)) == NULL) 363 memset(ud->data, ' ', ud->size); 364 else 365 memcpy(ud->data, ui->data, ud->size); 366 } 367 368 log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size, 369 (int)ud->size, ud->data); 370 } 371 372 /* Get UTF-8 character from a single ASCII character. */ 373 u_int 374 utf8_build_one(u_char ch) 375 { 376 return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch); 377 } 378 379 /* Set a single character. */ 380 void 381 utf8_set(struct utf8_data *ud, u_char ch) 382 { 383 static const struct utf8_data empty = { { 0 }, 1, 1, 1 }; 384 385 memcpy(ud, &empty, sizeof *ud); 386 *ud->data = ch; 387 } 388 389 /* Copy UTF-8 character. */ 390 void 391 utf8_copy(struct utf8_data *to, const struct utf8_data *from) 392 { 393 u_int i; 394 395 memcpy(to, from, sizeof *to); 396 397 for (i = to->size; i < sizeof to->data; i++) 398 to->data[i] = '\0'; 399 } 400 401 /* Get width of Unicode character. */ 402 static enum utf8_state 403 utf8_width(struct utf8_data *ud, int *width) 404 { 405 wchar_t wc; 406 407 if (utf8_towc(ud, &wc) != UTF8_DONE) 408 return (UTF8_ERROR); 409 if (utf8_in_table(wc, utf8_force_wide, nitems(utf8_force_wide))) { 410 *width = 2; 411 return (UTF8_DONE); 412 } 413 #ifdef HAVE_UTF8PROC 414 *width = utf8proc_wcwidth(wc); 415 log_debug("utf8proc_wcwidth(%05X) returned %d", (u_int)wc, *width); 416 #else 417 *width = wcwidth(wc); 418 log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width); 419 if (*width < 0) { 420 /* 421 * C1 control characters are nonprintable, so they are always 422 * zero width. 423 */ 424 *width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1; 425 } 426 #endif 427 if (*width >= 0 && *width <= 0xff) 428 return (UTF8_DONE); 429 return (UTF8_ERROR); 430 } 431 432 /* Convert UTF-8 character to wide character. */ 433 enum utf8_state 434 utf8_towc(const struct utf8_data *ud, wchar_t *wc) 435 { 436 #ifdef HAVE_UTF8PROC 437 switch (utf8proc_mbtowc(wc, ud->data, ud->size)) { 438 #else 439 switch (mbtowc(wc, __UNCONST(ud->data), ud->size)) { 440 #endif 441 case -1: 442 log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data, 443 errno); 444 mbtowc(NULL, NULL, MB_CUR_MAX); 445 return (UTF8_ERROR); 446 case 0: 447 return (UTF8_ERROR); 448 } 449 log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)*wc); 450 return (UTF8_DONE); 451 } 452 453 /* Convert wide character to UTF-8 character. */ 454 enum utf8_state 455 utf8_fromwc(wchar_t wc, struct utf8_data *ud) 456 { 457 int size, width; 458 459 #ifdef HAVE_UTF8PROC 460 size = utf8proc_wctomb(ud->data, wc); 461 #else 462 size = wctomb((char *)ud->data, wc); 463 #endif 464 if (size < 0) { 465 log_debug("UTF-8 %d, wctomb() %d", wc, errno); 466 wctomb(NULL, 0); 467 return (UTF8_ERROR); 468 } 469 if (size == 0) 470 return (UTF8_ERROR); 471 ud->size = ud->have = size; 472 if (utf8_width(ud, &width) == UTF8_DONE) { 473 ud->width = width; 474 return (UTF8_DONE); 475 } 476 return (UTF8_ERROR); 477 } 478 479 /* 480 * Open UTF-8 sequence. 481 * 482 * 11000010-11011111 C2-DF start of 2-byte sequence 483 * 11100000-11101111 E0-EF start of 3-byte sequence 484 * 11110000-11110100 F0-F4 start of 4-byte sequence 485 */ 486 enum utf8_state 487 utf8_open(struct utf8_data *ud, u_char ch) 488 { 489 memset(ud, 0, sizeof *ud); 490 if (ch >= 0xc2 && ch <= 0xdf) 491 ud->size = 2; 492 else if (ch >= 0xe0 && ch <= 0xef) 493 ud->size = 3; 494 else if (ch >= 0xf0 && ch <= 0xf4) 495 ud->size = 4; 496 else 497 return (UTF8_ERROR); 498 utf8_append(ud, ch); 499 return (UTF8_MORE); 500 } 501 502 /* Append character to UTF-8, closing if finished. */ 503 enum utf8_state 504 utf8_append(struct utf8_data *ud, u_char ch) 505 { 506 int width; 507 508 if (ud->have >= ud->size) 509 fatalx("UTF-8 character overflow"); 510 if (ud->size > sizeof ud->data) 511 fatalx("UTF-8 character size too large"); 512 513 if (ud->have != 0 && (ch & 0xc0) != 0x80) 514 ud->width = 0xff; 515 516 ud->data[ud->have++] = ch; 517 if (ud->have != ud->size) 518 return (UTF8_MORE); 519 520 if (ud->width == 0xff) 521 return (UTF8_ERROR); 522 if (utf8_width(ud, &width) != UTF8_DONE) 523 return (UTF8_ERROR); 524 ud->width = width; 525 526 return (UTF8_DONE); 527 } 528 529 /* 530 * Encode len characters from src into dst, which is guaranteed to have four 531 * bytes available for each character from src (for \abc or UTF-8) plus space 532 * for \0. 533 */ 534 int 535 utf8_strvis(char *dst, const char *src, size_t len, int flag) 536 { 537 struct utf8_data ud; 538 const char *start = dst, *end = src + len; 539 enum utf8_state more; 540 size_t i; 541 542 while (src < end) { 543 if ((more = utf8_open(&ud, *src)) == UTF8_MORE) { 544 while (++src < end && more == UTF8_MORE) 545 more = utf8_append(&ud, *src); 546 if (more == UTF8_DONE) { 547 /* UTF-8 character finished. */ 548 for (i = 0; i < ud.size; i++) 549 *dst++ = ud.data[i]; 550 continue; 551 } 552 /* Not a complete, valid UTF-8 character. */ 553 src -= ud.have; 554 } 555 if ((flag & VIS_DQ) && src[0] == '$' && src < end - 1) { 556 if (isalpha((u_char)src[1]) || 557 src[1] == '_' || 558 src[1] == '{') 559 *dst++ = '\\'; 560 *dst++ = '$'; 561 } else if (src < end - 1) 562 dst = vis(dst, src[0], flag, src[1]); 563 else if (src < end) 564 dst = vis(dst, src[0], flag, '\0'); 565 src++; 566 } 567 *dst = '\0'; 568 return (dst - start); 569 } 570 571 /* Same as utf8_strvis but allocate the buffer. */ 572 int 573 utf8_stravis(char **dst, const char *src, int flag) 574 { 575 char *buf; 576 int len; 577 578 buf = xreallocarray(NULL, 4, strlen(src) + 1); 579 len = utf8_strvis(buf, src, strlen(src), flag); 580 581 *dst = xrealloc(buf, len + 1); 582 return (len); 583 } 584 585 /* Same as utf8_strvis but allocate the buffer. */ 586 int 587 utf8_stravisx(char **dst, const char *src, size_t srclen, int flag) 588 { 589 char *buf; 590 int len; 591 592 buf = xreallocarray(NULL, 4, srclen + 1); 593 len = utf8_strvis(buf, src, srclen, flag); 594 595 *dst = xrealloc(buf, len + 1); 596 return (len); 597 } 598 599 /* Does this string contain anything that isn't valid UTF-8? */ 600 int 601 utf8_isvalid(const char *s) 602 { 603 struct utf8_data ud; 604 const char *end; 605 enum utf8_state more; 606 607 end = s + strlen(s); 608 while (s < end) { 609 if ((more = utf8_open(&ud, *s)) == UTF8_MORE) { 610 while (++s < end && more == UTF8_MORE) 611 more = utf8_append(&ud, *s); 612 if (more == UTF8_DONE) 613 continue; 614 return (0); 615 } 616 if (*s < 0x20 || *s > 0x7e) 617 return (0); 618 s++; 619 } 620 return (1); 621 } 622 623 /* 624 * Sanitize a string, changing any UTF-8 characters to '_'. Caller should free 625 * the returned string. Anything not valid printable ASCII or UTF-8 is 626 * stripped. 627 */ 628 char * 629 utf8_sanitize(const char *src) 630 { 631 char *dst = NULL; 632 size_t n = 0; 633 enum utf8_state more; 634 struct utf8_data ud; 635 u_int i; 636 637 while (*src != '\0') { 638 dst = xreallocarray(dst, n + 1, sizeof *dst); 639 if ((more = utf8_open(&ud, *src)) == UTF8_MORE) { 640 while (*++src != '\0' && more == UTF8_MORE) 641 more = utf8_append(&ud, *src); 642 if (more == UTF8_DONE) { 643 dst = xreallocarray(dst, n + ud.width, 644 sizeof *dst); 645 for (i = 0; i < ud.width; i++) 646 dst[n++] = '_'; 647 continue; 648 } 649 src -= ud.have; 650 } 651 if (*src > 0x1f && *src < 0x7f) 652 dst[n++] = *src; 653 else 654 dst[n++] = '_'; 655 src++; 656 } 657 dst = xreallocarray(dst, n + 1, sizeof *dst); 658 dst[n] = '\0'; 659 return (dst); 660 } 661 662 /* Get UTF-8 buffer length. */ 663 size_t 664 utf8_strlen(const struct utf8_data *s) 665 { 666 size_t i; 667 668 for (i = 0; s[i].size != 0; i++) 669 /* nothing */; 670 return (i); 671 } 672 673 /* Get UTF-8 string width. */ 674 u_int 675 utf8_strwidth(const struct utf8_data *s, ssize_t n) 676 { 677 ssize_t i; 678 u_int width = 0; 679 680 for (i = 0; s[i].size != 0; i++) { 681 if (n != -1 && n == i) 682 break; 683 width += s[i].width; 684 } 685 return (width); 686 } 687 688 /* 689 * Convert a string into a buffer of UTF-8 characters. Terminated by size == 0. 690 * Caller frees. 691 */ 692 struct utf8_data * 693 utf8_fromcstr(const char *src) 694 { 695 struct utf8_data *dst = NULL; 696 size_t n = 0; 697 enum utf8_state more; 698 699 while (*src != '\0') { 700 dst = xreallocarray(dst, n + 1, sizeof *dst); 701 if ((more = utf8_open(&dst[n], *src)) == UTF8_MORE) { 702 while (*++src != '\0' && more == UTF8_MORE) 703 more = utf8_append(&dst[n], *src); 704 if (more == UTF8_DONE) { 705 n++; 706 continue; 707 } 708 src -= dst[n].have; 709 } 710 utf8_set(&dst[n], *src); 711 n++; 712 src++; 713 } 714 dst = xreallocarray(dst, n + 1, sizeof *dst); 715 dst[n].size = 0; 716 return (dst); 717 } 718 719 /* Convert from a buffer of UTF-8 characters into a string. Caller frees. */ 720 char * 721 utf8_tocstr(struct utf8_data *src) 722 { 723 char *dst = NULL; 724 size_t n = 0; 725 726 for(; src->size != 0; src++) { 727 dst = xreallocarray(dst, n + src->size, 1); 728 memcpy(dst + n, src->data, src->size); 729 n += src->size; 730 } 731 dst = xreallocarray(dst, n + 1, 1); 732 dst[n] = '\0'; 733 return (dst); 734 } 735 736 /* Get width of UTF-8 string. */ 737 u_int 738 utf8_cstrwidth(const char *s) 739 { 740 struct utf8_data tmp; 741 u_int width; 742 enum utf8_state more; 743 744 width = 0; 745 while (*s != '\0') { 746 if ((more = utf8_open(&tmp, *s)) == UTF8_MORE) { 747 while (*++s != '\0' && more == UTF8_MORE) 748 more = utf8_append(&tmp, *s); 749 if (more == UTF8_DONE) { 750 width += tmp.width; 751 continue; 752 } 753 s -= tmp.have; 754 } 755 if (*s > 0x1f && *s != 0x7f) 756 width++; 757 s++; 758 } 759 return (width); 760 } 761 762 /* Pad UTF-8 string to width on the left. Caller frees. */ 763 char * 764 utf8_padcstr(const char *s, u_int width) 765 { 766 size_t slen; 767 char *out; 768 u_int n, i; 769 770 n = utf8_cstrwidth(s); 771 if (n >= width) 772 return (xstrdup(s)); 773 774 slen = strlen(s); 775 out = xmalloc(slen + 1 + (width - n)); 776 memcpy(out, s, slen); 777 for (i = n; i < width; i++) 778 out[slen++] = ' '; 779 out[slen] = '\0'; 780 return (out); 781 } 782 783 /* Pad UTF-8 string to width on the right. Caller frees. */ 784 char * 785 utf8_rpadcstr(const char *s, u_int width) 786 { 787 size_t slen; 788 char *out; 789 u_int n, i; 790 791 n = utf8_cstrwidth(s); 792 if (n >= width) 793 return (xstrdup(s)); 794 795 slen = strlen(s); 796 out = xmalloc(slen + 1 + (width - n)); 797 for (i = 0; i < width - n; i++) 798 out[i] = ' '; 799 memcpy(out + i, s, slen); 800 out[i + slen] = '\0'; 801 return (out); 802 } 803 804 int 805 utf8_cstrhas(const char *s, const struct utf8_data *ud) 806 { 807 struct utf8_data *copy, *loop; 808 int found = 0; 809 810 copy = utf8_fromcstr(s); 811 for (loop = copy; loop->size != 0; loop++) { 812 if (loop->size != ud->size) 813 continue; 814 if (memcmp(loop->data, ud->data, loop->size) == 0) { 815 found = 1; 816 break; 817 } 818 } 819 free(copy); 820 821 return (found); 822 } 823