1 /* $OpenBSD: utf8.c,v 1.66 2024/07/12 11:21:18 nicm Exp $ */ 2 3 /* 4 * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER 15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 21 #include <ctype.h> 22 #include <errno.h> 23 #include <stdlib.h> 24 #include <string.h> 25 #include <vis.h> 26 27 #include "tmux.h" 28 29 static const wchar_t utf8_force_wide[] = { 30 0x0261D, 31 0x026F9, 32 0x0270A, 33 0x0270B, 34 0x0270C, 35 0x0270D, 36 0x1F1E6, 37 0x1F1E7, 38 0x1F1E8, 39 0x1F1E9, 40 0x1F1EA, 41 0x1F1EB, 42 0x1F1EC, 43 0x1F1ED, 44 0x1F1EE, 45 0x1F1EF, 46 0x1F1F0, 47 0x1F1F1, 48 0x1F1F2, 49 0x1F1F3, 50 0x1F1F4, 51 0x1F1F5, 52 0x1F1F6, 53 0x1F1F7, 54 0x1F1F8, 55 0x1F1F9, 56 0x1F1FA, 57 0x1F1FB, 58 0x1F1FC, 59 0x1F1FD, 60 0x1F1FE, 61 0x1F1FF, 62 0x1F385, 63 0x1F3C2, 64 0x1F3C3, 65 0x1F3C4, 66 0x1F3C7, 67 0x1F3CA, 68 0x1F3CB, 69 0x1F3CC, 70 0x1F3FB, 71 0x1F3FC, 72 0x1F3FD, 73 0x1F3FE, 74 0x1F3FF, 75 0x1F442, 76 0x1F443, 77 0x1F446, 78 0x1F447, 79 0x1F448, 80 0x1F449, 81 0x1F44A, 82 0x1F44B, 83 0x1F44C, 84 0x1F44D, 85 0x1F44E, 86 0x1F44F, 87 0x1F450, 88 0x1F466, 89 0x1F467, 90 0x1F468, 91 0x1F469, 92 0x1F46B, 93 0x1F46C, 94 0x1F46D, 95 0x1F46E, 96 0x1F470, 97 0x1F471, 98 0x1F472, 99 0x1F473, 100 0x1F474, 101 0x1F475, 102 0x1F476, 103 0x1F477, 104 0x1F478, 105 0x1F47C, 106 0x1F481, 107 0x1F482, 108 0x1F483, 109 0x1F485, 110 0x1F486, 111 0x1F487, 112 0x1F48F, 113 0x1F491, 114 0x1F4AA, 115 0x1F574, 116 0x1F575, 117 0x1F57A, 118 0x1F590, 119 0x1F595, 120 0x1F596, 121 0x1F645, 122 0x1F646, 123 0x1F647, 124 0x1F64B, 125 0x1F64C, 126 0x1F64D, 127 0x1F64E, 128 0x1F64F, 129 0x1F6A3, 130 0x1F6B4, 131 0x1F6B5, 132 0x1F6B6, 133 0x1F6C0, 134 0x1F6CC, 135 0x1F90C, 136 0x1F90F, 137 0x1F918, 138 0x1F919, 139 0x1F91A, 140 0x1F91B, 141 0x1F91C, 142 0x1F91D, 143 0x1F91E, 144 0x1F91F, 145 0x1F926, 146 0x1F930, 147 0x1F931, 148 0x1F932, 149 0x1F933, 150 0x1F934, 151 0x1F935, 152 0x1F936, 153 0x1F937, 154 0x1F938, 155 0x1F939, 156 0x1F93D, 157 0x1F93E, 158 0x1F977, 159 0x1F9B5, 160 0x1F9B6, 161 0x1F9B8, 162 0x1F9B9, 163 0x1F9BB, 164 0x1F9CD, 165 0x1F9CE, 166 0x1F9CF, 167 0x1F9D1, 168 0x1F9D2, 169 0x1F9D3, 170 0x1F9D4, 171 0x1F9D5, 172 0x1F9D6, 173 0x1F9D7, 174 0x1F9D8, 175 0x1F9D9, 176 0x1F9DA, 177 0x1F9DB, 178 0x1F9DC, 179 0x1F9DD, 180 0x1FAC3, 181 0x1FAC4, 182 0x1FAC5, 183 0x1FAF0, 184 0x1FAF1, 185 0x1FAF2, 186 0x1FAF3, 187 0x1FAF4, 188 0x1FAF5, 189 0x1FAF6, 190 0x1FAF7, 191 0x1FAF8 192 }; 193 194 struct utf8_item { 195 RB_ENTRY(utf8_item) index_entry; 196 u_int index; 197 198 RB_ENTRY(utf8_item) data_entry; 199 char data[UTF8_SIZE]; 200 u_char size; 201 }; 202 203 static int 204 utf8_data_cmp(struct utf8_item *ui1, struct utf8_item *ui2) 205 { 206 if (ui1->size < ui2->size) 207 return (-1); 208 if (ui1->size > ui2->size) 209 return (1); 210 return (memcmp(ui1->data, ui2->data, ui1->size)); 211 } 212 RB_HEAD(utf8_data_tree, utf8_item); 213 RB_GENERATE_STATIC(utf8_data_tree, utf8_item, data_entry, utf8_data_cmp); 214 static struct utf8_data_tree utf8_data_tree = RB_INITIALIZER(utf8_data_tree); 215 216 static int 217 utf8_index_cmp(struct utf8_item *ui1, struct utf8_item *ui2) 218 { 219 if (ui1->index < ui2->index) 220 return (-1); 221 if (ui1->index > ui2->index) 222 return (1); 223 return (0); 224 } 225 RB_HEAD(utf8_index_tree, utf8_item); 226 RB_GENERATE_STATIC(utf8_index_tree, utf8_item, index_entry, utf8_index_cmp); 227 static struct utf8_index_tree utf8_index_tree = RB_INITIALIZER(utf8_index_tree); 228 229 static u_int utf8_next_index; 230 231 #define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f) 232 #define UTF8_GET_WIDTH(uc) (((uc) >> 29) - 1) 233 234 #define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24) 235 #define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29) 236 237 /* Get a UTF-8 item from data. */ 238 static struct utf8_item * 239 utf8_item_by_data(const u_char *data, size_t size) 240 { 241 struct utf8_item ui; 242 243 memcpy(ui.data, data, size); 244 ui.size = size; 245 246 return (RB_FIND(utf8_data_tree, &utf8_data_tree, &ui)); 247 } 248 249 /* Get a UTF-8 item from data. */ 250 static struct utf8_item * 251 utf8_item_by_index(u_int index) 252 { 253 struct utf8_item ui; 254 255 ui.index = index; 256 257 return (RB_FIND(utf8_index_tree, &utf8_index_tree, &ui)); 258 } 259 260 /* Add a UTF-8 item. */ 261 static int 262 utf8_put_item(const u_char *data, size_t size, u_int *index) 263 { 264 struct utf8_item *ui; 265 266 ui = utf8_item_by_data(data, size); 267 if (ui != NULL) { 268 *index = ui->index; 269 log_debug("%s: found %.*s = %u", __func__, (int)size, data, 270 *index); 271 return (0); 272 } 273 274 if (utf8_next_index == 0xffffff + 1) 275 return (-1); 276 277 ui = xcalloc(1, sizeof *ui); 278 ui->index = utf8_next_index++; 279 RB_INSERT(utf8_index_tree, &utf8_index_tree, ui); 280 281 memcpy(ui->data, data, size); 282 ui->size = size; 283 RB_INSERT(utf8_data_tree, &utf8_data_tree, ui); 284 285 *index = ui->index; 286 log_debug("%s: added %.*s = %u", __func__, (int)size, data, *index); 287 return (0); 288 } 289 290 static int 291 utf8_table_cmp(const void *vp1, const void *vp2) 292 { 293 const wchar_t *wc1 = vp1, *wc2 = vp2; 294 295 if (*wc1 < *wc2) 296 return (-1); 297 if (*wc1 > *wc2) 298 return (1); 299 return (0); 300 } 301 302 /* Check if character in table. */ 303 int 304 utf8_in_table(wchar_t find, const wchar_t *table, u_int count) 305 { 306 wchar_t *found; 307 308 found = bsearch(&find, table, count, sizeof *table, utf8_table_cmp); 309 return (found != NULL); 310 } 311 312 /* Get UTF-8 character from data. */ 313 enum utf8_state 314 utf8_from_data(const struct utf8_data *ud, utf8_char *uc) 315 { 316 u_int index; 317 318 if (ud->width > 2) 319 fatalx("invalid UTF-8 width: %u", ud->width); 320 321 if (ud->size > UTF8_SIZE) 322 goto fail; 323 if (ud->size <= 3) { 324 index = (((utf8_char)ud->data[2] << 16)| 325 ((utf8_char)ud->data[1] << 8)| 326 ((utf8_char)ud->data[0])); 327 } else if (utf8_put_item(ud->data, ud->size, &index) != 0) 328 goto fail; 329 *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index; 330 log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size, 331 (int)ud->size, ud->data, *uc); 332 return (UTF8_DONE); 333 334 fail: 335 if (ud->width == 0) 336 *uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0); 337 else if (ud->width == 1) 338 *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20; 339 else 340 *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x2020; 341 return (UTF8_ERROR); 342 } 343 344 /* Get UTF-8 data from character. */ 345 void 346 utf8_to_data(utf8_char uc, struct utf8_data *ud) 347 { 348 struct utf8_item *ui; 349 u_int index; 350 351 memset(ud, 0, sizeof *ud); 352 ud->size = ud->have = UTF8_GET_SIZE(uc); 353 ud->width = UTF8_GET_WIDTH(uc); 354 355 if (ud->size <= 3) { 356 ud->data[2] = (uc >> 16); 357 ud->data[1] = ((uc >> 8) & 0xff); 358 ud->data[0] = (uc & 0xff); 359 } else { 360 index = (uc & 0xffffff); 361 if ((ui = utf8_item_by_index(index)) == NULL) 362 memset(ud->data, ' ', ud->size); 363 else 364 memcpy(ud->data, ui->data, ud->size); 365 } 366 367 log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size, 368 (int)ud->size, ud->data); 369 } 370 371 /* Get UTF-8 character from a single ASCII character. */ 372 u_int 373 utf8_build_one(u_char ch) 374 { 375 return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch); 376 } 377 378 /* Set a single character. */ 379 void 380 utf8_set(struct utf8_data *ud, u_char ch) 381 { 382 static const struct utf8_data empty = { { 0 }, 1, 1, 1 }; 383 384 memcpy(ud, &empty, sizeof *ud); 385 *ud->data = ch; 386 } 387 388 /* Copy UTF-8 character. */ 389 void 390 utf8_copy(struct utf8_data *to, const struct utf8_data *from) 391 { 392 u_int i; 393 394 memcpy(to, from, sizeof *to); 395 396 for (i = to->size; i < sizeof to->data; i++) 397 to->data[i] = '\0'; 398 } 399 400 /* Get width of Unicode character. */ 401 static enum utf8_state 402 utf8_width(struct utf8_data *ud, int *width) 403 { 404 wchar_t wc; 405 406 if (utf8_towc(ud, &wc) != UTF8_DONE) 407 return (UTF8_ERROR); 408 if (utf8_in_table(wc, utf8_force_wide, nitems(utf8_force_wide))) { 409 *width = 2; 410 return (UTF8_DONE); 411 } 412 413 *width = wcwidth(wc); 414 log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width); 415 if (*width < 0) { 416 /* 417 * C1 control characters are nonprintable, so they are always 418 * zero width. 419 */ 420 *width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1; 421 } 422 if (*width >= 0 && *width <= 0xff) 423 return (UTF8_DONE); 424 return (UTF8_ERROR); 425 } 426 427 /* Convert UTF-8 character to wide character. */ 428 enum utf8_state 429 utf8_towc(const struct utf8_data *ud, wchar_t *wc) 430 { 431 switch (mbtowc(wc, ud->data, ud->size)) { 432 case -1: 433 log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data, 434 errno); 435 mbtowc(NULL, NULL, MB_CUR_MAX); 436 return (UTF8_ERROR); 437 case 0: 438 return (UTF8_ERROR); 439 } 440 log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)*wc); 441 return (UTF8_DONE); 442 } 443 444 /* Convert wide character to UTF-8 character. */ 445 enum utf8_state 446 utf8_fromwc(wchar_t wc, struct utf8_data *ud) 447 { 448 int size, width; 449 450 size = wctomb(ud->data, wc); 451 if (size < 0) { 452 log_debug("UTF-8 %d, wctomb() %d", wc, errno); 453 wctomb(NULL, 0); 454 return (UTF8_ERROR); 455 } 456 if (size == 0) 457 return (UTF8_ERROR); 458 ud->size = ud->have = size; 459 if (utf8_width(ud, &width) == UTF8_DONE) { 460 ud->width = width; 461 return (UTF8_DONE); 462 } 463 return (UTF8_ERROR); 464 } 465 466 /* 467 * Open UTF-8 sequence. 468 * 469 * 11000010-11011111 C2-DF start of 2-byte sequence 470 * 11100000-11101111 E0-EF start of 3-byte sequence 471 * 11110000-11110100 F0-F4 start of 4-byte sequence 472 */ 473 enum utf8_state 474 utf8_open(struct utf8_data *ud, u_char ch) 475 { 476 memset(ud, 0, sizeof *ud); 477 if (ch >= 0xc2 && ch <= 0xdf) 478 ud->size = 2; 479 else if (ch >= 0xe0 && ch <= 0xef) 480 ud->size = 3; 481 else if (ch >= 0xf0 && ch <= 0xf4) 482 ud->size = 4; 483 else 484 return (UTF8_ERROR); 485 utf8_append(ud, ch); 486 return (UTF8_MORE); 487 } 488 489 /* Append character to UTF-8, closing if finished. */ 490 enum utf8_state 491 utf8_append(struct utf8_data *ud, u_char ch) 492 { 493 int width; 494 495 if (ud->have >= ud->size) 496 fatalx("UTF-8 character overflow"); 497 if (ud->size > sizeof ud->data) 498 fatalx("UTF-8 character size too large"); 499 500 if (ud->have != 0 && (ch & 0xc0) != 0x80) 501 ud->width = 0xff; 502 503 ud->data[ud->have++] = ch; 504 if (ud->have != ud->size) 505 return (UTF8_MORE); 506 507 if (ud->width == 0xff) 508 return (UTF8_ERROR); 509 if (utf8_width(ud, &width) != UTF8_DONE) 510 return (UTF8_ERROR); 511 ud->width = width; 512 513 return (UTF8_DONE); 514 } 515 516 /* 517 * Encode len characters from src into dst, which is guaranteed to have four 518 * bytes available for each character from src (for \abc or UTF-8) plus space 519 * for \0. 520 */ 521 int 522 utf8_strvis(char *dst, const char *src, size_t len, int flag) 523 { 524 struct utf8_data ud; 525 const char *start = dst, *end = src + len; 526 enum utf8_state more; 527 size_t i; 528 529 while (src < end) { 530 if ((more = utf8_open(&ud, *src)) == UTF8_MORE) { 531 while (++src < end && more == UTF8_MORE) 532 more = utf8_append(&ud, *src); 533 if (more == UTF8_DONE) { 534 /* UTF-8 character finished. */ 535 for (i = 0; i < ud.size; i++) 536 *dst++ = ud.data[i]; 537 continue; 538 } 539 /* Not a complete, valid UTF-8 character. */ 540 src -= ud.have; 541 } 542 if ((flag & VIS_DQ) && src[0] == '$' && src < end - 1) { 543 if (isalpha((u_char)src[1]) || 544 src[1] == '_' || 545 src[1] == '{') 546 *dst++ = '\\'; 547 *dst++ = '$'; 548 } else if (src < end - 1) 549 dst = vis(dst, src[0], flag, src[1]); 550 else if (src < end) 551 dst = vis(dst, src[0], flag, '\0'); 552 src++; 553 } 554 *dst = '\0'; 555 return (dst - start); 556 } 557 558 /* Same as utf8_strvis but allocate the buffer. */ 559 int 560 utf8_stravis(char **dst, const char *src, int flag) 561 { 562 char *buf; 563 int len; 564 565 buf = xreallocarray(NULL, 4, strlen(src) + 1); 566 len = utf8_strvis(buf, src, strlen(src), flag); 567 568 *dst = xrealloc(buf, len + 1); 569 return (len); 570 } 571 572 /* Same as utf8_strvis but allocate the buffer. */ 573 int 574 utf8_stravisx(char **dst, const char *src, size_t srclen, int flag) 575 { 576 char *buf; 577 int len; 578 579 buf = xreallocarray(NULL, 4, srclen + 1); 580 len = utf8_strvis(buf, src, srclen, flag); 581 582 *dst = xrealloc(buf, len + 1); 583 return (len); 584 } 585 586 /* Does this string contain anything that isn't valid UTF-8? */ 587 int 588 utf8_isvalid(const char *s) 589 { 590 struct utf8_data ud; 591 const char *end; 592 enum utf8_state more; 593 594 end = s + strlen(s); 595 while (s < end) { 596 if ((more = utf8_open(&ud, *s)) == UTF8_MORE) { 597 while (++s < end && more == UTF8_MORE) 598 more = utf8_append(&ud, *s); 599 if (more == UTF8_DONE) 600 continue; 601 return (0); 602 } 603 if (*s < 0x20 || *s > 0x7e) 604 return (0); 605 s++; 606 } 607 return (1); 608 } 609 610 /* 611 * Sanitize a string, changing any UTF-8 characters to '_'. Caller should free 612 * the returned string. Anything not valid printable ASCII or UTF-8 is 613 * stripped. 614 */ 615 char * 616 utf8_sanitize(const char *src) 617 { 618 char *dst = NULL; 619 size_t n = 0; 620 enum utf8_state more; 621 struct utf8_data ud; 622 u_int i; 623 624 while (*src != '\0') { 625 dst = xreallocarray(dst, n + 1, sizeof *dst); 626 if ((more = utf8_open(&ud, *src)) == UTF8_MORE) { 627 while (*++src != '\0' && more == UTF8_MORE) 628 more = utf8_append(&ud, *src); 629 if (more == UTF8_DONE) { 630 dst = xreallocarray(dst, n + ud.width, 631 sizeof *dst); 632 for (i = 0; i < ud.width; i++) 633 dst[n++] = '_'; 634 continue; 635 } 636 src -= ud.have; 637 } 638 if (*src > 0x1f && *src < 0x7f) 639 dst[n++] = *src; 640 else 641 dst[n++] = '_'; 642 src++; 643 } 644 dst = xreallocarray(dst, n + 1, sizeof *dst); 645 dst[n] = '\0'; 646 return (dst); 647 } 648 649 /* Get UTF-8 buffer length. */ 650 size_t 651 utf8_strlen(const struct utf8_data *s) 652 { 653 size_t i; 654 655 for (i = 0; s[i].size != 0; i++) 656 /* nothing */; 657 return (i); 658 } 659 660 /* Get UTF-8 string width. */ 661 u_int 662 utf8_strwidth(const struct utf8_data *s, ssize_t n) 663 { 664 ssize_t i; 665 u_int width = 0; 666 667 for (i = 0; s[i].size != 0; i++) { 668 if (n != -1 && n == i) 669 break; 670 width += s[i].width; 671 } 672 return (width); 673 } 674 675 /* 676 * Convert a string into a buffer of UTF-8 characters. Terminated by size == 0. 677 * Caller frees. 678 */ 679 struct utf8_data * 680 utf8_fromcstr(const char *src) 681 { 682 struct utf8_data *dst = NULL; 683 size_t n = 0; 684 enum utf8_state more; 685 686 while (*src != '\0') { 687 dst = xreallocarray(dst, n + 1, sizeof *dst); 688 if ((more = utf8_open(&dst[n], *src)) == UTF8_MORE) { 689 while (*++src != '\0' && more == UTF8_MORE) 690 more = utf8_append(&dst[n], *src); 691 if (more == UTF8_DONE) { 692 n++; 693 continue; 694 } 695 src -= dst[n].have; 696 } 697 utf8_set(&dst[n], *src); 698 n++; 699 src++; 700 } 701 dst = xreallocarray(dst, n + 1, sizeof *dst); 702 dst[n].size = 0; 703 return (dst); 704 } 705 706 /* Convert from a buffer of UTF-8 characters into a string. Caller frees. */ 707 char * 708 utf8_tocstr(struct utf8_data *src) 709 { 710 char *dst = NULL; 711 size_t n = 0; 712 713 for(; src->size != 0; src++) { 714 dst = xreallocarray(dst, n + src->size, 1); 715 memcpy(dst + n, src->data, src->size); 716 n += src->size; 717 } 718 dst = xreallocarray(dst, n + 1, 1); 719 dst[n] = '\0'; 720 return (dst); 721 } 722 723 /* Get width of UTF-8 string. */ 724 u_int 725 utf8_cstrwidth(const char *s) 726 { 727 struct utf8_data tmp; 728 u_int width; 729 enum utf8_state more; 730 731 width = 0; 732 while (*s != '\0') { 733 if ((more = utf8_open(&tmp, *s)) == UTF8_MORE) { 734 while (*++s != '\0' && more == UTF8_MORE) 735 more = utf8_append(&tmp, *s); 736 if (more == UTF8_DONE) { 737 width += tmp.width; 738 continue; 739 } 740 s -= tmp.have; 741 } 742 if (*s > 0x1f && *s != 0x7f) 743 width++; 744 s++; 745 } 746 return (width); 747 } 748 749 /* Pad UTF-8 string to width on the left. Caller frees. */ 750 char * 751 utf8_padcstr(const char *s, u_int width) 752 { 753 size_t slen; 754 char *out; 755 u_int n, i; 756 757 n = utf8_cstrwidth(s); 758 if (n >= width) 759 return (xstrdup(s)); 760 761 slen = strlen(s); 762 out = xmalloc(slen + 1 + (width - n)); 763 memcpy(out, s, slen); 764 for (i = n; i < width; i++) 765 out[slen++] = ' '; 766 out[slen] = '\0'; 767 return (out); 768 } 769 770 /* Pad UTF-8 string to width on the right. Caller frees. */ 771 char * 772 utf8_rpadcstr(const char *s, u_int width) 773 { 774 size_t slen; 775 char *out; 776 u_int n, i; 777 778 n = utf8_cstrwidth(s); 779 if (n >= width) 780 return (xstrdup(s)); 781 782 slen = strlen(s); 783 out = xmalloc(slen + 1 + (width - n)); 784 for (i = 0; i < width - n; i++) 785 out[i] = ' '; 786 memcpy(out + i, s, slen); 787 out[i + slen] = '\0'; 788 return (out); 789 } 790 791 int 792 utf8_cstrhas(const char *s, const struct utf8_data *ud) 793 { 794 struct utf8_data *copy, *loop; 795 int found = 0; 796 797 copy = utf8_fromcstr(s); 798 for (loop = copy; loop->size != 0; loop++) { 799 if (loop->size != ud->size) 800 continue; 801 if (memcmp(loop->data, ud->data, loop->size) == 0) { 802 found = 1; 803 break; 804 } 805 } 806 free(copy); 807 808 return (found); 809 } 810