xref: /netbsd-src/external/bsd/tmux/dist/utf8.c (revision ccd9df534e375a4366c5b55f23782053c7a98d82)
1 /* $OpenBSD$ */
2 
3 /*
4  * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <wchar.h>
26 
27 #include "tmux.h"
28 
/*
 * Code points that utf8_width() always reports as two columns wide, without
 * consulting wcwidth(). The table is searched with bsearch() (see
 * utf8_in_table), so entries MUST stay in ascending order.
 */
static const wchar_t utf8_force_wide[] = {
	0x0261D, 0x026F9, 0x0270A, 0x0270B, 0x0270C, 0x0270D,

	0x1F1E6, 0x1F1E7, 0x1F1E8, 0x1F1E9, 0x1F1EA, 0x1F1EB, 0x1F1EC,
	0x1F1ED, 0x1F1EE, 0x1F1EF, 0x1F1F0, 0x1F1F1, 0x1F1F2, 0x1F1F3,
	0x1F1F4, 0x1F1F5, 0x1F1F6, 0x1F1F7, 0x1F1F8, 0x1F1F9, 0x1F1FA,
	0x1F1FB, 0x1F1FC, 0x1F1FD, 0x1F1FE, 0x1F1FF,

	0x1F385,
	0x1F3C2, 0x1F3C3, 0x1F3C4, 0x1F3C7, 0x1F3CA, 0x1F3CB, 0x1F3CC,
	0x1F3FB, 0x1F3FC, 0x1F3FD, 0x1F3FE, 0x1F3FF,

	0x1F442, 0x1F443, 0x1F446, 0x1F447, 0x1F448, 0x1F449, 0x1F44A,
	0x1F44B, 0x1F44C, 0x1F44D, 0x1F44E, 0x1F44F, 0x1F450,
	0x1F466, 0x1F467, 0x1F468, 0x1F469, 0x1F46B, 0x1F46C, 0x1F46D,
	0x1F46E,
	0x1F470, 0x1F471, 0x1F472, 0x1F473, 0x1F474, 0x1F475, 0x1F476,
	0x1F477, 0x1F478, 0x1F47C,
	0x1F481, 0x1F482, 0x1F483, 0x1F485, 0x1F486, 0x1F487, 0x1F48F,
	0x1F491, 0x1F4AA,

	0x1F574, 0x1F575, 0x1F57A, 0x1F590, 0x1F595, 0x1F596,

	0x1F645, 0x1F646, 0x1F647, 0x1F64B, 0x1F64C, 0x1F64D, 0x1F64E,
	0x1F64F,
	0x1F6A3, 0x1F6B4, 0x1F6B5, 0x1F6B6, 0x1F6C0, 0x1F6CC,

	0x1F90C, 0x1F90F, 0x1F918, 0x1F919, 0x1F91A, 0x1F91B, 0x1F91C,
	0x1F91D, 0x1F91E, 0x1F91F, 0x1F926,
	0x1F930, 0x1F931, 0x1F932, 0x1F933, 0x1F934, 0x1F935, 0x1F936,
	0x1F937, 0x1F938, 0x1F939, 0x1F93D, 0x1F93E, 0x1F977,
	0x1F9B5, 0x1F9B6, 0x1F9B8, 0x1F9B9, 0x1F9BB, 0x1F9CD, 0x1F9CE,
	0x1F9CF,
	0x1F9D1, 0x1F9D2, 0x1F9D3, 0x1F9D4, 0x1F9D5, 0x1F9D6, 0x1F9D7,
	0x1F9D8, 0x1F9D9, 0x1F9DA, 0x1F9DB, 0x1F9DC, 0x1F9DD,

	0x1FAC3, 0x1FAC4, 0x1FAC5,
	0x1FAF0, 0x1FAF1, 0x1FAF2, 0x1FAF3, 0x1FAF4, 0x1FAF5, 0x1FAF6,
	0x1FAF7, 0x1FAF8
};
193 
/*
 * One stored UTF-8 byte sequence. Every item is linked into two red-black
 * trees at the same time: utf8_index_tree (ordered by index) and
 * utf8_data_tree (ordered by size, then by the bytes in data).
 */
struct utf8_item {
	/* Linkage and key for utf8_index_tree. */
	RB_ENTRY(utf8_item)	index_entry;
	u_int			index;

	/* Linkage and key for utf8_data_tree; only size bytes of data count. */
	RB_ENTRY(utf8_item)	data_entry;
	char			data[UTF8_SIZE];
	u_char			size;
};
202 
203 static int
204 utf8_data_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
205 {
206 	if (ui1->size < ui2->size)
207 		return (-1);
208 	if (ui1->size > ui2->size)
209 		return (1);
210 	return (memcmp(ui1->data, ui2->data, ui1->size));
211 }
212 RB_HEAD(utf8_data_tree, utf8_item);
213 RB_GENERATE_STATIC(utf8_data_tree, utf8_item, data_entry, utf8_data_cmp);
214 static struct utf8_data_tree utf8_data_tree = RB_INITIALIZER(utf8_data_tree);
215 
216 static int
217 utf8_index_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
218 {
219 	if (ui1->index < ui2->index)
220 		return (-1);
221 	if (ui1->index > ui2->index)
222 		return (1);
223 	return (0);
224 }
225 RB_HEAD(utf8_index_tree, utf8_item);
226 RB_GENERATE_STATIC(utf8_index_tree, utf8_item, index_entry, utf8_index_cmp);
227 static struct utf8_index_tree utf8_index_tree = RB_INITIALIZER(utf8_index_tree);
228 
/*
 * Index that utf8_put_item() assigns to the next item it adds; it refuses to
 * add more once this reaches 0xffffff + 1.
 */
static u_int utf8_next_index;

/*
 * Accessors for the packed utf8_char value. As used by utf8_from_data() and
 * utf8_to_data() in this file, the layout is:
 *   bits 0-23   up to three raw bytes (data[0] lowest), or an item index
 *   bits 24-28  size in bytes (masked with 0x1f)
 *   bits 29+    width plus one (the getter subtracts the one again)
 * NOTE(review): utf8_char is declared elsewhere; presumably a 32-bit unsigned
 * type - confirm.
 */
#define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)
#define UTF8_GET_WIDTH(uc) (((uc) >> 29) - 1)

#define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)
#define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)
236 
237 /* Get a UTF-8 item from data. */
238 static struct utf8_item *
239 utf8_item_by_data(const u_char *data, size_t size)
240 {
241 	struct utf8_item	ui;
242 
243 	memcpy(ui.data, data, size);
244 	ui.size = size;
245 
246 	return (RB_FIND(utf8_data_tree, &utf8_data_tree, &ui));
247 }
248 
249 /* Get a UTF-8 item from data. */
250 static struct utf8_item *
251 utf8_item_by_index(u_int index)
252 {
253 	struct utf8_item	ui;
254 
255 	ui.index = index;
256 
257 	return (RB_FIND(utf8_index_tree, &utf8_index_tree, &ui));
258 }
259 
260 /* Add a UTF-8 item. */
261 static int
262 utf8_put_item(const u_char *data, size_t size, u_int *index)
263 {
264 	struct utf8_item	*ui;
265 
266 	ui = utf8_item_by_data((const unsigned char *)data, size);
267 	if (ui != NULL) {
268 		*index = ui->index;
269 		log_debug("%s: found %.*s = %u", __func__, (int)size, data,
270 		    *index);
271 		return (0);
272 	}
273 
274 	if (utf8_next_index == 0xffffff + 1)
275 		return (-1);
276 
277 	ui = xcalloc(1, sizeof *ui);
278 	ui->index = utf8_next_index++;
279 	RB_INSERT(utf8_index_tree, &utf8_index_tree, ui);
280 
281 	memcpy(ui->data, data, size);
282 	ui->size = size;
283 	RB_INSERT(utf8_data_tree, &utf8_data_tree, ui);
284 
285 	*index = ui->index;
286 	log_debug("%s: added %.*s = %u", __func__, (int)size, data, *index);
287 	return (0);
288 }
289 
/*
 * bsearch() comparison callback for tables of wchar_t: returns -1, 0 or 1
 * according to whether the first value is below, equal to or above the
 * second.
 */
static int
utf8_table_cmp(const void *vp1, const void *vp2)
{
	const wchar_t	lhs = *(const wchar_t *)vp1;
	const wchar_t	rhs = *(const wchar_t *)vp2;

	return ((lhs > rhs) - (lhs < rhs));
}
301 
302 /* Check if character in table. */
303 int
304 utf8_in_table(wchar_t find, const wchar_t *table, u_int count)
305 {
306 	wchar_t	*found;
307 
308 	found = bsearch(&find, table, count, sizeof *table, utf8_table_cmp);
309 	return (found != NULL);
310 }
311 
312 /* Get UTF-8 character from data. */
313 enum utf8_state
314 utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
315 {
316 	u_int	index;
317 
318 	if (ud->width > 2)
319 		fatalx("invalid UTF-8 width: %u", ud->width);
320 
321 	if (ud->size > UTF8_SIZE)
322 		goto fail;
323 	if (ud->size <= 3) {
324 		index = (((utf8_char)ud->data[2] << 16)|
325 			  ((utf8_char)ud->data[1] << 8)|
326 			  ((utf8_char)ud->data[0]));
327 	} else if (utf8_put_item(ud->data, ud->size, &index) != 0)
328 		goto fail;
329 	*uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
330 	log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,
331 	    (int)ud->size, ud->data, *uc);
332 	return (UTF8_DONE);
333 
334 fail:
335 	if (ud->width == 0)
336 		*uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0);
337 	else if (ud->width == 1)
338 		*uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20;
339 	else
340 		*uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x2020;
341 	return (UTF8_ERROR);
342 }
343 
344 /* Get UTF-8 data from character. */
345 void
346 utf8_to_data(utf8_char uc, struct utf8_data *ud)
347 {
348 	struct utf8_item	*ui;
349 	u_int			 index;
350 
351 	memset(ud, 0, sizeof *ud);
352 	ud->size = ud->have = UTF8_GET_SIZE(uc);
353 	ud->width = UTF8_GET_WIDTH(uc);
354 
355 	if (ud->size <= 3) {
356 		ud->data[2] = (uc >> 16);
357 		ud->data[1] = ((uc >> 8) & 0xff);
358 		ud->data[0] = (uc & 0xff);
359 	} else {
360 		index = (uc & 0xffffff);
361 		if ((ui = utf8_item_by_index(index)) == NULL)
362 			memset(ud->data, ' ', ud->size);
363 		else
364 			memcpy(ud->data, ui->data, ud->size);
365 	}
366 
367 	log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,
368 	    (int)ud->size, ud->data);
369 }
370 
371 /* Get UTF-8 character from a single ASCII character. */
372 u_int
373 utf8_build_one(u_char ch)
374 {
375 	return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch);
376 }
377 
378 /* Set a single character. */
379 void
380 utf8_set(struct utf8_data *ud, u_char ch)
381 {
382 	static const struct utf8_data empty = { { 0 }, 1, 1, 1 };
383 
384 	memcpy(ud, &empty, sizeof *ud);
385 	*ud->data = ch;
386 }
387 
388 /* Copy UTF-8 character. */
389 void
390 utf8_copy(struct utf8_data *to, const struct utf8_data *from)
391 {
392 	u_int	i;
393 
394 	memcpy(to, from, sizeof *to);
395 
396 	for (i = to->size; i < sizeof to->data; i++)
397 		to->data[i] = '\0';
398 }
399 
400 /* Get width of Unicode character. */
401 static enum utf8_state
402 utf8_width(struct utf8_data *ud, int *width)
403 {
404 	wchar_t	wc;
405 
406 	if (utf8_towc(ud, &wc) != UTF8_DONE)
407 		return (UTF8_ERROR);
408 	if (utf8_in_table(wc, utf8_force_wide, nitems(utf8_force_wide))) {
409 		*width = 2;
410 		return (UTF8_DONE);
411 	}
412 #ifdef HAVE_UTF8PROC
413 	*width = utf8proc_wcwidth(wc);
414 	log_debug("utf8proc_wcwidth(%05X) returned %d", (u_int)wc, *width);
415 #else
416 	*width = wcwidth(wc);
417 	log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width);
418 	if (*width < 0) {
419 		/*
420 		 * C1 control characters are nonprintable, so they are always
421 		 * zero width.
422 		 */
423 		*width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1;
424 	}
425 #endif
426 	if (*width >= 0 && *width <= 0xff)
427 		return (UTF8_DONE);
428 	return (UTF8_ERROR);
429 }
430 
431 /* Convert UTF-8 character to wide character. */
432 enum utf8_state
433 utf8_towc(const struct utf8_data *ud, wchar_t *wc)
434 {
435 #ifdef HAVE_UTF8PROC
436 	switch (utf8proc_mbtowc(wc, ud->data, ud->size)) {
437 #else
438 	switch (mbtowc(wc, __UNCONST(ud->data), ud->size)) {
439 #endif
440 	case -1:
441 		log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
442 		    errno);
443 		mbtowc(NULL, NULL, MB_CUR_MAX);
444 		return (UTF8_ERROR);
445 	case 0:
446 		return (UTF8_ERROR);
447 	}
448 	log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)*wc);
449 	return (UTF8_DONE);
450 }
451 
452 /*
453  * Open UTF-8 sequence.
454  *
455  * 11000010-11011111 C2-DF start of 2-byte sequence
456  * 11100000-11101111 E0-EF start of 3-byte sequence
457  * 11110000-11110100 F0-F4 start of 4-byte sequence
458  */
459 enum utf8_state
460 utf8_open(struct utf8_data *ud, u_char ch)
461 {
462 	memset(ud, 0, sizeof *ud);
463 	if (ch >= 0xc2 && ch <= 0xdf)
464 		ud->size = 2;
465 	else if (ch >= 0xe0 && ch <= 0xef)
466 		ud->size = 3;
467 	else if (ch >= 0xf0 && ch <= 0xf4)
468 		ud->size = 4;
469 	else
470 		return (UTF8_ERROR);
471 	utf8_append(ud, ch);
472 	return (UTF8_MORE);
473 }
474 
475 /* Append character to UTF-8, closing if finished. */
476 enum utf8_state
477 utf8_append(struct utf8_data *ud, u_char ch)
478 {
479 	int	width;
480 
481 	if (ud->have >= ud->size)
482 		fatalx("UTF-8 character overflow");
483 	if (ud->size > sizeof ud->data)
484 		fatalx("UTF-8 character size too large");
485 
486 	if (ud->have != 0 && (ch & 0xc0) != 0x80)
487 		ud->width = 0xff;
488 
489 	ud->data[ud->have++] = ch;
490 	if (ud->have != ud->size)
491 		return (UTF8_MORE);
492 
493 	if (ud->width == 0xff)
494 		return (UTF8_ERROR);
495 	if (utf8_width(ud, &width) != UTF8_DONE)
496 		return (UTF8_ERROR);
497 	ud->width = width;
498 
499 	return (UTF8_DONE);
500 }
501 
502 /*
503  * Encode len characters from src into dst, which is guaranteed to have four
504  * bytes available for each character from src (for \abc or UTF-8) plus space
505  * for \0.
506  */
507 int
508 utf8_strvis(char *dst, const char *src, size_t len, int flag)
509 {
510 	struct utf8_data	 ud;
511 	const char		*start = dst, *end = src + len;
512 	enum utf8_state		 more;
513 	size_t			 i;
514 
515 	while (src < end) {
516 		if ((more = utf8_open(&ud, *src)) == UTF8_MORE) {
517 			while (++src < end && more == UTF8_MORE)
518 				more = utf8_append(&ud, *src);
519 			if (more == UTF8_DONE) {
520 				/* UTF-8 character finished. */
521 				for (i = 0; i < ud.size; i++)
522 					*dst++ = ud.data[i];
523 				continue;
524 			}
525 			/* Not a complete, valid UTF-8 character. */
526 			src -= ud.have;
527 		}
528 		if (src[0] == '$' && src < end - 1) {
529 			if (isalpha((u_char)src[1]) ||
530 			    src[1] == '_' ||
531 			    src[1] == '{')
532 				*dst++ = '\\';
533 			*dst++ = '$';
534 		} else if (src < end - 1)
535 			dst = vis(dst, src[0], flag, src[1]);
536 		else if (src < end)
537 			dst = vis(dst, src[0], flag, '\0');
538 		src++;
539 	}
540 	*dst = '\0';
541 	return (dst - start);
542 }
543 
/*
 * Same as utf8_strvis but allocate the buffer; src is NUL-terminated. The
 * buffer is stored in *dst (caller frees) and the encoded length returned.
 */
int
utf8_stravis(char **dst, const char *src, int flag)
{
	size_t	 srclen = strlen(src);
	char	*buf;
	int	 len;

	/* Worst case is four output bytes per input byte. */
	buf = xreallocarray(NULL, 4, srclen + 1);
	len = utf8_strvis(buf, src, srclen, flag);

	/* Trim the buffer to what was actually used. */
	*dst = xrealloc(buf, len + 1);
	return (len);
}
557 
558 /* Same as utf8_strvis but allocate the buffer. */
559 int
560 utf8_stravisx(char **dst, const char *src, size_t srclen, int flag)
561 {
562 	char	*buf;
563 	int	 len;
564 
565 	buf = xreallocarray(NULL, 4, srclen + 1);
566 	len = utf8_strvis(buf, src, srclen, flag);
567 
568 	*dst = xrealloc(buf, len + 1);
569 	return (len);
570 }
571 
572 /* Does this string contain anything that isn't valid UTF-8? */
573 int
574 utf8_isvalid(const char *s)
575 {
576 	struct utf8_data ud;
577 	const char	*end;
578 	enum utf8_state	 more;
579 
580 	end = s + strlen(s);
581 	while (s < end) {
582 		if ((more = utf8_open(&ud, *s)) == UTF8_MORE) {
583 			while (++s < end && more == UTF8_MORE)
584 				more = utf8_append(&ud, *s);
585 			if (more == UTF8_DONE)
586 				continue;
587 			return (0);
588 		}
589 		if (*s < 0x20 || *s > 0x7e)
590 			return (0);
591 		s++;
592 	}
593 	return (1);
594 }
595 
596 /*
597  * Sanitize a string, changing any UTF-8 characters to '_'. Caller should free
598  * the returned string. Anything not valid printable ASCII or UTF-8 is
599  * stripped.
600  */
601 char *
602 utf8_sanitize(const char *src)
603 {
604 	char		*dst = NULL;
605 	size_t		 n = 0;
606 	enum utf8_state	 more;
607 	struct utf8_data ud;
608 	u_int		 i;
609 
610 	while (*src != '\0') {
611 		dst = xreallocarray(dst, n + 1, sizeof *dst);
612 		if ((more = utf8_open(&ud, *src)) == UTF8_MORE) {
613 			while (*++src != '\0' && more == UTF8_MORE)
614 				more = utf8_append(&ud, *src);
615 			if (more == UTF8_DONE) {
616 				dst = xreallocarray(dst, n + ud.width,
617 				    sizeof *dst);
618 				for (i = 0; i < ud.width; i++)
619 					dst[n++] = '_';
620 				continue;
621 			}
622 			src -= ud.have;
623 		}
624 		if (*src > 0x1f && *src < 0x7f)
625 			dst[n++] = *src;
626 		else
627 			dst[n++] = '_';
628 		src++;
629 	}
630 	dst = xreallocarray(dst, n + 1, sizeof *dst);
631 	dst[n] = '\0';
632 	return (dst);
633 }
634 
635 /* Get UTF-8 buffer length. */
636 size_t
637 utf8_strlen(const struct utf8_data *s)
638 {
639 	size_t	i;
640 
641 	for (i = 0; s[i].size != 0; i++)
642 		/* nothing */;
643 	return (i);
644 }
645 
646 /* Get UTF-8 string width. */
647 u_int
648 utf8_strwidth(const struct utf8_data *s, ssize_t n)
649 {
650 	ssize_t	i;
651 	u_int	width = 0;
652 
653 	for (i = 0; s[i].size != 0; i++) {
654 		if (n != -1 && n == i)
655 			break;
656 		width += s[i].width;
657 	}
658 	return (width);
659 }
660 
661 /*
662  * Convert a string into a buffer of UTF-8 characters. Terminated by size == 0.
663  * Caller frees.
664  */
665 struct utf8_data *
666 utf8_fromcstr(const char *src)
667 {
668 	struct utf8_data	*dst = NULL;
669 	size_t			 n = 0;
670 	enum utf8_state		 more;
671 
672 	while (*src != '\0') {
673 		dst = xreallocarray(dst, n + 1, sizeof *dst);
674 		if ((more = utf8_open(&dst[n], *src)) == UTF8_MORE) {
675 			while (*++src != '\0' && more == UTF8_MORE)
676 				more = utf8_append(&dst[n], *src);
677 			if (more == UTF8_DONE) {
678 				n++;
679 				continue;
680 			}
681 			src -= dst[n].have;
682 		}
683 		utf8_set(&dst[n], *src);
684 		n++;
685 		src++;
686 	}
687 	dst = xreallocarray(dst, n + 1, sizeof *dst);
688 	dst[n].size = 0;
689 	return (dst);
690 }
691 
692 /* Convert from a buffer of UTF-8 characters into a string. Caller frees. */
693 char *
694 utf8_tocstr(struct utf8_data *src)
695 {
696 	char	*dst = NULL;
697 	size_t	 n = 0;
698 
699 	for(; src->size != 0; src++) {
700 		dst = xreallocarray(dst, n + src->size, 1);
701 		memcpy(dst + n, src->data, src->size);
702 		n += src->size;
703 	}
704 	dst = xreallocarray(dst, n + 1, 1);
705 	dst[n] = '\0';
706 	return (dst);
707 }
708 
709 /* Get width of UTF-8 string. */
710 u_int
711 utf8_cstrwidth(const char *s)
712 {
713 	struct utf8_data	tmp;
714 	u_int			width;
715 	enum utf8_state		more;
716 
717 	width = 0;
718 	while (*s != '\0') {
719 		if ((more = utf8_open(&tmp, *s)) == UTF8_MORE) {
720 			while (*++s != '\0' && more == UTF8_MORE)
721 				more = utf8_append(&tmp, *s);
722 			if (more == UTF8_DONE) {
723 				width += tmp.width;
724 				continue;
725 			}
726 			s -= tmp.have;
727 		}
728 		if (*s > 0x1f && *s != 0x7f)
729 			width++;
730 		s++;
731 	}
732 	return (width);
733 }
734 
735 /* Pad UTF-8 string to width on the left. Caller frees. */
736 char *
737 utf8_padcstr(const char *s, u_int width)
738 {
739 	size_t	 slen;
740 	char	*out;
741 	u_int	 n, i;
742 
743 	n = utf8_cstrwidth(s);
744 	if (n >= width)
745 		return (xstrdup(s));
746 
747 	slen = strlen(s);
748 	out = xmalloc(slen + 1 + (width - n));
749 	memcpy(out, s, slen);
750 	for (i = n; i < width; i++)
751 		out[slen++] = ' ';
752 	out[slen] = '\0';
753 	return (out);
754 }
755 
756 /* Pad UTF-8 string to width on the right. Caller frees. */
757 char *
758 utf8_rpadcstr(const char *s, u_int width)
759 {
760 	size_t	 slen;
761 	char	*out;
762 	u_int	 n, i;
763 
764 	n = utf8_cstrwidth(s);
765 	if (n >= width)
766 		return (xstrdup(s));
767 
768 	slen = strlen(s);
769 	out = xmalloc(slen + 1 + (width - n));
770 	for (i = 0; i < width - n; i++)
771 		out[i] = ' ';
772 	memcpy(out + i, s, slen);
773 	out[i + slen] = '\0';
774 	return (out);
775 }
776 
777 int
778 utf8_cstrhas(const char *s, const struct utf8_data *ud)
779 {
780 	struct utf8_data	*copy, *loop;
781 	int			 found = 0;
782 
783 	copy = utf8_fromcstr(s);
784 	for (loop = copy; loop->size != 0; loop++) {
785 		if (loop->size != ud->size)
786 			continue;
787 		if (memcmp(loop->data, ud->data, loop->size) == 0) {
788 			found = 1;
789 			break;
790 		}
791 	}
792 	free(copy);
793 
794 	return (found);
795 }
796