xref: /dflybsd-src/lib/libc/locale/utf8.c (revision aa6ac96e01825b3efcab953441f85adbf9815e0f)
1 /*
2  * Copyright 2015 Matthew Dillon <dillon@backplane.com> (mbintowcr, wcrtombin)
3  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
4  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
5  * Copyright (c) 2002-2004 Tim J. Robbins
6  * All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  * All rights reserved.
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * WCSBIN_EOF -		Indicate EOF on input buffer.
37  *
38  * WCSBIN_SURRO -	Pass-through surrogate space (typically if the UTF-8
39  *			has already been escaped), on bytes-to-wchars and
40  *			wchars-to-bytes.  Escaping of other illegal codes will
41  *			still occur on input but de-escaping will not occur
42  *			on output (they will remain in the surrogate space).
43  *
44  * WCSBIN_LONGCODES -	Allow 4-byte > 0x10FFFF, 5-byte and 6-byte sequences
45  *			(normally illegal), otherwise escape them on input
46  *			and fail on output.
47  *
48  * WCSBIN_STRICT -	Allow byte-to-wide conversions to fail.
49  */
50 
51 #include <sys/param.h>
52 
53 #include <errno.h>
54 #include <limits.h>
55 #include <runetype.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <wchar.h>
59 #include "mblocal.h"
60 
61 extern int __mb_sb_limit;
62 
63 static size_t	_UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict,
64 		    size_t, mbstate_t * __restrict);
65 static int	_UTF8_mbsinit(const mbstate_t *);
66 static size_t	_UTF8_mbsnrtowcs(wchar_t * __restrict,
67 		    const char ** __restrict, size_t, size_t,
68 		    mbstate_t * __restrict);
69 static size_t	_UTF8_wcrtomb(char * __restrict, wchar_t,
70 		    mbstate_t * __restrict);
71 static size_t	_UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
72 		    size_t, size_t, mbstate_t * __restrict);
73 static size_t	_UTF8_mbintowcr(wchar_t * __restrict dst,
74 		    const char * __restrict src,
75 		    size_t dlen, size_t *slen, int flags);
76 static size_t	_UTF8_wcrtombin(char * __restrict dst,
77 		    const wchar_t * __restrict src,
78 		    size_t dlen, size_t *slen, int flags);
79 
/*
 * Per-conversion restart state kept inside the caller's mbstate_t.
 * _UTF8_mbrtowc() fills it in when the input ends in the middle of a
 * multibyte sequence and consumes it on the next call.
 */
80 typedef struct {
81 	wchar_t	ch;		/* bits of the character decoded so far */
82 	int	want;		/* bytes still needed; 0 means initial state */
83 	wchar_t	lbound;		/* lowest value legal for this sequence length */
84 } _UTF8State;
85 
86 int
87 _UTF8_init(struct xlocale_ctype *l, _RuneLocale *rl)
88 {
89 
90 	l->__mbrtowc = _UTF8_mbrtowc;
91 	l->__wcrtomb = _UTF8_wcrtomb;
92 	l->__mbsinit = _UTF8_mbsinit;
93 	l->__mbsnrtowcs = _UTF8_mbsnrtowcs;
94 	l->__wcsnrtombs = _UTF8_wcsnrtombs;
95 	l->__mbintowcr = _UTF8_mbintowcr;
96 	l->__wcrtombin = _UTF8_wcrtombin;
97 	l->runes = rl;
98 	l->__mb_cur_max = 4;
99 	/*
100 	 * UCS-4 encoding used as the internal representation, so
101 	 * slots 0x0080-0x00FF are occuped and must be excluded
102 	 * from the single byte ctype by setting the limit.
103 	 */
104 	l->__mb_sb_limit = 128;
105 
106 	return (0);
107 }
108 
109 static int
110 _UTF8_mbsinit(const mbstate_t *ps)
111 {
112 
113 	return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
114 }
115 
116 static size_t
117 _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
118     mbstate_t * __restrict ps)
119 {
120 	_UTF8State *us;
121 	int ch, i, mask, want;
122 	wchar_t lbound, wch;
123 
124 	us = (_UTF8State *)ps;
125 
126 	if (us->want < 0 || us->want > 4) {
127 		errno = EINVAL;
128 		return ((size_t)-1);
129 	}
130 
131 	if (s == NULL) {
132 		s = "";
133 		n = 1;
134 		pwc = NULL;
135 	}
136 
137 	if (n == 0)
138 		/* Incomplete multibyte sequence */
139 		return ((size_t)-2);
140 
141 	if (us->want == 0) {
142 		/*
143 		 * Determine the number of octets that make up this character
144 		 * from the first octet, and a mask that extracts the
145 		 * interesting bits of the first octet. We already know
146 		 * the character is at least two bytes long.
147 		 *
148 		 * We also specify a lower bound for the character code to
149 		 * detect redundant, non-"shortest form" encodings. For
150 		 * example, the sequence C0 80 is _not_ a legal representation
151 		 * of the null character. This enforces a 1-to-1 mapping
152 		 * between character codes and their multibyte representations.
153 		 */
154 		ch = (unsigned char)*s;
155 		if ((ch & 0x80) == 0) {
156 			/* Fast path for plain ASCII characters. */
157 			if (pwc != NULL)
158 				*pwc = ch;
159 			return (ch != '\0' ? 1 : 0);
160 		}
161 		if ((ch & 0xe0) == 0xc0) {
162 			mask = 0x1f;
163 			want = 2;
164 			lbound = 0x80;
165 		} else if ((ch & 0xf0) == 0xe0) {
166 			mask = 0x0f;
167 			want = 3;
168 			lbound = 0x800;
169 		} else if ((ch & 0xf8) == 0xf0) {
170 			mask = 0x07;
171 			want = 4;
172 			lbound = 0x10000;
173 		} else {
174 			/*
175 			 * Malformed input; input is not UTF-8.
176 			 */
177 			errno = EILSEQ;
178 			return ((size_t)-1);
179 		}
180 	} else {
181 		want = us->want;
182 		lbound = us->lbound;
183 	}
184 
185 	/*
186 	 * Decode the octet sequence representing the character in chunks
187 	 * of 6 bits, most significant first.
188 	 */
189 	if (us->want == 0)
190 		wch = (unsigned char)*s++ & mask;
191 	else
192 		wch = us->ch;
193 
194 	for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
195 		if ((*s & 0xc0) != 0x80) {
196 			/*
197 			 * Malformed input; bad characters in the middle
198 			 * of a character.
199 			 */
200 			errno = EILSEQ;
201 			return ((size_t)-1);
202 		}
203 		wch <<= 6;
204 		wch |= *s++ & 0x3f;
205 	}
206 	if (i < want) {
207 		/* Incomplete multibyte sequence. */
208 		us->want = want - i;
209 		us->lbound = lbound;
210 		us->ch = wch;
211 		return ((size_t)-2);
212 	}
213 	if (wch < lbound || (wch & ~0x10ffff)) {
214 		/*
215 		 * Malformed input; redundant encoding or illegal
216 		 *		    code sequence.
217 		 */
218 		errno = EILSEQ;
219 		return ((size_t)-1);
220 	}
221 	if (pwc != NULL)
222 		*pwc = wch;
223 	us->want = 0;
224 	return (wch == L'\0' ? 0 : want);
225 }
226 
227 static size_t
228 _UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
229     size_t nms, size_t len, mbstate_t * __restrict ps)
230 {
231 	_UTF8State *us;
232 	const char *s;
233 	size_t nchr;
234 	wchar_t wc;
235 	size_t nb;
236 
237 	us = (_UTF8State *)ps;
238 
239 	s = *src;
240 	nchr = 0;
241 
242 	if (dst == NULL) {
243 		/*
244 		 * The fast path in the loop below is not safe if an ASCII
245 		 * character appears as anything but the first byte of a
246 		 * multibyte sequence. Check now to avoid doing it in the loop.
247 		 */
248 		if (nms > 0 && us->want > 0 && (signed char)*s > 0) {
249 			errno = EILSEQ;
250 			return ((size_t)-1);
251 		}
252 		for (;;) {
253 			if (nms > 0 && (signed char)*s > 0)
254 				/*
255 				 * Fast path for plain ASCII characters
256 				 * excluding NUL.
257 				 */
258 				nb = 1;
259 			else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
260 			    (size_t)-1)
261 				/* Invalid sequence - mbrtowc() sets errno. */
262 				return ((size_t)-1);
263 			else if (nb == 0 || nb == (size_t)-2)
264 				return (nchr);
265 			s += nb;
266 			nms -= nb;
267 			nchr++;
268 		}
269 		/*NOTREACHED*/
270 	}
271 
272 	/*
273 	 * The fast path in the loop below is not safe if an ASCII
274 	 * character appears as anything but the first byte of a
275 	 * multibyte sequence. Check now to avoid doing it in the loop.
276 	 */
277 	if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) {
278 		errno = EILSEQ;
279 		return ((size_t)-1);
280 	}
281 	while (len-- > 0) {
282 		if (nms > 0 && (signed char)*s > 0) {
283 			/*
284 			 * Fast path for plain ASCII characters
285 			 * excluding NUL.
286 			 */
287 			*dst = (wchar_t)*s;
288 			nb = 1;
289 		} else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
290 		    (size_t)-1) {
291 			*src = s;
292 			return ((size_t)-1);
293 		} else if (nb == (size_t)-2) {
294 			*src = s + nms;
295 			return (nchr);
296 		} else if (nb == 0) {
297 			*src = NULL;
298 			return (nchr);
299 		}
300 		s += nb;
301 		nms -= nb;
302 		nchr++;
303 		dst++;
304 	}
305 	*src = s;
306 	return (nchr);
307 }
308 
309 static size_t
310 _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
311 {
312 	_UTF8State *us;
313 	unsigned char lead;
314 	int i, len;
315 
316 	us = (_UTF8State *)ps;
317 
318 	if (us->want != 0) {
319 		errno = EINVAL;
320 		return ((size_t)-1);
321 	}
322 
323 	if (s == NULL)
324 		/* Reset to initial shift state (no-op) */
325 		return (1);
326 
327 	/*
328 	 * Determine the number of octets needed to represent this character.
329 	 * We always output the shortest sequence possible. Also specify the
330 	 * first few bits of the first octet, which contains the information
331 	 * about the sequence length.
332 	 */
333 	if ((wc & ~0x7f) == 0) {
334 		/* Fast path for plain ASCII characters. */
335 		*s = (char)wc;
336 		return (1);
337 	} else if ((wc & ~0x7ff) == 0) {
338 		lead = 0xc0;
339 		len = 2;
340 	} else if ((wc & ~0xffff) == 0) {
341 		lead = 0xe0;
342 		len = 3;
343 	} else if ((wc & ~0x10ffff) == 0) {
344 		lead = 0xf0;
345 		len = 4;
346 	} else {
347 		errno = EILSEQ;
348 		return ((size_t)-1);
349 	}
350 
351 	/*
352 	 * Output the octets representing the character in chunks
353 	 * of 6 bits, least significant last. The first octet is
354 	 * a special case because it contains the sequence length
355 	 * information.
356 	 */
357 	for (i = len - 1; i > 0; i--) {
358 		s[i] = (wc & 0x3f) | 0x80;
359 		wc >>= 6;
360 	}
361 	*s = (wc & 0xff) | lead;
362 
363 	return (len);
364 }
365 
366 static size_t
367 _UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
368     size_t nwc, size_t len, mbstate_t * __restrict ps)
369 {
370 	_UTF8State *us;
371 	char buf[MB_LEN_MAX];
372 	const wchar_t *s;
373 	size_t nbytes;
374 	size_t nb;
375 
376 	us = (_UTF8State *)ps;
377 
378 	if (us->want != 0) {
379 		errno = EINVAL;
380 		return ((size_t)-1);
381 	}
382 
383 	s = *src;
384 	nbytes = 0;
385 
386 	if (dst == NULL) {
387 		while (nwc-- > 0) {
388 			if (0 <= *s && *s < 0x80)
389 				/* Fast path for plain ASCII characters. */
390 				nb = 1;
391 			else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
392 			    (size_t)-1)
393 				/* Invalid character - wcrtomb() sets errno. */
394 				return ((size_t)-1);
395 			if (*s == L'\0')
396 				return (nbytes + nb - 1);
397 			s++;
398 			nbytes += nb;
399 		}
400 		return (nbytes);
401 	}
402 
403 	while (len > 0 && nwc-- > 0) {
404 		if (0 <= *s && *s < 0x80) {
405 			/* Fast path for plain ASCII characters. */
406 			nb = 1;
407 			*dst = *s;
408 		} else if (len > (size_t)MB_CUR_MAX) {
409 			/* Enough space to translate in-place. */
410 			if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
411 				*src = s;
412 				return ((size_t)-1);
413 			}
414 		} else {
415 			/*
416 			 * May not be enough space; use temp. buffer.
417 			 */
418 			if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
419 				*src = s;
420 				return ((size_t)-1);
421 			}
422 			if (nb > (int)len)
423 				/* MB sequence for character won't fit. */
424 				break;
425 			(void) memcpy(dst, buf, nb);
426 		}
427 		if (*s == L'\0') {
428 			*src = NULL;
429 			return (nbytes + nb - 1);
430 		}
431 		s++;
432 		dst += nb;
433 		len -= nb;
434 		nbytes += nb;
435 	}
436 	*src = s;
437 	return (nbytes);
438 }
439 
440 /*
441  * Clean binary to wchar buffer conversions.  This is basically like a normal
442  * buffer conversion but with a sane argument API and escaping.  See none.c
443  * for a more complete description.
444  */
445 static size_t
446 _UTF8_mbintowcr(wchar_t * __restrict dst, const char * __restrict src,
447 		size_t dlen, size_t *slen, int flags)
448 {
449 	size_t i;
450 	size_t j;
451 	size_t k;
452 	size_t n = *slen;
453 	int ch, mask, want;
454 	wchar_t lbound, wch;
455 
456 	for (i = j = 0; i < n; ++i) {
457 		if (j == dlen)
458 			break;
459 		ch = (unsigned char)src[i];
460 
461 		if ((ch & 0x80) == 0) {
462 			/* Fast path for plain ASCII characters. */
463 			if (dst)
464 				dst[j] = ch;
465 			++j;
466 			continue;
467 		}
468 		if ((ch & 0xe0) == 0xc0) {
469 			mask = 0x1f;
470 			want = 2;
471 			lbound = 0x80;
472 		} else if ((ch & 0xf0) == 0xe0) {
473 			mask = 0x0f;
474 			want = 3;
475 			lbound = 0x800;
476 		} else if ((ch & 0xf8) == 0xf0) {
477 			mask = 0x07;
478 			want = 4;
479 			lbound = 0x10000;
480 		} else if ((ch & 0xfc) == 0xf8) {
481 			/* normally illegal, handled down below */
482 			mask = 0x03;
483 			want = 5;
484 			lbound = 0x200000;
485 		} else if ((ch & 0xfe) == 0xfc) {
486 			/* normally illegal, handled down below */
487 			mask = 0x01;
488 			want = 6;
489 			lbound = 0x4000000;
490 		} else {
491 			/*
492 			 * Malformed input; input is not UTF-8, escape
493 			 * with UTF-8B.
494 			 */
495 			if (flags & WCSBIN_STRICT) {
496 				if (i == 0) {
497 					errno = EILSEQ;
498 					return ((size_t)-1);
499 				}
500 				break;
501 			}
502 			if (dst)
503 				dst[j] = 0xDC00 | ch;
504 			++j;
505 			continue;
506 		}
507 
508 		/*
509 		 * Construct wchar_t from multibyte sequence.
510 		 */
511 		wch = ch & mask;
512 		for (k = 1; k < want; ++k) {
513 			/*
514 			 * Stop if not enough input (don't do this early
515 			 * so we can detect illegal characters as they occur
516 			 * in the stream).
517 			 *
518 			 * If termination is requested force-escape all chars.
519 			 */
520 			if (i + k >= n)	{
521 				if (flags & WCSBIN_EOF) {
522 					want = n - i;
523 					goto forceesc;
524 				}
525 				goto breakout;
526 			}
527 
528 			ch = src[i+k];
529 			if ((ch & 0xc0) != 0x80) {
530 				/*
531 				 * Malformed input, bad characters in the
532 				 * middle of a multibyte sequence.  Escape
533 				 * with UTF-8B.
534 				 */
535 				if (flags & WCSBIN_STRICT) {
536 					if (i == 0) {
537 						errno = EILSEQ;
538 						return ((size_t)-1);
539 					}
540 					goto breakout;
541 				}
542 				if (dst)
543 					dst[j] = 0xDC00 | (unsigned char)src[i];
544 				++j;
545 				goto loopup;
546 			}
547 			wch <<= 6;
548 			wch |= ch & 0x3f;
549 		}
550 
551 		/*
552 		 * Check validity of the wchar.  If invalid we could escape
553 		 * just the first character and loop up, but it ought to be
554 		 * more readable if we escape all the chars in the sequence
555 		 * (since they are all >= 0x80 and might represent a legacy
556 		 * 5-byte or 6-byte code).
557 		 */
558 		if (wch < lbound ||
559 		    ((flags & WCSBIN_LONGCODES) == 0 && (wch & ~0x10ffff)) ||
560 		    ((flags & WCSBIN_LONGCODES) == 0 && want >= 5)) {
561 			goto forceesc;
562 		}
563 
564 		/*
565 		 * Check if wch is a surrogate code (which also encloses our
566 		 * UTF-8B escaping range).  This is normally illegal in UTF8.
567 		 * If it is, we need to escape each characer in the sequence.
568 		 * Breakout if there isn't enough output buffer space.
569 		 *
570 		 * If (flags & WCSBIN_SURRO) the caller wishes to accept
571 		 * surrogate codes, i.e. the input might potentially already
572 		 * be escaped UTF8-B or unchecked UTF-16 that was converted
573 		 * into UTF-8.
574 		 */
575 		if ((flags & WCSBIN_SURRO) == 0 &&
576 		    wch >= 0xD800 && wch <= 0xDFFF) {
577 forceesc:
578 			if (j + want > dlen)
579 				break;
580 			if (flags & WCSBIN_STRICT) {
581 				if (i == 0) {
582 					errno = EILSEQ;
583 					return ((size_t)-1);
584 				}
585 				break;
586 			}
587 			for (k = 0; k < want; ++k) {
588 				if (dst) {
589 					dst[j] = 0xDC00 |
590 						 (unsigned char)src[i+k];
591 				}
592 				++j;
593 			}
594 			i += k - 1;
595 		} else {
596 			i += k - 1;
597 			if (dst)
598 				dst[j] = wch;
599 			++j;
600 		}
601 loopup:
602 		;
603 	}
604 breakout:
605 	*slen = i;
606 
607 	return j;
608 }
609 
610 static size_t
611 _UTF8_wcrtombin(char * __restrict dst, const wchar_t * __restrict src,
612 		size_t dlen, size_t *slen, int flags)
613 {
614 	size_t i;
615 	size_t j;
616 	size_t k;
617 	size_t n = *slen;
618 	size_t len;
619 	unsigned char lead;
620 	wchar_t wc;
621 
622 	for (i = j = 0; i < n; ++i) {
623 		if (j == dlen)
624 			break;
625 		wc = src[i];
626 
627 		if ((wc & ~0x7f) == 0) {
628 			/* Fast path for plain ASCII characters. */
629 			if (dst)
630 				dst[j] = (unsigned char)wc;
631 			++j;
632 			continue;
633 		}
634 		if ((wc & ~0x7ff) == 0) {
635 			lead = 0xc0;
636 			len = 2;
637 		} else if (wc >= 0xDC80 && wc <= 0xDCFF &&
638 			   (flags & WCSBIN_SURRO) == 0) {
639 			if (flags & WCSBIN_STRICT) {
640 				/*
641 				 * STRICT without SURRO is an error for
642 				 * surrogates.
643 				 */
644 				if (i == 0) {
645 					errno = EILSEQ;
646 					return ((size_t)-1);
647 				}
648 				break;
649 			}
650 			if (dst)
651 				dst[j] = (unsigned char)wc;
652 			++j;
653 			continue;
654 		} else if ((wc & ~0xffff) == 0) {
655 			if (wc >= 0xD800 && wc <= 0xDFFF &&
656 			    (flags & (WCSBIN_SURRO | WCSBIN_STRICT)) ==
657 			    WCSBIN_STRICT) {
658 				/*
659 				 * Surrogates in general are an error
660 				 * if STRICT is specified and SURRO is not
661 				 * specified.
662 				 */
663 				if (i == 0) {
664 					errno = EILSEQ;
665 					return ((size_t)-1);
666 				}
667 				break;
668 			}
669 			lead = 0xe0;
670 			len = 3;
671 		} else if ((wc & ~0x10ffff) == 0) {
672 			lead = 0xf0;
673 			len = 4;
674 		} else if ((flags & WCSBIN_LONGCODES) && wc < 0x200000) {
675 			/* normally illegal */
676 			lead = 0xf0;
677 			len = 4;
678 		} else if ((flags & WCSBIN_LONGCODES) && wc < 0x4000000) {
679 			/* normally illegal */
680 			lead = 0xf8;
681 			len = 5;
682 		} else if ((flags & WCSBIN_LONGCODES) &&
683 			   (uint32_t)wc < 0x80000000U) {
684 			/* normally illegal */
685 			lead = 0xfc;
686 			len = 6;
687 		} else {
688 			if (i == 0) {
689 				errno = EILSEQ;
690 				return ((size_t)-1);
691 			}
692 			/* stop here, process error on next loop */
693 			break;
694 		}
695 
696 		/*
697 		 * Output the octets representing the character in chunks
698 		 * of 6 bits, least significant last. The first octet is
699 		 * a special case because it contains the sequence length
700 		 * information.
701 		 */
702 		if (j + len > dlen)
703 			break;
704 		k = j;
705 		j += len;
706 		if (dst) {
707 			while (--len > 0) {
708 				dst[k + len] = (wc & 0x3f) | 0x80;
709 				wc >>= 6;
710 			}
711 			dst[k] = (wc & 0xff) | lead;
712 		}
713 	}
714 	*slen = i;
715 
716 	return j;
717 }
718 
/*
 * Public entry point for the escaping UTF-8 to wide-character buffer
 * conversion; forwards every argument unchanged to _UTF8_mbintowcr()
 * and returns its result.
 */
size_t
utf8towcr(wchar_t * __restrict dst, const char * __restrict src,
		size_t dlen, size_t *slen, int flags)
{
	size_t nwide;

	nwide = _UTF8_mbintowcr(dst, src, dlen, slen, flags);
	return (nwide);
}
725 
/*
 * Public entry point for the de-escaping wide-character to UTF-8 buffer
 * conversion; forwards every argument unchanged to _UTF8_wcrtombin()
 * and returns its result.
 */
size_t
wcrtoutf8(char * __restrict dst, const wchar_t * __restrict src,
	  size_t dlen, size_t *slen, int flags)
{
	size_t nbytes;

	nbytes = _UTF8_wcrtombin(dst, src, dlen, slen, flags);
	return (nbytes);
}
732