xref: /dflybsd-src/lib/libc/locale/utf8.c (revision cb40c8cc2df81ae2ad923617b2becc49e89c11de)
14776d4e8SJohn Marino /*
28a84c799SMatthew Dillon  * Copyright 2015 Matthew Dillon <dillon@backplane.com> (mbintowcr, wcrtombin)
34776d4e8SJohn Marino  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
44776d4e8SJohn Marino  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
50d5acd74SJohn Marino  * Copyright (c) 2002-2004 Tim J. Robbins
60d5acd74SJohn Marino  * All rights reserved.
70d5acd74SJohn Marino  *
80d5acd74SJohn Marino  * Copyright (c) 2011 The FreeBSD Foundation
90d5acd74SJohn Marino  * All rights reserved.
100d5acd74SJohn Marino  * Portions of this software were developed by David Chisnall
110d5acd74SJohn Marino  * under sponsorship from the FreeBSD Foundation.
120d5acd74SJohn Marino  *
130d5acd74SJohn Marino  * Redistribution and use in source and binary forms, with or without
140d5acd74SJohn Marino  * modification, are permitted provided that the following conditions
150d5acd74SJohn Marino  * are met:
160d5acd74SJohn Marino  * 1. Redistributions of source code must retain the above copyright
170d5acd74SJohn Marino  *    notice, this list of conditions and the following disclaimer.
180d5acd74SJohn Marino  * 2. Redistributions in binary form must reproduce the above copyright
190d5acd74SJohn Marino  *    notice, this list of conditions and the following disclaimer in the
200d5acd74SJohn Marino  *    documentation and/or other materials provided with the distribution.
210d5acd74SJohn Marino  *
220d5acd74SJohn Marino  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
230d5acd74SJohn Marino  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
240d5acd74SJohn Marino  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
250d5acd74SJohn Marino  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
260d5acd74SJohn Marino  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
270d5acd74SJohn Marino  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
280d5acd74SJohn Marino  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
290d5acd74SJohn Marino  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
300d5acd74SJohn Marino  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
310d5acd74SJohn Marino  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
320d5acd74SJohn Marino  * SUCH DAMAGE.
330d5acd74SJohn Marino  */
340d5acd74SJohn Marino 
/*
 * WCSBIN_EOF -		Indicate EOF on input buffer.
 *
 * WCSBIN_SURRO -	Pass-through surrogate space (typically if the UTF-8
 *			has already been escaped), on bytes-to-wchars and
 *			wchars-to-bytes.  Escaping of other illegal codes will
 *			still occur on input but de-escaping will not occur
 *			on output (they will remain in the surrogate space).
 *
 * WCSBIN_LONGCODES -	Allow 4-byte codes > 0x10FFFF as well as 5-byte and
 *			6-byte sequences (normally illegal); otherwise escape
 *			them on input and fail on output.
 *
 * WCSBIN_STRICT -	Allow byte-to-wide conversions to fail.
 */
508a84c799SMatthew Dillon 
510d5acd74SJohn Marino #include <sys/param.h>
520d5acd74SJohn Marino 
530d5acd74SJohn Marino #include <errno.h>
540d5acd74SJohn Marino #include <limits.h>
550d5acd74SJohn Marino #include <runetype.h>
560d5acd74SJohn Marino #include <stdlib.h>
570d5acd74SJohn Marino #include <string.h>
580d5acd74SJohn Marino #include <wchar.h>
590d5acd74SJohn Marino #include "mblocal.h"
600d5acd74SJohn Marino 
/*
 * Forward declarations of the file-local UTF-8 conversion routines.
 */
static size_t	_UTF8_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static int	_UTF8_mbsinit(const mbstate_t *ps);
static size_t	_UTF8_mbsnrtowcs(wchar_t * __restrict dst,
		    const char ** __restrict src, size_t nms, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_UTF8_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
static size_t	_UTF8_wcsnrtombs(char * __restrict dst,
		    const wchar_t ** __restrict src, size_t nwc, size_t len,
		    mbstate_t * __restrict ps);
static size_t	_UTF8_mbintowcr(wchar_t * __restrict dst,
		    const char * __restrict src,
		    size_t dlen, size_t *slen, int flags);
static size_t	_UTF8_wcrtombin(char * __restrict dst,
		    const wchar_t * __restrict src,
		    size_t dlen, size_t *slen, int flags);
770d5acd74SJohn Marino 
/*
 * Decoder state carried between calls while a multibyte sequence is
 * only partially consumed.  The conversion routines in this file
 * access it by casting an mbstate_t pointer.
 */
typedef struct {
	wchar_t	ch;		/* code point bits accumulated so far */
	int	want;		/* bytes still expected; 0 = initial state */
	wchar_t	lbound;		/* smallest value legal for this length */
} _UTF8State;
830d5acd74SJohn Marino 
840d5acd74SJohn Marino int
_UTF8_init(struct xlocale_ctype * l,_RuneLocale * rl)850d5acd74SJohn Marino _UTF8_init(struct xlocale_ctype *l, _RuneLocale *rl)
860d5acd74SJohn Marino {
870d5acd74SJohn Marino 
880d5acd74SJohn Marino 	l->__mbrtowc = _UTF8_mbrtowc;
890d5acd74SJohn Marino 	l->__wcrtomb = _UTF8_wcrtomb;
900d5acd74SJohn Marino 	l->__mbsinit = _UTF8_mbsinit;
910d5acd74SJohn Marino 	l->__mbsnrtowcs = _UTF8_mbsnrtowcs;
920d5acd74SJohn Marino 	l->__wcsnrtombs = _UTF8_wcsnrtombs;
938a84c799SMatthew Dillon 	l->__mbintowcr = _UTF8_mbintowcr;
948a84c799SMatthew Dillon 	l->__wcrtombin = _UTF8_wcrtombin;
950d5acd74SJohn Marino 	l->runes = rl;
964776d4e8SJohn Marino 	l->__mb_cur_max = 4;
970d5acd74SJohn Marino 	/*
980d5acd74SJohn Marino 	 * UCS-4 encoding used as the internal representation, so
990d5acd74SJohn Marino 	 * slots 0x0080-0x00FF are occuped and must be excluded
1000d5acd74SJohn Marino 	 * from the single byte ctype by setting the limit.
1010d5acd74SJohn Marino 	 */
1020d5acd74SJohn Marino 	l->__mb_sb_limit = 128;
1030d5acd74SJohn Marino 
1040d5acd74SJohn Marino 	return (0);
1050d5acd74SJohn Marino }
1060d5acd74SJohn Marino 
1070d5acd74SJohn Marino static int
_UTF8_mbsinit(const mbstate_t * ps)1080d5acd74SJohn Marino _UTF8_mbsinit(const mbstate_t *ps)
1090d5acd74SJohn Marino {
1100d5acd74SJohn Marino 
1110d5acd74SJohn Marino 	return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
1120d5acd74SJohn Marino }
1130d5acd74SJohn Marino 
1140d5acd74SJohn Marino static size_t
_UTF8_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)1150d5acd74SJohn Marino _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
1160d5acd74SJohn Marino     mbstate_t * __restrict ps)
1170d5acd74SJohn Marino {
1180d5acd74SJohn Marino 	_UTF8State *us;
1190d5acd74SJohn Marino 	int ch, i, mask, want;
1200d5acd74SJohn Marino 	wchar_t lbound, wch;
1210d5acd74SJohn Marino 
1220d5acd74SJohn Marino 	us = (_UTF8State *)ps;
1230d5acd74SJohn Marino 
124594d13a0SJohn Marino 	if (us->want < 0 || us->want > 4) {
1250d5acd74SJohn Marino 		errno = EINVAL;
1260d5acd74SJohn Marino 		return ((size_t)-1);
1270d5acd74SJohn Marino 	}
1280d5acd74SJohn Marino 
1290d5acd74SJohn Marino 	if (s == NULL) {
1300d5acd74SJohn Marino 		s = "";
1310d5acd74SJohn Marino 		n = 1;
1320d5acd74SJohn Marino 		pwc = NULL;
1330d5acd74SJohn Marino 	}
1340d5acd74SJohn Marino 
1350d5acd74SJohn Marino 	if (n == 0)
1360d5acd74SJohn Marino 		/* Incomplete multibyte sequence */
1370d5acd74SJohn Marino 		return ((size_t)-2);
1380d5acd74SJohn Marino 
1390d5acd74SJohn Marino 	if (us->want == 0) {
1400d5acd74SJohn Marino 		/*
1410d5acd74SJohn Marino 		 * Determine the number of octets that make up this character
1420d5acd74SJohn Marino 		 * from the first octet, and a mask that extracts the
1430d5acd74SJohn Marino 		 * interesting bits of the first octet. We already know
1440d5acd74SJohn Marino 		 * the character is at least two bytes long.
1450d5acd74SJohn Marino 		 *
1460d5acd74SJohn Marino 		 * We also specify a lower bound for the character code to
1470d5acd74SJohn Marino 		 * detect redundant, non-"shortest form" encodings. For
1480d5acd74SJohn Marino 		 * example, the sequence C0 80 is _not_ a legal representation
1490d5acd74SJohn Marino 		 * of the null character. This enforces a 1-to-1 mapping
1500d5acd74SJohn Marino 		 * between character codes and their multibyte representations.
1510d5acd74SJohn Marino 		 */
1520d5acd74SJohn Marino 		ch = (unsigned char)*s;
1530d5acd74SJohn Marino 		if ((ch & 0x80) == 0) {
1544776d4e8SJohn Marino 			/* Fast path for plain ASCII characters. */
1554776d4e8SJohn Marino 			if (pwc != NULL)
1564776d4e8SJohn Marino 				*pwc = ch;
1574776d4e8SJohn Marino 			return (ch != '\0' ? 1 : 0);
1584776d4e8SJohn Marino 		}
1594776d4e8SJohn Marino 		if ((ch & 0xe0) == 0xc0) {
1600d5acd74SJohn Marino 			mask = 0x1f;
1610d5acd74SJohn Marino 			want = 2;
1620d5acd74SJohn Marino 			lbound = 0x80;
1630d5acd74SJohn Marino 		} else if ((ch & 0xf0) == 0xe0) {
1640d5acd74SJohn Marino 			mask = 0x0f;
1650d5acd74SJohn Marino 			want = 3;
1660d5acd74SJohn Marino 			lbound = 0x800;
1670d5acd74SJohn Marino 		} else if ((ch & 0xf8) == 0xf0) {
1680d5acd74SJohn Marino 			mask = 0x07;
1690d5acd74SJohn Marino 			want = 4;
1700d5acd74SJohn Marino 			lbound = 0x10000;
1710d5acd74SJohn Marino 		} else {
1720d5acd74SJohn Marino 			/*
1730d5acd74SJohn Marino 			 * Malformed input; input is not UTF-8.
1740d5acd74SJohn Marino 			 */
1750d5acd74SJohn Marino 			errno = EILSEQ;
1760d5acd74SJohn Marino 			return ((size_t)-1);
1770d5acd74SJohn Marino 		}
1780d5acd74SJohn Marino 	} else {
1790d5acd74SJohn Marino 		want = us->want;
1800d5acd74SJohn Marino 		lbound = us->lbound;
1810d5acd74SJohn Marino 	}
1820d5acd74SJohn Marino 
1830d5acd74SJohn Marino 	/*
1840d5acd74SJohn Marino 	 * Decode the octet sequence representing the character in chunks
1850d5acd74SJohn Marino 	 * of 6 bits, most significant first.
1860d5acd74SJohn Marino 	 */
1870d5acd74SJohn Marino 	if (us->want == 0)
1880d5acd74SJohn Marino 		wch = (unsigned char)*s++ & mask;
1890d5acd74SJohn Marino 	else
1900d5acd74SJohn Marino 		wch = us->ch;
1914776d4e8SJohn Marino 
1920d5acd74SJohn Marino 	for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
1930d5acd74SJohn Marino 		if ((*s & 0xc0) != 0x80) {
1940d5acd74SJohn Marino 			/*
1950d5acd74SJohn Marino 			 * Malformed input; bad characters in the middle
1960d5acd74SJohn Marino 			 * of a character.
1970d5acd74SJohn Marino 			 */
1980d5acd74SJohn Marino 			errno = EILSEQ;
1990d5acd74SJohn Marino 			return ((size_t)-1);
2000d5acd74SJohn Marino 		}
2010d5acd74SJohn Marino 		wch <<= 6;
2020d5acd74SJohn Marino 		wch |= *s++ & 0x3f;
2030d5acd74SJohn Marino 	}
2040d5acd74SJohn Marino 	if (i < want) {
2050d5acd74SJohn Marino 		/* Incomplete multibyte sequence. */
2060d5acd74SJohn Marino 		us->want = want - i;
2070d5acd74SJohn Marino 		us->lbound = lbound;
2080d5acd74SJohn Marino 		us->ch = wch;
2090d5acd74SJohn Marino 		return ((size_t)-2);
2100d5acd74SJohn Marino 	}
211*3d4b9338SJohn Marino 	if (wch < lbound || wch > 0x10ffff) {
2120d5acd74SJohn Marino 		/*
2138a84c799SMatthew Dillon 		 * Malformed input; redundant encoding or illegal
2148a84c799SMatthew Dillon 		 *		    code sequence.
2150d5acd74SJohn Marino 		 */
2160d5acd74SJohn Marino 		errno = EILSEQ;
2170d5acd74SJohn Marino 		return ((size_t)-1);
2180d5acd74SJohn Marino 	}
2190d5acd74SJohn Marino 	if (pwc != NULL)
2200d5acd74SJohn Marino 		*pwc = wch;
2210d5acd74SJohn Marino 	us->want = 0;
2220d5acd74SJohn Marino 	return (wch == L'\0' ? 0 : want);
2230d5acd74SJohn Marino }
2240d5acd74SJohn Marino 
2250d5acd74SJohn Marino static size_t
_UTF8_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)2260d5acd74SJohn Marino _UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
2270d5acd74SJohn Marino     size_t nms, size_t len, mbstate_t * __restrict ps)
2280d5acd74SJohn Marino {
2290d5acd74SJohn Marino 	_UTF8State *us;
2300d5acd74SJohn Marino 	const char *s;
2310d5acd74SJohn Marino 	size_t nchr;
2320d5acd74SJohn Marino 	wchar_t wc;
2330d5acd74SJohn Marino 	size_t nb;
2340d5acd74SJohn Marino 
2350d5acd74SJohn Marino 	us = (_UTF8State *)ps;
2360d5acd74SJohn Marino 
2370d5acd74SJohn Marino 	s = *src;
2380d5acd74SJohn Marino 	nchr = 0;
2390d5acd74SJohn Marino 
2400d5acd74SJohn Marino 	if (dst == NULL) {
2410d5acd74SJohn Marino 		/*
2420d5acd74SJohn Marino 		 * The fast path in the loop below is not safe if an ASCII
2430d5acd74SJohn Marino 		 * character appears as anything but the first byte of a
2440d5acd74SJohn Marino 		 * multibyte sequence. Check now to avoid doing it in the loop.
2450d5acd74SJohn Marino 		 */
2460d5acd74SJohn Marino 		if (nms > 0 && us->want > 0 && (signed char)*s > 0) {
2470d5acd74SJohn Marino 			errno = EILSEQ;
2480d5acd74SJohn Marino 			return ((size_t)-1);
2490d5acd74SJohn Marino 		}
2500d5acd74SJohn Marino 		for (;;) {
2510d5acd74SJohn Marino 			if (nms > 0 && (signed char)*s > 0)
2520d5acd74SJohn Marino 				/*
2530d5acd74SJohn Marino 				 * Fast path for plain ASCII characters
2540d5acd74SJohn Marino 				 * excluding NUL.
2550d5acd74SJohn Marino 				 */
2560d5acd74SJohn Marino 				nb = 1;
2570d5acd74SJohn Marino 			else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
2580d5acd74SJohn Marino 			    (size_t)-1)
2590d5acd74SJohn Marino 				/* Invalid sequence - mbrtowc() sets errno. */
2600d5acd74SJohn Marino 				return ((size_t)-1);
2610d5acd74SJohn Marino 			else if (nb == 0 || nb == (size_t)-2)
2620d5acd74SJohn Marino 				return (nchr);
2630d5acd74SJohn Marino 			s += nb;
2640d5acd74SJohn Marino 			nms -= nb;
2650d5acd74SJohn Marino 			nchr++;
2660d5acd74SJohn Marino 		}
2670d5acd74SJohn Marino 		/*NOTREACHED*/
2680d5acd74SJohn Marino 	}
2690d5acd74SJohn Marino 
2700d5acd74SJohn Marino 	/*
2710d5acd74SJohn Marino 	 * The fast path in the loop below is not safe if an ASCII
2720d5acd74SJohn Marino 	 * character appears as anything but the first byte of a
2730d5acd74SJohn Marino 	 * multibyte sequence. Check now to avoid doing it in the loop.
2740d5acd74SJohn Marino 	 */
2750d5acd74SJohn Marino 	if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) {
2760d5acd74SJohn Marino 		errno = EILSEQ;
2770d5acd74SJohn Marino 		return ((size_t)-1);
2780d5acd74SJohn Marino 	}
2790d5acd74SJohn Marino 	while (len-- > 0) {
2800d5acd74SJohn Marino 		if (nms > 0 && (signed char)*s > 0) {
2810d5acd74SJohn Marino 			/*
2820d5acd74SJohn Marino 			 * Fast path for plain ASCII characters
2830d5acd74SJohn Marino 			 * excluding NUL.
2840d5acd74SJohn Marino 			 */
2850d5acd74SJohn Marino 			*dst = (wchar_t)*s;
2860d5acd74SJohn Marino 			nb = 1;
2870d5acd74SJohn Marino 		} else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
2880d5acd74SJohn Marino 		    (size_t)-1) {
2890d5acd74SJohn Marino 			*src = s;
2900d5acd74SJohn Marino 			return ((size_t)-1);
2910d5acd74SJohn Marino 		} else if (nb == (size_t)-2) {
2920d5acd74SJohn Marino 			*src = s + nms;
2930d5acd74SJohn Marino 			return (nchr);
2940d5acd74SJohn Marino 		} else if (nb == 0) {
2950d5acd74SJohn Marino 			*src = NULL;
2960d5acd74SJohn Marino 			return (nchr);
2970d5acd74SJohn Marino 		}
2980d5acd74SJohn Marino 		s += nb;
2990d5acd74SJohn Marino 		nms -= nb;
3000d5acd74SJohn Marino 		nchr++;
3010d5acd74SJohn Marino 		dst++;
3020d5acd74SJohn Marino 	}
3030d5acd74SJohn Marino 	*src = s;
3040d5acd74SJohn Marino 	return (nchr);
3050d5acd74SJohn Marino }
3060d5acd74SJohn Marino 
3070d5acd74SJohn Marino static size_t
_UTF8_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)3080d5acd74SJohn Marino _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
3090d5acd74SJohn Marino {
3100d5acd74SJohn Marino 	_UTF8State *us;
3110d5acd74SJohn Marino 	unsigned char lead;
3120d5acd74SJohn Marino 	int i, len;
3130d5acd74SJohn Marino 
3140d5acd74SJohn Marino 	us = (_UTF8State *)ps;
3150d5acd74SJohn Marino 
3160d5acd74SJohn Marino 	if (us->want != 0) {
3170d5acd74SJohn Marino 		errno = EINVAL;
3180d5acd74SJohn Marino 		return ((size_t)-1);
3190d5acd74SJohn Marino 	}
3200d5acd74SJohn Marino 
3210d5acd74SJohn Marino 	if (s == NULL)
3220d5acd74SJohn Marino 		/* Reset to initial shift state (no-op) */
3230d5acd74SJohn Marino 		return (1);
3240d5acd74SJohn Marino 
3250d5acd74SJohn Marino 	/*
3260d5acd74SJohn Marino 	 * Determine the number of octets needed to represent this character.
3270d5acd74SJohn Marino 	 * We always output the shortest sequence possible. Also specify the
3280d5acd74SJohn Marino 	 * first few bits of the first octet, which contains the information
3290d5acd74SJohn Marino 	 * about the sequence length.
3300d5acd74SJohn Marino 	 */
3310d5acd74SJohn Marino 	if ((wc & ~0x7f) == 0) {
3324776d4e8SJohn Marino 		/* Fast path for plain ASCII characters. */
3334776d4e8SJohn Marino 		*s = (char)wc;
3344776d4e8SJohn Marino 		return (1);
3350d5acd74SJohn Marino 	} else if ((wc & ~0x7ff) == 0) {
3360d5acd74SJohn Marino 		lead = 0xc0;
3370d5acd74SJohn Marino 		len = 2;
3380d5acd74SJohn Marino 	} else if ((wc & ~0xffff) == 0) {
3390d5acd74SJohn Marino 		lead = 0xe0;
3400d5acd74SJohn Marino 		len = 3;
341*3d4b9338SJohn Marino 	} else if (wc <= 0x10ffff) {
3420d5acd74SJohn Marino 		lead = 0xf0;
3430d5acd74SJohn Marino 		len = 4;
3440d5acd74SJohn Marino 	} else {
3450d5acd74SJohn Marino 		errno = EILSEQ;
3460d5acd74SJohn Marino 		return ((size_t)-1);
3470d5acd74SJohn Marino 	}
3480d5acd74SJohn Marino 
3490d5acd74SJohn Marino 	/*
3500d5acd74SJohn Marino 	 * Output the octets representing the character in chunks
3510d5acd74SJohn Marino 	 * of 6 bits, least significant last. The first octet is
3520d5acd74SJohn Marino 	 * a special case because it contains the sequence length
3530d5acd74SJohn Marino 	 * information.
3540d5acd74SJohn Marino 	 */
3550d5acd74SJohn Marino 	for (i = len - 1; i > 0; i--) {
3560d5acd74SJohn Marino 		s[i] = (wc & 0x3f) | 0x80;
3570d5acd74SJohn Marino 		wc >>= 6;
3580d5acd74SJohn Marino 	}
3590d5acd74SJohn Marino 	*s = (wc & 0xff) | lead;
3600d5acd74SJohn Marino 
3610d5acd74SJohn Marino 	return (len);
3620d5acd74SJohn Marino }
3630d5acd74SJohn Marino 
3640d5acd74SJohn Marino static size_t
_UTF8_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)3650d5acd74SJohn Marino _UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
3660d5acd74SJohn Marino     size_t nwc, size_t len, mbstate_t * __restrict ps)
3670d5acd74SJohn Marino {
3680d5acd74SJohn Marino 	_UTF8State *us;
3690d5acd74SJohn Marino 	char buf[MB_LEN_MAX];
3700d5acd74SJohn Marino 	const wchar_t *s;
3710d5acd74SJohn Marino 	size_t nbytes;
3720d5acd74SJohn Marino 	size_t nb;
3730d5acd74SJohn Marino 
3740d5acd74SJohn Marino 	us = (_UTF8State *)ps;
3750d5acd74SJohn Marino 
3760d5acd74SJohn Marino 	if (us->want != 0) {
3770d5acd74SJohn Marino 		errno = EINVAL;
3780d5acd74SJohn Marino 		return ((size_t)-1);
3790d5acd74SJohn Marino 	}
3800d5acd74SJohn Marino 
3810d5acd74SJohn Marino 	s = *src;
3820d5acd74SJohn Marino 	nbytes = 0;
3830d5acd74SJohn Marino 
3840d5acd74SJohn Marino 	if (dst == NULL) {
3850d5acd74SJohn Marino 		while (nwc-- > 0) {
3860d5acd74SJohn Marino 			if (0 <= *s && *s < 0x80)
3870d5acd74SJohn Marino 				/* Fast path for plain ASCII characters. */
3880d5acd74SJohn Marino 				nb = 1;
3890d5acd74SJohn Marino 			else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
3900d5acd74SJohn Marino 			    (size_t)-1)
3910d5acd74SJohn Marino 				/* Invalid character - wcrtomb() sets errno. */
3920d5acd74SJohn Marino 				return ((size_t)-1);
3930d5acd74SJohn Marino 			if (*s == L'\0')
3940d5acd74SJohn Marino 				return (nbytes + nb - 1);
3950d5acd74SJohn Marino 			s++;
3960d5acd74SJohn Marino 			nbytes += nb;
3970d5acd74SJohn Marino 		}
3980d5acd74SJohn Marino 		return (nbytes);
3990d5acd74SJohn Marino 	}
4000d5acd74SJohn Marino 
4010d5acd74SJohn Marino 	while (len > 0 && nwc-- > 0) {
4020d5acd74SJohn Marino 		if (0 <= *s && *s < 0x80) {
4030d5acd74SJohn Marino 			/* Fast path for plain ASCII characters. */
4040d5acd74SJohn Marino 			nb = 1;
4050d5acd74SJohn Marino 			*dst = *s;
4060d5acd74SJohn Marino 		} else if (len > (size_t)MB_CUR_MAX) {
4070d5acd74SJohn Marino 			/* Enough space to translate in-place. */
4080d5acd74SJohn Marino 			if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
4090d5acd74SJohn Marino 				*src = s;
4100d5acd74SJohn Marino 				return ((size_t)-1);
4110d5acd74SJohn Marino 			}
4120d5acd74SJohn Marino 		} else {
4130d5acd74SJohn Marino 			/*
4140d5acd74SJohn Marino 			 * May not be enough space; use temp. buffer.
4150d5acd74SJohn Marino 			 */
4160d5acd74SJohn Marino 			if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
4170d5acd74SJohn Marino 				*src = s;
4180d5acd74SJohn Marino 				return ((size_t)-1);
4190d5acd74SJohn Marino 			}
4200d5acd74SJohn Marino 			if (nb > (int)len)
4210d5acd74SJohn Marino 				/* MB sequence for character won't fit. */
4220d5acd74SJohn Marino 				break;
4234776d4e8SJohn Marino 			(void) memcpy(dst, buf, nb);
4240d5acd74SJohn Marino 		}
4250d5acd74SJohn Marino 		if (*s == L'\0') {
4260d5acd74SJohn Marino 			*src = NULL;
4270d5acd74SJohn Marino 			return (nbytes + nb - 1);
4280d5acd74SJohn Marino 		}
4290d5acd74SJohn Marino 		s++;
4300d5acd74SJohn Marino 		dst += nb;
4310d5acd74SJohn Marino 		len -= nb;
4320d5acd74SJohn Marino 		nbytes += nb;
4330d5acd74SJohn Marino 	}
4340d5acd74SJohn Marino 	*src = s;
4350d5acd74SJohn Marino 	return (nbytes);
4360d5acd74SJohn Marino }
4378a84c799SMatthew Dillon 
4388a84c799SMatthew Dillon /*
4398a84c799SMatthew Dillon  * Clean binary to wchar buffer conversions.  This is basically like a normal
4408a84c799SMatthew Dillon  * buffer conversion but with a sane argument API and escaping.  See none.c
4418a84c799SMatthew Dillon  * for a more complete description.
4428a84c799SMatthew Dillon  */
4438a84c799SMatthew Dillon static size_t
_UTF8_mbintowcr(wchar_t * __restrict dst,const char * __restrict src,size_t dlen,size_t * slen,int flags)4448a84c799SMatthew Dillon _UTF8_mbintowcr(wchar_t * __restrict dst, const char * __restrict src,
4458a84c799SMatthew Dillon 		size_t dlen, size_t *slen, int flags)
4468a84c799SMatthew Dillon {
4478a84c799SMatthew Dillon 	size_t i;
4488a84c799SMatthew Dillon 	size_t j;
4498a84c799SMatthew Dillon 	size_t k;
4508a84c799SMatthew Dillon 	size_t n = *slen;
4518a84c799SMatthew Dillon 	int ch, mask, want;
4528a84c799SMatthew Dillon 	wchar_t lbound, wch;
4538a84c799SMatthew Dillon 
4548a84c799SMatthew Dillon 	for (i = j = 0; i < n; ++i) {
4558a84c799SMatthew Dillon 		if (j == dlen)
4568a84c799SMatthew Dillon 			break;
4578a84c799SMatthew Dillon 		ch = (unsigned char)src[i];
4588a84c799SMatthew Dillon 
4598a84c799SMatthew Dillon 		if ((ch & 0x80) == 0) {
4608a84c799SMatthew Dillon 			/* Fast path for plain ASCII characters. */
4618a84c799SMatthew Dillon 			if (dst)
4628a84c799SMatthew Dillon 				dst[j] = ch;
4638a84c799SMatthew Dillon 			++j;
4648a84c799SMatthew Dillon 			continue;
4658a84c799SMatthew Dillon 		}
4668a84c799SMatthew Dillon 		if ((ch & 0xe0) == 0xc0) {
4678a84c799SMatthew Dillon 			mask = 0x1f;
4688a84c799SMatthew Dillon 			want = 2;
4698a84c799SMatthew Dillon 			lbound = 0x80;
4708a84c799SMatthew Dillon 		} else if ((ch & 0xf0) == 0xe0) {
4718a84c799SMatthew Dillon 			mask = 0x0f;
4728a84c799SMatthew Dillon 			want = 3;
4738a84c799SMatthew Dillon 			lbound = 0x800;
4748a84c799SMatthew Dillon 		} else if ((ch & 0xf8) == 0xf0) {
4758a84c799SMatthew Dillon 			mask = 0x07;
4768a84c799SMatthew Dillon 			want = 4;
4778a84c799SMatthew Dillon 			lbound = 0x10000;
4788a84c799SMatthew Dillon 		} else if ((ch & 0xfc) == 0xf8) {
4798a84c799SMatthew Dillon 			/* normally illegal, handled down below */
4808a84c799SMatthew Dillon 			mask = 0x03;
4818a84c799SMatthew Dillon 			want = 5;
4828a84c799SMatthew Dillon 			lbound = 0x200000;
4838a84c799SMatthew Dillon 		} else if ((ch & 0xfe) == 0xfc) {
4848a84c799SMatthew Dillon 			/* normally illegal, handled down below */
4858a84c799SMatthew Dillon 			mask = 0x01;
4868a84c799SMatthew Dillon 			want = 6;
4878a84c799SMatthew Dillon 			lbound = 0x4000000;
4888a84c799SMatthew Dillon 		} else {
4898a84c799SMatthew Dillon 			/*
4908a84c799SMatthew Dillon 			 * Malformed input; input is not UTF-8, escape
4918a84c799SMatthew Dillon 			 * with UTF-8B.
4928a84c799SMatthew Dillon 			 */
4938a84c799SMatthew Dillon 			if (flags & WCSBIN_STRICT) {
4948a84c799SMatthew Dillon 				if (i == 0) {
4958a84c799SMatthew Dillon 					errno = EILSEQ;
4968a84c799SMatthew Dillon 					return ((size_t)-1);
4978a84c799SMatthew Dillon 				}
4988a84c799SMatthew Dillon 				break;
4998a84c799SMatthew Dillon 			}
5008a84c799SMatthew Dillon 			if (dst)
5018a84c799SMatthew Dillon 				dst[j] = 0xDC00 | ch;
5028a84c799SMatthew Dillon 			++j;
5038a84c799SMatthew Dillon 			continue;
5048a84c799SMatthew Dillon 		}
5058a84c799SMatthew Dillon 
5068a84c799SMatthew Dillon 		/*
5078a84c799SMatthew Dillon 		 * Construct wchar_t from multibyte sequence.
5088a84c799SMatthew Dillon 		 */
5098a84c799SMatthew Dillon 		wch = ch & mask;
5108a84c799SMatthew Dillon 		for (k = 1; k < want; ++k) {
5118a84c799SMatthew Dillon 			/*
5128a84c799SMatthew Dillon 			 * Stop if not enough input (don't do this early
5138a84c799SMatthew Dillon 			 * so we can detect illegal characters as they occur
5148a84c799SMatthew Dillon 			 * in the stream).
5158a84c799SMatthew Dillon 			 *
5168a84c799SMatthew Dillon 			 * If termination is requested force-escape all chars.
5178a84c799SMatthew Dillon 			 */
5188a84c799SMatthew Dillon 			if (i + k >= n)	{
5198a84c799SMatthew Dillon 				if (flags & WCSBIN_EOF) {
5208a84c799SMatthew Dillon 					want = n - i;
5218a84c799SMatthew Dillon 					goto forceesc;
5228a84c799SMatthew Dillon 				}
5238a84c799SMatthew Dillon 				goto breakout;
5248a84c799SMatthew Dillon 			}
5258a84c799SMatthew Dillon 
5268a84c799SMatthew Dillon 			ch = src[i+k];
5278a84c799SMatthew Dillon 			if ((ch & 0xc0) != 0x80) {
5288a84c799SMatthew Dillon 				/*
5298a84c799SMatthew Dillon 				 * Malformed input, bad characters in the
5308a84c799SMatthew Dillon 				 * middle of a multibyte sequence.  Escape
5318a84c799SMatthew Dillon 				 * with UTF-8B.
5328a84c799SMatthew Dillon 				 */
5338a84c799SMatthew Dillon 				if (flags & WCSBIN_STRICT) {
5348a84c799SMatthew Dillon 					if (i == 0) {
5358a84c799SMatthew Dillon 						errno = EILSEQ;
5368a84c799SMatthew Dillon 						return ((size_t)-1);
5378a84c799SMatthew Dillon 					}
5388a84c799SMatthew Dillon 					goto breakout;
5398a84c799SMatthew Dillon 				}
5408a84c799SMatthew Dillon 				if (dst)
5418a84c799SMatthew Dillon 					dst[j] = 0xDC00 | (unsigned char)src[i];
5428a84c799SMatthew Dillon 				++j;
5438a84c799SMatthew Dillon 				goto loopup;
5448a84c799SMatthew Dillon 			}
5458a84c799SMatthew Dillon 			wch <<= 6;
5468a84c799SMatthew Dillon 			wch |= ch & 0x3f;
5478a84c799SMatthew Dillon 		}
5488a84c799SMatthew Dillon 
5498a84c799SMatthew Dillon 		/*
5508a84c799SMatthew Dillon 		 * Check validity of the wchar.  If invalid we could escape
5518a84c799SMatthew Dillon 		 * just the first character and loop up, but it ought to be
5528a84c799SMatthew Dillon 		 * more readable if we escape all the chars in the sequence
5538a84c799SMatthew Dillon 		 * (since they are all >= 0x80 and might represent a legacy
5548a84c799SMatthew Dillon 		 * 5-byte or 6-byte code).
5558a84c799SMatthew Dillon 		 */
5568a84c799SMatthew Dillon 		if (wch < lbound ||
557*3d4b9338SJohn Marino 		    ((flags & WCSBIN_LONGCODES) == 0 && wch > 0x10ffff)) {
5588a84c799SMatthew Dillon 			goto forceesc;
5598a84c799SMatthew Dillon 		}
5608a84c799SMatthew Dillon 
5618a84c799SMatthew Dillon 		/*
5628a84c799SMatthew Dillon 		 * Check if wch is a surrogate code (which also encloses our
5638a84c799SMatthew Dillon 		 * UTF-8B escaping range).  This is normally illegal in UTF8.
5648a84c799SMatthew Dillon 		 * If it is, we need to escape each characer in the sequence.
5658a84c799SMatthew Dillon 		 * Breakout if there isn't enough output buffer space.
5668a84c799SMatthew Dillon 		 *
5678a84c799SMatthew Dillon 		 * If (flags & WCSBIN_SURRO) the caller wishes to accept
5688a84c799SMatthew Dillon 		 * surrogate codes, i.e. the input might potentially already
5698a84c799SMatthew Dillon 		 * be escaped UTF8-B or unchecked UTF-16 that was converted
5708a84c799SMatthew Dillon 		 * into UTF-8.
5718a84c799SMatthew Dillon 		 */
5728a84c799SMatthew Dillon 		if ((flags & WCSBIN_SURRO) == 0 &&
5738a84c799SMatthew Dillon 		    wch >= 0xD800 && wch <= 0xDFFF) {
5748a84c799SMatthew Dillon forceesc:
5758a84c799SMatthew Dillon 			if (j + want > dlen)
5768a84c799SMatthew Dillon 				break;
5778a84c799SMatthew Dillon 			if (flags & WCSBIN_STRICT) {
5788a84c799SMatthew Dillon 				if (i == 0) {
5798a84c799SMatthew Dillon 					errno = EILSEQ;
5808a84c799SMatthew Dillon 					return ((size_t)-1);
5818a84c799SMatthew Dillon 				}
5828a84c799SMatthew Dillon 				break;
5838a84c799SMatthew Dillon 			}
5848a84c799SMatthew Dillon 			for (k = 0; k < want; ++k) {
5858a84c799SMatthew Dillon 				if (dst) {
5868a84c799SMatthew Dillon 					dst[j] = 0xDC00 |
5878a84c799SMatthew Dillon 						 (unsigned char)src[i+k];
5888a84c799SMatthew Dillon 				}
5898a84c799SMatthew Dillon 				++j;
5908a84c799SMatthew Dillon 			}
5918a84c799SMatthew Dillon 			i += k - 1;
5928a84c799SMatthew Dillon 		} else {
5938a84c799SMatthew Dillon 			i += k - 1;
5948a84c799SMatthew Dillon 			if (dst)
5958a84c799SMatthew Dillon 				dst[j] = wch;
5968a84c799SMatthew Dillon 			++j;
5978a84c799SMatthew Dillon 		}
5988a84c799SMatthew Dillon loopup:
5998a84c799SMatthew Dillon 		;
6008a84c799SMatthew Dillon 	}
6018a84c799SMatthew Dillon breakout:
6028a84c799SMatthew Dillon 	*slen = i;
6038a84c799SMatthew Dillon 
6048a84c799SMatthew Dillon 	return j;
6058a84c799SMatthew Dillon }
6068a84c799SMatthew Dillon 
6078a84c799SMatthew Dillon static size_t
_UTF8_wcrtombin(char * __restrict dst,const wchar_t * __restrict src,size_t dlen,size_t * slen,int flags)6088a84c799SMatthew Dillon _UTF8_wcrtombin(char * __restrict dst, const wchar_t * __restrict src,
6098a84c799SMatthew Dillon 		size_t dlen, size_t *slen, int flags)
6108a84c799SMatthew Dillon {
6118a84c799SMatthew Dillon 	size_t i;
6128a84c799SMatthew Dillon 	size_t j;
6138a84c799SMatthew Dillon 	size_t k;
6148a84c799SMatthew Dillon 	size_t n = *slen;
6158a84c799SMatthew Dillon 	size_t len;
6168a84c799SMatthew Dillon 	unsigned char lead;
6178a84c799SMatthew Dillon 	wchar_t wc;
6188a84c799SMatthew Dillon 
6198a84c799SMatthew Dillon 	for (i = j = 0; i < n; ++i) {
6208a84c799SMatthew Dillon 		if (j == dlen)
6218a84c799SMatthew Dillon 			break;
6228a84c799SMatthew Dillon 		wc = src[i];
6238a84c799SMatthew Dillon 
6248a84c799SMatthew Dillon 		if ((wc & ~0x7f) == 0) {
6258a84c799SMatthew Dillon 			/* Fast path for plain ASCII characters. */
6268a84c799SMatthew Dillon 			if (dst)
6278a84c799SMatthew Dillon 				dst[j] = (unsigned char)wc;
6288a84c799SMatthew Dillon 			++j;
6298a84c799SMatthew Dillon 			continue;
6308a84c799SMatthew Dillon 		}
6318a84c799SMatthew Dillon 		if ((wc & ~0x7ff) == 0) {
6328a84c799SMatthew Dillon 			lead = 0xc0;
6338a84c799SMatthew Dillon 			len = 2;
6348a84c799SMatthew Dillon 		} else if (wc >= 0xDC80 && wc <= 0xDCFF &&
6358a84c799SMatthew Dillon 			   (flags & WCSBIN_SURRO) == 0) {
6368a84c799SMatthew Dillon 			if (flags & WCSBIN_STRICT) {
6378a84c799SMatthew Dillon 				/*
6388a84c799SMatthew Dillon 				 * STRICT without SURRO is an error for
6398a84c799SMatthew Dillon 				 * surrogates.
6408a84c799SMatthew Dillon 				 */
6418a84c799SMatthew Dillon 				if (i == 0) {
6428a84c799SMatthew Dillon 					errno = EILSEQ;
6438a84c799SMatthew Dillon 					return ((size_t)-1);
6448a84c799SMatthew Dillon 				}
6458a84c799SMatthew Dillon 				break;
6468a84c799SMatthew Dillon 			}
6478a84c799SMatthew Dillon 			if (dst)
6488a84c799SMatthew Dillon 				dst[j] = (unsigned char)wc;
6498a84c799SMatthew Dillon 			++j;
6508a84c799SMatthew Dillon 			continue;
6518a84c799SMatthew Dillon 		} else if ((wc & ~0xffff) == 0) {
6528a84c799SMatthew Dillon 			if (wc >= 0xD800 && wc <= 0xDFFF &&
6538a84c799SMatthew Dillon 			    (flags & (WCSBIN_SURRO | WCSBIN_STRICT)) ==
6548a84c799SMatthew Dillon 			    WCSBIN_STRICT) {
6558a84c799SMatthew Dillon 				/*
6568a84c799SMatthew Dillon 				 * Surrogates in general are an error
6578a84c799SMatthew Dillon 				 * if STRICT is specified and SURRO is not
6588a84c799SMatthew Dillon 				 * specified.
6598a84c799SMatthew Dillon 				 */
6608a84c799SMatthew Dillon 				if (i == 0) {
6618a84c799SMatthew Dillon 					errno = EILSEQ;
6628a84c799SMatthew Dillon 					return ((size_t)-1);
6638a84c799SMatthew Dillon 				}
6648a84c799SMatthew Dillon 				break;
6658a84c799SMatthew Dillon 			}
6668a84c799SMatthew Dillon 			lead = 0xe0;
6678a84c799SMatthew Dillon 			len = 3;
668*3d4b9338SJohn Marino 		} else if (wc <= 0x10ffff) {
6698a84c799SMatthew Dillon 			lead = 0xf0;
6708a84c799SMatthew Dillon 			len = 4;
6718a84c799SMatthew Dillon 		} else if ((flags & WCSBIN_LONGCODES) && wc < 0x200000) {
6728a84c799SMatthew Dillon 			/* normally illegal */
6738a84c799SMatthew Dillon 			lead = 0xf0;
6748a84c799SMatthew Dillon 			len = 4;
6758a84c799SMatthew Dillon 		} else if ((flags & WCSBIN_LONGCODES) && wc < 0x4000000) {
6768a84c799SMatthew Dillon 			/* normally illegal */
6778a84c799SMatthew Dillon 			lead = 0xf8;
6788a84c799SMatthew Dillon 			len = 5;
6798a84c799SMatthew Dillon 		} else if ((flags & WCSBIN_LONGCODES) &&
6808a84c799SMatthew Dillon 			   (uint32_t)wc < 0x80000000U) {
6818a84c799SMatthew Dillon 			/* normally illegal */
6828a84c799SMatthew Dillon 			lead = 0xfc;
6838a84c799SMatthew Dillon 			len = 6;
6848a84c799SMatthew Dillon 		} else {
6858a84c799SMatthew Dillon 			if (i == 0) {
6868a84c799SMatthew Dillon 				errno = EILSEQ;
6878a84c799SMatthew Dillon 				return ((size_t)-1);
6888a84c799SMatthew Dillon 			}
6898a84c799SMatthew Dillon 			/* stop here, process error on next loop */
6908a84c799SMatthew Dillon 			break;
6918a84c799SMatthew Dillon 		}
6928a84c799SMatthew Dillon 
6938a84c799SMatthew Dillon 		/*
6948a84c799SMatthew Dillon 		 * Output the octets representing the character in chunks
6958a84c799SMatthew Dillon 		 * of 6 bits, least significant last. The first octet is
6968a84c799SMatthew Dillon 		 * a special case because it contains the sequence length
6978a84c799SMatthew Dillon 		 * information.
6988a84c799SMatthew Dillon 		 */
6998a84c799SMatthew Dillon 		if (j + len > dlen)
7008a84c799SMatthew Dillon 			break;
7018a84c799SMatthew Dillon 		k = j;
7028a84c799SMatthew Dillon 		j += len;
7038a84c799SMatthew Dillon 		if (dst) {
7048a84c799SMatthew Dillon 			while (--len > 0) {
7058a84c799SMatthew Dillon 				dst[k + len] = (wc & 0x3f) | 0x80;
7068a84c799SMatthew Dillon 				wc >>= 6;
7078a84c799SMatthew Dillon 			}
7088a84c799SMatthew Dillon 			dst[k] = (wc & 0xff) | lead;
7098a84c799SMatthew Dillon 		}
7108a84c799SMatthew Dillon 	}
7118a84c799SMatthew Dillon 	*slen = i;
7128a84c799SMatthew Dillon 
7138a84c799SMatthew Dillon 	return j;
7148a84c799SMatthew Dillon }
7158a84c799SMatthew Dillon 
/*
 * Public entry point for UTF-8 to wide character conversion.  All
 * arguments are handed unchanged to _UTF8_mbintowcr() and its result is
 * returned as-is.
 */
size_t
utf8towcr(wchar_t * __restrict dst, const char * __restrict src,
		size_t dlen, size_t *slen, int flags)
{
	size_t result;

	result = _UTF8_mbintowcr(dst, src, dlen, slen, flags);

	return (result);
}
7228a84c799SMatthew Dillon 
/*
 * Public entry point for wide character to UTF-8 conversion.  All
 * arguments are handed unchanged to _UTF8_wcrtombin() and its result is
 * returned as-is.
 */
size_t
wcrtoutf8(char * __restrict dst, const wchar_t * __restrict src,
	  size_t dlen, size_t *slen, int flags)
{
	size_t result;

	result = _UTF8_wcrtombin(dst, src, dlen, slen, flags);

	return (result);
}
729