xref: /netbsd-src/lib/libc/gen/vis.c (revision a5c0af2445ce57395d99f8dbf153491f4cbf5cbb)
1 /*	$NetBSD: vis.c,v 1.88 2024/03/17 21:48:02 andvar Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55  * POSSIBILITY OF SUCH DAMAGE.
56  */
57 
58 #include <sys/cdefs.h>
59 #if defined(LIBC_SCCS) && !defined(lint)
60 __RCSID("$NetBSD: vis.c,v 1.88 2024/03/17 21:48:02 andvar Exp $");
61 #endif /* LIBC_SCCS and not lint */
62 #ifdef __FBSDID
63 __FBSDID("$FreeBSD$");
64 #define	_DIAGASSERT(x)	assert(x)
65 #endif
66 
67 #include "namespace.h"
68 
69 #include <sys/param.h>
70 #include <sys/types.h>
71 
72 #include <assert.h>
73 #include <errno.h>
74 #include <stdint.h>
75 #include <stdlib.h>
76 #include <vis.h>
77 #include <wchar.h>
78 #include <wctype.h>
79 
80 #ifdef __weak_alias
81 __weak_alias(strvisx,_strvisx)
82 #endif
83 
84 #if !HAVE_VIS || !HAVE_SVIS
85 #include <ctype.h>
86 #include <limits.h>
87 #include <stdio.h>
88 #include <string.h>
89 
/*
 * The reason for going through the trouble of dealing with character
 * encodings in vis(3) is that we use it to safely encode the output of
 * commands. This safe encoding varies depending on the character set.
 * For example, if we display ps output in French, we don't want to
 * display French characters as M-foo.
 */
97 
98 static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
99 
100 #undef BELL
101 #define BELL L'\a'
102 
/*
 * iscgraph(c): isgraph() evaluated independently of the current locale.
 * ISGRAPH() uses it when the caller passed VIS_NOLOCALE.
 *
 * Three mutually exclusive implementations are selected here.  They must
 * be exclusive: a function-like macro named iscgraph that is visible at
 * the function definition would rewrite the definition's own name.
 */
#if defined(LC_C_LOCALE)
#define iscgraph(c)      isgraph_l(c, LC_C_LOCALE)
#elif defined(notyet)
#include <locale.h>
static int
iscgraph(int c)
{
	int rv;
	char *cur, *saved;

	/*
	 * setlocale(cat, NULL) only queries the current locale.  Its result
	 * may be overwritten by the next setlocale() call, so keep a copy.
	 * (setlocale(cat, "C") returns the *new* locale name and therefore
	 * cannot be used to remember the old one.)
	 */
	cur = setlocale(LC_CTYPE, NULL);
	saved = cur != NULL ? strdup(cur) : NULL;
	if (saved == NULL) {
		/* Cannot restore afterwards, so do not switch at all. */
		return isgraph(c);
	}

	(void)setlocale(LC_CTYPE, "C");
	rv = isgraph(c);
	(void)setlocale(LC_CTYPE, saved);
	free(saved);
	return rv;
}
#else
/* Keep it simple for now, no locale stuff */
#define iscgraph(c)	isgraph(c)
#endif
123 
124 #define ISGRAPH(flags, c) \
125     (((flags) & VIS_NOLOCALE) ? iscgraph(c) : iswgraph(c))
126 
/*
 * Small classification and hex-digit helpers.  Every use of the macro
 * parameter is parenthesized so that expression arguments (for example
 * "x & 0x7f") are evaluated as a unit.
 *
 * iswoctal: the low byte of c is an octal digit '0'..'7'.
 * iswwhite: c is a space, tab or newline.
 * iswsafe:  c is a backspace, bell or carriage return.
 * xtoa/XTOA: hex digit (lower/upper case) for a value in 0..15.
 */
#define iswoctal(c)	(((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
#define xtoa(c)		(L"0123456789abcdef"[(c)])
#define XTOA(c)		(L"0123456789ABCDEF"[(c)])
132 
133 #define MAXEXTRAS	30
134 
135 static const wchar_t char_shell[] = L"'`\";&<>()|{}]\\$!^~";
136 static const wchar_t char_glob[] = L"*?[#";
137 
138 #if !HAVE_NBTOOL_CONFIG_H
139 #ifndef __NetBSD__
/*
 * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
 * integral type, and it is probably wrong, since currently the maximum
 * number of bytes a character needs is 6. Until this is fixed, the
 * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
 * the assertion is compiled out on NetBSD.
 */
147 #ifdef __FreeBSD__
148 /*
149  * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
150  * mode.
151  */
152 #ifndef CTASSERT
153 #define CTASSERT(x)             _CTASSERT(x, __LINE__)
154 #define _CTASSERT(x, y)         __CTASSERT(x, y)
155 #define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
156 #endif
157 #endif /* __FreeBSD__ */
158 CTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
159 #endif /* !__NetBSD__ */
160 #endif
161 
/*
 * do_hvis: encode one character HTTP style (RFC 1808).
 *
 * Alphanumerics, the "safe" set ($ - _ . +) and the "extra" set
 * (! * ' ( ) ,) are handed to do_svis().  Anything else is written as
 * '%' followed by two lowercase hex digits taken from the low eight
 * bits of c.
 *
 * Returns the destination pointer advanced past what was written.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	unsigned int code;

	if (!iswalnum(c)) {
		switch (c) {
		/* safe */
		case L'$':
		case L'-':
		case L'_':
		case L'.':
		case L'+':
		/* extra */
		case L'!':
		case L'*':
		case L'\'':
		case L'(':
		case L')':
		case L',':
			break;
		default:
			/* Percent-escape; only the low byte is represented. */
			code = (unsigned int)c;
			*dst++ = L'%';
			*dst++ = xtoa((code >> 4) & 0xf);
			*dst++ = xtoa(code & 0xf);
			return dst;
		}
	}

	return do_svis(dst, c, flags, nextc, extra);
}
183 
/*
 * do_mvis: encode one character as Quoted-Printable MIME (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A newline is never quoted.  Otherwise the character is written as
 * '=' plus two uppercase hex digits (low eight bits of c) when it is
 *   - whitespace directly followed by CR or LF,
 *   - non-whitespace outside 33..126, or the '=' sign itself (61), or
 *   - one of  # $ @ [ \ ] ^ ` { | } ~
 * Everything else is handed to do_svis().
 *
 * Returns the destination pointer advanced past what was written.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	int quote;

	if (c == L'\n') {
		quote = 0;
	} else if (iswspace(c)) {
		/* Space at the end of the line */
		quote = (nextc == L'\r' || nextc == L'\n');
	} else {
		/* Out of range */
		quote = (c < 33 || c == 61 || c > 126);
	}

	/* Specific char to be escaped */
	if (!quote && c != L'\n')
		quote = (wcschr(L"#$@[\\]^`{|}~", c) != NULL);

	if (!quote)
		return do_svis(dst, c, flags, nextc, extra);

	*dst++ = L'=';
	*dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
	*dst++ = XTOA((unsigned int)c & 0xf);
	return dst;
}
205 
206 /*
207  * Output single byte of multibyte character.
208  */
209 static wchar_t *
do_mbyte(wchar_t * dst,wint_t c,int flags,wint_t nextc,int iswextra)210 do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
211 {
212 	if (flags & VIS_CSTYLE) {
213 		switch (c) {
214 		case L'\n':
215 			*dst++ = L'\\'; *dst++ = L'n';
216 			return dst;
217 		case L'\r':
218 			*dst++ = L'\\'; *dst++ = L'r';
219 			return dst;
220 		case L'\b':
221 			*dst++ = L'\\'; *dst++ = L'b';
222 			return dst;
223 		case BELL:
224 			*dst++ = L'\\'; *dst++ = L'a';
225 			return dst;
226 		case L'\v':
227 			*dst++ = L'\\'; *dst++ = L'v';
228 			return dst;
229 		case L'\t':
230 			*dst++ = L'\\'; *dst++ = L't';
231 			return dst;
232 		case L'\f':
233 			*dst++ = L'\\'; *dst++ = L'f';
234 			return dst;
235 		case L' ':
236 			*dst++ = L'\\'; *dst++ = L's';
237 			return dst;
238 		case L'\0':
239 			*dst++ = L'\\'; *dst++ = L'0';
240 			if (iswoctal(nextc)) {
241 				*dst++ = L'0';
242 				*dst++ = L'0';
243 			}
244 			return dst;
245 		/* We cannot encode these characters in VIS_CSTYLE
246 		 * because they special meaning */
247 		case L'n':
248 		case L'r':
249 		case L'b':
250 		case L'a':
251 		case L'v':
252 		case L't':
253 		case L'f':
254 		case L's':
255 		case L'0':
256 		case L'M':
257 		case L'^':
258 		case L'$': /* vis(1) -l */
259 			break;
260 		default:
261 			if (ISGRAPH(flags, c) && !iswoctal(c)) {
262 				*dst++ = L'\\';
263 				*dst++ = c;
264 				return dst;
265 			}
266 		}
267 	}
268 	if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
269 		*dst++ = L'\\';
270 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
271 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
272 		*dst++ =			     (c	      & 07) + L'0';
273 	} else {
274 		if ((flags & VIS_NOSLASH) == 0)
275 			*dst++ = L'\\';
276 
277 		if (c & 0200) {
278 			c &= 0177;
279 			*dst++ = L'M';
280 		}
281 
282 		if (iswcntrl(c)) {
283 			*dst++ = L'^';
284 			if (c == 0177)
285 				*dst++ = L'?';
286 			else
287 				*dst++ = c + L'@';
288 		} else {
289 			*dst++ = L'-';
290 			*dst++ = c;
291 		}
292 	}
293 
294 	return dst;
295 }
296 
297 /*
298  * This is do_vis, the central code of vis.
299  * dst:	      Pointer to the destination buffer
300  * c:	      Character to encode
301  * flags:     Flags word
302  * nextc:     The character following 'c'
303  * extra:     Pointer to the list of extra characters to be
304  *	      backslash-protected.
305  */
306 static wchar_t *
do_svis(wchar_t * dst,wint_t c,int flags,wint_t nextc,const wchar_t * extra)307 do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
308 {
309 	int iswextra, i, shft;
310 	uint64_t bmsk, wmsk;
311 
312 	iswextra = wcschr(extra, c) != NULL;
313 	if (!iswextra && (ISGRAPH(flags, c) || iswwhite(c) ||
314 	    ((flags & VIS_SAFE) && iswsafe(c)))) {
315 		*dst++ = c;
316 		return dst;
317 	}
318 
319 	/* See comment in istrsenvisx() output loop, below. */
320 	wmsk = 0;
321 	for (i = sizeof(wmsk) - 1; i >= 0; i--) {
322 		shft = i * NBBY;
323 		bmsk = (uint64_t)0xffLL << shft;
324 		wmsk |= bmsk;
325 		if ((c & wmsk) || i == 0)
326 			dst = do_mbyte(dst, (wint_t)(
327 			    (uint64_t)(c & bmsk) >> shft),
328 			    flags, nextc, iswextra);
329 	}
330 
331 	return dst;
332 }
333 
334 typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
335 
336 /*
337  * Return the appropriate encoding function depending on the flags given.
338  */
339 static visfun_t
getvisfun(int flags)340 getvisfun(int flags)
341 {
342 	if (flags & VIS_HTTPSTYLE)
343 		return do_hvis;
344 	if (flags & VIS_MIMESTYLE)
345 		return do_mvis;
346 	return do_svis;
347 }
348 
349 /*
350  * Expand list of extra characters to not visually encode.
351  */
352 static wchar_t *
makeextralist(int flags,const char * src)353 makeextralist(int flags, const char *src)
354 {
355 	wchar_t *dst, *d;
356 	size_t len;
357 	const wchar_t *s;
358 	mbstate_t mbstate;
359 
360 	len = strlen(src);
361 	if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
362 		return NULL;
363 
364 	memset(&mbstate, 0, sizeof(mbstate));
365 	if ((flags & VIS_NOLOCALE)
366 	    || mbsrtowcs(dst, &src, len, &mbstate) == (size_t)-1) {
367 		size_t i;
368 		for (i = 0; i < len; i++)
369 			dst[i] = (wchar_t)(u_char)src[i];
370 		d = dst + len;
371 	} else
372 		d = dst + wcslen(dst);
373 
374 	if (flags & VIS_GLOB)
375 		for (s = char_glob; *s; *d++ = *s++)
376 			continue;
377 
378 	if (flags & VIS_SHELL)
379 		for (s = char_shell; *s; *d++ = *s++)
380 			continue;
381 
382 	if (flags & VIS_SP) *d++ = L' ';
383 	if (flags & VIS_TAB) *d++ = L'\t';
384 	if (flags & VIS_NL) *d++ = L'\n';
385 	if (flags & VIS_DQ) *d++ = L'"';
386 	if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
387 	*d = L'\0';
388 
389 	return dst;
390 }
391 
392 /*
393  * istrsenvisx()
394  * 	The main internal function.
395  *	All user-visible functions call this one.
396  */
397 static int
istrsenvisx(char ** mbdstp,size_t * dlen,const char * mbsrc,size_t mblength,int flags,const char * mbextra,int * cerr_ptr)398 istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
399     int flags, const char *mbextra, int *cerr_ptr)
400 {
401 	char mbbuf[MB_LEN_MAX];
402 	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
403 	size_t len, olen;
404 	uint64_t bmsk, wmsk;
405 	wint_t c;
406 	visfun_t f;
407 	int cerr, error = -1, i, shft;
408 	ssize_t clen = 0;
409 	char *mbdst, *mbwrite, *mdst;
410 	size_t mbslength;
411 	size_t maxolen;
412 	mbstate_t mbstate;
413 
414 	_DIAGASSERT(mbdstp != NULL);
415 	_DIAGASSERT(mbsrc != NULL || mblength == 0);
416 	_DIAGASSERT(mbextra != NULL);
417 
418 	mbslength = mblength;
419 	/*
420 	 * When inputing a single character, must also read in the
421 	 * next character for nextc, the look-ahead character.
422 	 */
423 	if (mbslength == 1)
424 		mbslength++;
425 
426 	/*
427 	 * Input (mbsrc) is a char string considered to be multibyte
428 	 * characters.  The input loop will read this string pulling
429 	 * one character, possibly multiple bytes, from mbsrc and
430 	 * converting each to wchar_t in src.
431 	 *
432 	 * The vis conversion will be done using the wide char
433 	 * wchar_t string.
434 	 *
435 	 * This will then be converted back to a multibyte string to
436 	 * return to the caller.
437 	 */
438 
439 	/*
440 	 * Guarantee the arithmetic on input to calloc won't overflow.
441 	 */
442 	if (mbslength > (SIZE_MAX - 1)/16) {
443 		errno = ENOMEM;
444 		return -1;
445 	}
446 
447 	/* Allocate space for the wide char strings */
448 	psrc = pdst = extra = NULL;
449 	mdst = NULL;
450 	if ((psrc = calloc(mbslength + 1, sizeof(*psrc))) == NULL)
451 		return -1;
452 	if ((pdst = calloc((16 * mbslength) + 1, sizeof(*pdst))) == NULL)
453 		goto out;
454 	if (*mbdstp == NULL) {
455 		if ((mdst = calloc((16 * mbslength) + 1, sizeof(*mdst))) == NULL)
456 			goto out;
457 		*mbdstp = mdst;
458 	}
459 
460 	mbdst = *mbdstp;
461 	dst = pdst;
462 	src = psrc;
463 
464 	if (flags & VIS_NOLOCALE) {
465 		/* Do one byte at a time conversion */
466 		cerr = 1;
467 	} else {
468 		/* Use caller's multibyte conversion error flag. */
469 		cerr = cerr_ptr ? *cerr_ptr : 0;
470 	}
471 
472 	/*
473 	 * Input loop.
474 	 * Handle up to mblength characters (not bytes).  We do not
475 	 * stop at NULs because we may be processing a block of data
476 	 * that includes NULs.
477 	 */
478 	memset(&mbstate, 0, sizeof(mbstate));
479 	while (mbslength > 0) {
480 		/* Convert one multibyte character to wchar_t. */
481 		if (!cerr) {
482 			clen = (ssize_t)mbrtowc(src, mbsrc,
483 			    (mbslength < MB_LEN_MAX
484 				? mbslength
485 				: MB_LEN_MAX),
486 			    &mbstate);
487 			assert(clen < 0 || (size_t)clen <= mbslength);
488 			assert(clen <= MB_LEN_MAX);
489 		}
490 		if (cerr || clen < 0) {
491 			/* Conversion error, process as a byte instead. */
492 			*src = (wint_t)(u_char)*mbsrc;
493 			clen = 1;
494 			cerr = 1;
495 		}
496 		if (clen == 0) {
497 			/*
498 			 * NUL in input gives 0 return value. process
499 			 * as single NUL byte and keep going.
500 			 */
501 			clen = 1;
502 		}
503 		/*
504 		 * Let n := MIN(mbslength, MB_LEN_MAX).  We have:
505 		 *
506 		 *	mbslength >= 1
507 		 *	mbrtowc(..., n, &mbstate) <= n,
508 		 *		by the contract of mbrtowc
509 		 *
510 		 *  clen is either
511 		 *  (a) mbrtowc(..., n, &mbstate), in which case
512 		 *      clen <= n <= mbslength; or
513 		 *  (b) 1, in which case clen = 1 <= mbslength.
514 		 */
515 		assert(clen > 0);
516 		assert((size_t)clen <= mbslength);
517 		/* Advance buffer character pointer. */
518 		src++;
519 		/* Advance input pointer by number of bytes read. */
520 		mbsrc += clen;
521 		/* Decrement input byte count. */
522 		mbslength -= clen;
523 	}
524 	len = src - psrc;
525 	src = psrc;
526 
527 	/*
528 	 * In the single character input case, we will have actually
529 	 * processed two characters, c and nextc.  Reset len back to
530 	 * just a single character.
531 	 */
532 	if (mblength < len)
533 		len = mblength;
534 
535 	/* Convert extra argument to list of characters for this mode. */
536 	extra = makeextralist(flags, mbextra);
537 	if (!extra) {
538 		if (dlen && *dlen == 0) {
539 			errno = ENOSPC;
540 			goto out;
541 		}
542 		*mbdst = '\0';	/* can't create extra, return "" */
543 		error = 0;
544 		goto out;
545 	}
546 
547 	/* Look up which processing function to call. */
548 	f = getvisfun(flags);
549 
550 	/*
551 	 * Main processing loop.
552 	 * Call do_Xvis processing function one character at a time
553 	 * with next character available for look-ahead.
554 	 */
555 	for (start = dst; len > 0; len--) {
556 		c = *src++;
557 		dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra);
558 		if (dst == NULL) {
559 			errno = ENOSPC;
560 			goto out;
561 		}
562 	}
563 
564 	/* Terminate the string in the buffer. */
565 	*dst = L'\0';
566 
567 	/*
568 	 * Output loop.
569 	 * Convert wchar_t string back to multibyte output string.
570 	 * If we have hit a multi-byte conversion error on input,
571 	 * output byte-by-byte here.  Else use wctomb().
572 	 */
573 	len = wcslen(start);
574 	if (dlen) {
575 		maxolen = *dlen;
576 		if (maxolen == 0) {
577 			errno = ENOSPC;
578 			goto out;
579 		}
580 	} else {
581 		if (len > (SIZE_MAX - 1)/MB_LEN_MAX) {
582 			errno = ENOSPC;
583 			goto out;
584 		}
585 		maxolen = len*MB_LEN_MAX + 1;
586 	}
587 	olen = 0;
588 	memset(&mbstate, 0, sizeof(mbstate));
589 	for (dst = start; len > 0; len--) {
590 		if (!cerr) {
591 			/*
592 			 * If we have at least MB_CUR_MAX bytes in the buffer,
593 			 * we'll just do the conversion in-place into mbdst.  We
594 			 * need to be a little more conservative when we get to
595 			 * the end of the buffer, as we may not have MB_CUR_MAX
596 			 * bytes but we may not need it.
597 			 */
598 			if (maxolen - olen > MB_CUR_MAX)
599 				mbwrite = mbdst;
600 			else
601 				mbwrite = mbbuf;
602 			clen = (ssize_t)wcrtomb(mbwrite, *dst, &mbstate);
603 			if (clen > 0 && mbwrite != mbdst) {
604 				/*
605 				 * Don't break past our output limit, noting
606 				 * that maxolen includes the nul terminator so
607 				 * we can't write past maxolen - 1 here.
608 				 */
609 				if (olen + clen >= maxolen) {
610 					errno = ENOSPC;
611 					goto out;
612 				}
613 
614 				memcpy(mbdst, mbwrite, clen);
615 			}
616 		}
617 		if (cerr || clen < 0) {
618 			/*
619 			 * Conversion error, process as a byte(s) instead.
620 			 * Examine each byte and higher-order bytes for
621 			 * data.  E.g.,
622 			 *	0x000000000000a264 -> a2 64
623 			 *	0x000000001f00a264 -> 1f 00 a2 64
624 			 */
625 			clen = 0;
626 			wmsk = 0;
627 			for (i = sizeof(wmsk) - 1; i >= 0; i--) {
628 				shft = i * NBBY;
629 				bmsk = (uint64_t)0xffLL << shft;
630 				wmsk |= bmsk;
631 				if ((*dst & wmsk) || i == 0) {
632 					if (olen + clen + 1 >= maxolen) {
633 						errno = ENOSPC;
634 						goto out;
635 					}
636 
637 					mbdst[clen++] = (char)(
638 					    (uint64_t)(*dst & bmsk) >>
639 					    shft);
640 				}
641 			}
642 			cerr = 1;
643 		}
644 
645 		/*
646 		 * We'll be dereferencing mbdst[clen] after this to write the
647 		 * nul terminator; the above paths should have checked for a
648 		 * possible overflow already.
649 		 */
650 		assert(olen + clen < maxolen);
651 
652 		/* Advance output pointer by number of bytes written. */
653 		mbdst += clen;
654 		/* Advance buffer character pointer. */
655 		dst++;
656 		/* Increment output character count. */
657 		olen += clen;
658 	}
659 
660 	/* Terminate the output string. */
661 	assert(olen < maxolen);
662 	*mbdst = '\0';
663 
664 	if (flags & VIS_NOLOCALE) {
665 		/* Pass conversion error flag out. */
666 		if (cerr_ptr)
667 			*cerr_ptr = cerr;
668 	}
669 
670 	free(extra);
671 	free(pdst);
672 	free(psrc);
673 
674 	return (int)olen;
675 out:
676 	free(extra);
677 	free(pdst);
678 	free(psrc);
679 	free(mdst);
680 	return error;
681 }
682 
/*
 * istrsenvisxl: istrsenvisx() for a NUL-terminated source string.
 * The input length is strlen(mbsrc), or 0 when mbsrc is NULL; all other
 * arguments and the return value are passed through unchanged.
 */
static int
istrsenvisxl(char **mbdstp, size_t *dlen, const char *mbsrc,
    int flags, const char *mbextra, int *cerr_ptr)
{
	size_t srclen = 0;

	if (mbsrc != NULL)
		srclen = strlen(mbsrc);

	return istrsenvisx(mbdstp, dlen, mbsrc, srclen, flags, mbextra,
	    cerr_ptr);
}
690 
691 #endif
692 
693 #if !HAVE_SVIS
694 /*
695  *	The "svis" variants all take an "extra" arg that is a pointer
696  *	to a NUL-terminated list of characters to be encoded, too.
697  *	These functions are useful e. g. to encode strings in such a
698  *	way so that they are not interpreted by a shell.
699  */
700 
/*
 * svis: encode the single character c into mbdst, using nextc as the
 * look-ahead character and also encoding the characters in mbextra.
 * The destination size is not checked.
 *
 * Returns a pointer to the terminating NUL that was written, or NULL
 * if the encoding failed.
 */
char *
svis(char *mbdst, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	/* Character and look-ahead must be adjacent in memory. */
	pair[0] = (char)c;
	pair[1] = (char)nextc;

	n = istrsenvisx(&mbdst, NULL, pair, 1, flags, mbextra, NULL);
	return n < 0 ? NULL : mbdst + n;
}
715 
/*
 * snvis: like svis(), but writes at most dlen bytes (including the
 * terminating NUL) to mbdst.
 *
 * Returns a pointer to the terminating NUL that was written, or NULL
 * if the encoding failed, for example because it did not fit.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	/* Character and look-ahead must be adjacent in memory. */
	pair[0] = (char)c;
	pair[1] = (char)nextc;

	n = istrsenvisx(&mbdst, &dlen, pair, 1, flags, mbextra, NULL);
	return n < 0 ? NULL : mbdst + n;
}
730 
/*
 * strsvis: encode the NUL-terminated string mbsrc into mbdst, also
 * encoding the characters in mbextra.  The destination size is not
 * checked.  Returns the result of istrsenvisx(): the number of bytes
 * written (excluding the NUL) or -1.
 */
int
strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra)
{
	char *out = mbdst;

	return istrsenvisxl(&out, NULL, mbsrc, flags, mbextra, NULL);
}
736 
/*
 * strsnvis: like strsvis(), but mbdst holds only dlen bytes including
 * the terminating NUL.  Returns the number of bytes written (excluding
 * the NUL) or -1.
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisxl(&out, &room, mbsrc, flags, mbextra, NULL);
}
742 
/*
 * strsvisx: encode exactly len bytes of mbsrc (NULs included) into
 * mbdst, also encoding the characters in mbextra.  The destination size
 * is not checked.  Returns the number of bytes written (excluding the
 * NUL) or -1.
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra)
{
	char *out = mbdst;

	return istrsenvisx(&out, NULL, mbsrc, len, flags, mbextra, NULL);
}
748 
/*
 * strsnvisx: like strsvisx(), but mbdst holds only dlen bytes including
 * the terminating NUL.  Returns the number of bytes written (excluding
 * the NUL) or -1.
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisx(&out, &room, mbsrc, len, flags, mbextra, NULL);
}
755 
/*
 * strsenvisx: like strsnvisx(), additionally passing the caller's
 * multibyte conversion error flag cerr_ptr through to istrsenvisx().
 * Returns the number of bytes written (excluding the NUL) or -1.
 */
int
strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra, int *cerr_ptr)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisx(&out, &room, mbsrc, len, flags, mbextra, cerr_ptr);
}
762 #endif
763 
764 #if !HAVE_VIS
/*
 * vis - visually encode characters
 *
 * Encodes the single character c into mbdst, using nextc as the
 * look-ahead character and no extra characters.  The destination size
 * is not checked.
 *
 * Returns a pointer to the terminating NUL that was written, or NULL
 * if the encoding failed.
 */
char *
vis(char *mbdst, int c, int flags, int nextc)
{
	char pair[2];
	int n;

	/* Character and look-ahead must be adjacent in memory. */
	pair[0] = (char)c;
	pair[1] = (char)nextc;

	n = istrsenvisx(&mbdst, NULL, pair, 1, flags, "", NULL);
	return n < 0 ? NULL : mbdst + n;
}
782 
/*
 * nvis: like vis(), but writes at most dlen bytes (including the
 * terminating NUL) to mbdst.
 *
 * Returns a pointer to the terminating NUL that was written, or NULL
 * if the encoding failed, for example because it did not fit.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flags, int nextc)
{
	char pair[2];
	int n;

	/* Character and look-ahead must be adjacent in memory. */
	pair[0] = (char)c;
	pair[1] = (char)nextc;

	n = istrsenvisx(&mbdst, &dlen, pair, 1, flags, "", NULL);
	return n < 0 ? NULL : mbdst + n;
}
797 
/*
 * strvis - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion, plus one byte for the terminating NUL.  The length
 *	of dst, not including the trailing NUL, is returned; -1 is
 *	returned on failure.
 */

int
strvis(char *mbdst, const char *mbsrc, int flags)
{
	char *out = mbdst;

	return istrsenvisxl(&out, NULL, mbsrc, flags, "", NULL);
}
811 
/*
 * strnvis: like strvis(), but mbdst holds only dlen bytes including the
 * terminating NUL.  Returns the number of bytes written (excluding the
 * NUL) or -1.
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisxl(&out, &room, mbsrc, flags, "", NULL);
}
817 
/*
 * stravis: visually encode the NUL-terminated string mbsrc into a newly
 * allocated buffer.
 *
 * mbdstp: receives the allocated result, which the caller must free().
 *
 * Returns the length of the result, not including the trailing NUL, or
 * -1 on failure.  On failure *mbdstp is NULL, so the caller never sees
 * a pointer to memory that has already been released.
 */
int
stravis(char **mbdstp, const char *mbsrc, int flags)
{
	int ret;

	/* A NULL destination asks istrsenvisx() to allocate one. */
	*mbdstp = NULL;
	ret = istrsenvisxl(mbdstp, NULL, mbsrc, flags, "", NULL);
	if (ret < 0) {
		/* Any buffer allocated on the way has been freed. */
		*mbdstp = NULL;
	}
	return ret;
}
824 
/*
 * strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion, plus one byte for the terminating NUL.  The length
 *	of dst, not including the trailing NUL, is returned; -1 is
 *	returned on failure.
 *
 *	Strvisx encodes exactly len characters from src into dst.
 *	This is useful for encoding a block of data.
 */

int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flags)
{
	char *out = mbdst;

	return istrsenvisx(&out, NULL, mbsrc, len, flags, "", NULL);
}
841 
/*
 * strnvisx: like strvisx(), but mbdst holds only dlen bytes including
 * the terminating NUL.  Returns the number of bytes written (excluding
 * the NUL) or -1.
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisx(&out, &room, mbsrc, len, flags, "", NULL);
}
847 
/*
 * strenvisx: like strnvisx(), additionally passing the caller's
 * multibyte conversion error flag cerr_ptr through to istrsenvisx().
 * Returns the number of bytes written (excluding the NUL) or -1.
 */
int
strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    int *cerr_ptr)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisx(&out, &room, mbsrc, len, flags, "", cerr_ptr);
}
854 #endif
855