xref: /netbsd-src/lib/libc/gen/vis.c (revision a536ee5124e62c9a0051a252f7833dc8f50f44c9)
1 /*	$NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55  * POSSIBILITY OF SUCH DAMAGE.
56  */
57 
58 #include <sys/cdefs.h>
59 #if defined(LIBC_SCCS) && !defined(lint)
60 __RCSID("$NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $");
61 #endif /* LIBC_SCCS and not lint */
62 
63 #include "namespace.h"
64 #include <sys/types.h>
65 
66 #include <assert.h>
67 #include <vis.h>
68 #include <errno.h>
69 #include <stdlib.h>
70 
71 #ifdef __weak_alias
72 __weak_alias(strvisx,_strvisx)
73 #endif
74 
75 #if !HAVE_VIS || !HAVE_SVIS
76 #include <ctype.h>
77 #include <limits.h>
78 #include <stdio.h>
79 #include <string.h>
80 
81 static char *do_svis(char *, size_t *, int, int, int, const char *);
82 
/*
 * Small character helpers used by the encoders below.  Every macro
 * argument is parenthesized so that an arbitrary expression (for example
 * a conditional expression) can be passed without changing how the macro
 * body parses.  NB: isoctal, iswhite and issafe evaluate their argument
 * more than once, so do not pass expressions with side effects.
 */
#undef BELL
#define BELL '\a'

/* True if c, taken as an unsigned byte, is one of '0'..'7'. */
#define isoctal(c)	(((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
/* True for space, tab and newline. */
#define iswhite(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
/* True for backspace, bell and carriage return. */
#define issafe(c)	((c) == '\b' || (c) == BELL || (c) == '\r')
/* Map a value in 0..15 to its lower-case / upper-case hex digit. */
#define xtoa(c)		("0123456789abcdef"[(c)])
#define XTOA(c)		("0123456789ABCDEF"[(c)])

/* Room MAKEEXTRALIST reserves beyond the caller's list for the characters it appends. */
#define MAXEXTRAS	9
93 
/*
 * MAKEEXTRALIST(flag, extra, orig_str)
 *
 * Allocate a fresh NUL-terminated list in `extra' holding a copy of
 * orig_str followed by the characters selected by `flag':
 *	VIS_GLOB	'*', '?', '[' and '#'
 *	VIS_SP		' '
 *	VIS_TAB		'\t'
 *	VIS_NL		'\n'
 *	!VIS_NOSLASH	'\\'
 * On allocation failure `extra' is left NULL; the caller must check it
 * and must free() the list when done.
 */
#define MAKEEXTRALIST(flag, extra, orig_str)				      \
do {									      \
	const char *orig = orig_str;					      \
	size_t olen = strlen(orig);					      \
	char *e;							      \
	extra = malloc(olen + 1 + MAXEXTRAS);				      \
	if (!extra) break;						      \
	memcpy(extra, orig, olen);					      \
	e = extra + olen;						      \
	if ((flag) & VIS_GLOB) {					      \
		memcpy(e, "*?[#", 4);					      \
		e += 4;							      \
	}								      \
	if ((flag) & VIS_SP) *e++ = ' ';				      \
	if ((flag) & VIS_TAB) *e++ = '\t';				      \
	if ((flag) & VIS_NL) *e++ = '\n';				      \
	if (((flag) & VIS_NOSLASH) == 0) *e++ = '\\';			      \
	*e = '\0';							      \
} while (/*CONSTCOND*/0)
118 
/*
 * do_hvis - encode one character HTTP style (RFC 1808).
 *
 * ASCII letters and digits, the "safe" set $-_.+ and the "extra" set
 * !*'(), are handed on to do_svis.  Everything else becomes '%' followed
 * by two lower-case hex digits of the low byte of c, which needs three
 * bytes of output.
 *
 * Returns the position just past the bytes written, or NULL when dlen is
 * given and the encoding does not fit.
 */
static char *
do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
{
	unsigned int bits = (unsigned int)c;

	switch (c) {
	/* safe */
	case '$': case '-': case '_': case '.': case '+':
	/* extra */
	case '!': case '*': case '\'': case '(': case ')': case ',':
		return do_svis(dst, dlen, c, flag, nextc, extra);
	default:
		if (isascii(c) && isalnum(c))
			return do_svis(dst, dlen, c, flag, nextc, extra);
		break;
	}

	/* Percent-escape: account for the three bytes before writing. */
	if (dlen != NULL) {
		if (*dlen < 3)
			return NULL;
		*dlen -= 3;
	}
	dst[0] = '%';
	dst[1] = xtoa((bits >> 4) & 0xf);
	dst[2] = xtoa(bits & 0xf);
	return dst + 3;
}
146 
/*
 * do_mvis - encode one character as Quoted-Printable MIME (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A character other than '\n' is written as '=' plus two upper-case hex
 * digits (three bytes) when it is white space directly before '\r' or
 * '\n', when it is non-space and outside 33..126 or equal to 61 ('='),
 * or when it is one of #$@[\]^`{|}~.  All other characters go through
 * do_svis.
 *
 * Returns the position just past the bytes written, or NULL when dlen is
 * given and the encoding does not fit.
 */
static char *
do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
{
	int quote = 0;

	if (c != '\n') {
		if (isspace(c)) {
			/* Space at the end of the line */
			quote = (nextc == '\r' || nextc == '\n');
		} else {
			/* Out of range */
			quote = (c < 33 || c == 61 || c > 126);
		}
		/* Specific char to be escaped */
		if (!quote)
			quote = (strchr("#$@[\\]^`{|}~", c) != NULL);
	}

	if (!quote)
		return do_svis(dst, dlen, c, flag, nextc, extra);

	if (dlen != NULL) {
		if (*dlen < 3)
			return NULL;
		*dlen -= 3;
	}
	dst[0] = '=';
	dst[1] = XTOA(((unsigned int)c >> 4) & 0xf);
	dst[2] = XTOA((unsigned int)c & 0xf);
	return dst + 3;
}
174 
175 /*
176  * This is do_vis, the central code of vis.
177  * dst:	      Pointer to the destination buffer
178  * c:	      Character to encode
179  * flag:      Flag word
180  * nextc:     The character following 'c'
181  * extra:     Pointer to the list of extra characters to be
182  *	      backslash-protected.
183  */
184 static char *
185 do_svis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
186 {
187 	int isextra;
188 	size_t odlen = dlen ? *dlen : 0;
189 
190 	isextra = strchr(extra, c) != NULL;
191 #define HAVE(x) \
192 	do { \
193 		if (dlen) { \
194 			if (*dlen < (x)) \
195 				goto out; \
196 			*dlen -= (x); \
197 		} \
198 	} while (/*CONSTCOND*/0)
199 	if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) ||
200 	    ((flag & VIS_SAFE) && issafe(c)))) {
201 		HAVE(1);
202 		*dst++ = c;
203 		return dst;
204 	}
205 	if (flag & VIS_CSTYLE) {
206 		HAVE(2);
207 		switch (c) {
208 		case '\n':
209 			*dst++ = '\\'; *dst++ = 'n';
210 			return dst;
211 		case '\r':
212 			*dst++ = '\\'; *dst++ = 'r';
213 			return dst;
214 		case '\b':
215 			*dst++ = '\\'; *dst++ = 'b';
216 			return dst;
217 		case BELL:
218 			*dst++ = '\\'; *dst++ = 'a';
219 			return dst;
220 		case '\v':
221 			*dst++ = '\\'; *dst++ = 'v';
222 			return dst;
223 		case '\t':
224 			*dst++ = '\\'; *dst++ = 't';
225 			return dst;
226 		case '\f':
227 			*dst++ = '\\'; *dst++ = 'f';
228 			return dst;
229 		case ' ':
230 			*dst++ = '\\'; *dst++ = 's';
231 			return dst;
232 		case '\0':
233 			*dst++ = '\\'; *dst++ = '0';
234 			if (isoctal(nextc)) {
235 				HAVE(2);
236 				*dst++ = '0';
237 				*dst++ = '0';
238 			}
239 			return dst;
240 		default:
241 			if (isgraph(c)) {
242 				*dst++ = '\\'; *dst++ = c;
243 				return dst;
244 			}
245 			if (dlen)
246 				*dlen = odlen;
247 		}
248 	}
249 	if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) {
250 		HAVE(4);
251 		*dst++ = '\\';
252 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0';
253 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0';
254 		*dst++ =			     (c	      & 07) + '0';
255 	} else {
256 		if ((flag & VIS_NOSLASH) == 0) {
257 			HAVE(1);
258 			*dst++ = '\\';
259 		}
260 
261 		if (c & 0200) {
262 			HAVE(1);
263 			c &= 0177; *dst++ = 'M';
264 		}
265 
266 		if (iscntrl(c)) {
267 			HAVE(2);
268 			*dst++ = '^';
269 			if (c == 0177)
270 				*dst++ = '?';
271 			else
272 				*dst++ = c + '@';
273 		} else {
274 			HAVE(2);
275 			*dst++ = '-'; *dst++ = c;
276 		}
277 	}
278 	return dst;
279 out:
280 	*dlen = odlen;
281 	return NULL;
282 }
283 
284 typedef char *(*visfun_t)(char *, size_t *, int, int, int, const char *);
285 
286 /*
287  * Return the appropriate encoding function depending on the flags given.
288  */
289 static visfun_t
290 getvisfun(int flag)
291 {
292 	if (flag & VIS_HTTPSTYLE)
293 		return do_hvis;
294 	if (flag & VIS_MIMESTYLE)
295 		return do_mvis;
296 	return do_svis;
297 }
298 
299 /*
300  * isnvis - visually encode characters, also encoding the characters
301  *	  pointed to by `extra'
302  */
303 static char *
304 isnvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
305 {
306 	char *nextra = NULL;
307 	visfun_t f;
308 
309 	_DIAGASSERT(dst != NULL);
310 	_DIAGASSERT(extra != NULL);
311 	MAKEEXTRALIST(flag, nextra, extra);
312 	if (!nextra) {
313 		if (dlen && *dlen == 0) {
314 			errno = ENOSPC;
315 			return NULL;
316 		}
317 		*dst = '\0';		/* can't create nextra, return "" */
318 		return dst;
319 	}
320 	f = getvisfun(flag);
321 	dst = (*f)(dst, dlen, c, flag, nextc, nextra);
322 	free(nextra);
323 	if (dst == NULL || (dlen && *dlen == 0)) {
324 		errno = ENOSPC;
325 		return NULL;
326 	}
327 	*dst = '\0';
328 	return dst;
329 }
330 
/*
 * svis - encode the single character c (with `extra' also escaped) into
 * dst without any limit on the output length.
 */
char *
svis(char *dst, int c, int flag, int nextc, const char *extra)
{
	size_t *const nolimit = NULL;	/* NULL length pointer: no bound */

	return isnvis(dst, nolimit, c, flag, nextc, extra);
}
336 
/*
 * snvis - like svis, but dst is dlen bytes long and the encoding fails
 * instead of writing past it.
 */
char *
snvis(char *dst, size_t dlen, int c, int flag, int nextc, const char *extra)
{
	size_t room = dlen;	/* working copy consumed by the encoder */

	return isnvis(dst, &room, c, flag, nextc, extra);
}
342 
343 
344 /*
345  * strsvis, strsvisx - visually encode characters from src into dst
346  *
347  *	Extra is a pointer to a \0-terminated list of characters to
348  *	be encoded, too. These functions are useful e. g. to
349  *	encode strings in such a way so that they are not interpreted
350  *	by a shell.
351  *
352  *	Dst must be 4 times the size of src to account for possible
353  *	expansion.  The length of dst, not including the trailing NULL,
354  *	is returned.
355  *
356  *	Strsvisx encodes exactly len bytes from src into dst.
357  *	This is useful for encoding a block of data.
358  */
359 static int
360 istrsnvis(char *dst, size_t *dlen, const char *csrc, int flag, const char *extra)
361 {
362 	int c;
363 	char *start;
364 	char *nextra = NULL;
365 	const unsigned char *src = (const unsigned char *)csrc;
366 	visfun_t f;
367 
368 	_DIAGASSERT(dst != NULL);
369 	_DIAGASSERT(src != NULL);
370 	_DIAGASSERT(extra != NULL);
371 	MAKEEXTRALIST(flag, nextra, extra);
372 	if (!nextra) {
373 		*dst = '\0';		/* can't create nextra, return "" */
374 		return 0;
375 	}
376 	f = getvisfun(flag);
377 	for (start = dst; (c = *src++) != '\0'; /* empty */) {
378 		dst = (*f)(dst, dlen, c, flag, *src, nextra);
379 		if (dst == NULL) {
380 			errno = ENOSPC;
381 			return -1;
382 		}
383 	}
384 	free(nextra);
385 	if (dlen && *dlen == 0) {
386 		errno = ENOSPC;
387 		return -1;
388 	}
389 	*dst = '\0';
390 	return (int)(dst - start);
391 }
392 
/*
 * strsvis - encode the NUL-terminated string csrc into dst, also
 * escaping the characters in `extra'; the output length is not limited.
 */
int
strsvis(char *dst, const char *csrc, int flag, const char *extra)
{
	size_t *const nolimit = NULL;	/* NULL length pointer: no bound */

	return istrsnvis(dst, nolimit, csrc, flag, extra);
}
398 
/*
 * strsnvis - like strsvis, but dst is dlen bytes long and the encoding
 * fails instead of writing past it.
 */
int
strsnvis(char *dst, size_t dlen, const char *csrc, int flag, const char *extra)
{
	size_t room = dlen;	/* working copy consumed by the encoder */

	return istrsnvis(dst, &room, csrc, flag, extra);
}
404 
405 static int
406 istrsnvisx(char *dst, size_t *dlen, const char *csrc, size_t len, int flag,
407     const char *extra)
408 {
409 	unsigned char c;
410 	char *start;
411 	char *nextra = NULL;
412 	const unsigned char *src = (const unsigned char *)csrc;
413 	visfun_t f;
414 
415 	_DIAGASSERT(dst != NULL);
416 	_DIAGASSERT(src != NULL);
417 	_DIAGASSERT(extra != NULL);
418 	MAKEEXTRALIST(flag, nextra, extra);
419 	if (! nextra) {
420 		if (dlen && *dlen == 0) {
421 			errno = ENOSPC;
422 			return -1;
423 		}
424 		*dst = '\0';		/* can't create nextra, return "" */
425 		return 0;
426 	}
427 
428 	f = getvisfun(flag);
429 	for (start = dst; len > 0; len--) {
430 		c = *src++;
431 		dst = (*f)(dst, dlen, c, flag, len > 1 ? *src : '\0', nextra);
432 		if (dst == NULL) {
433 			errno = ENOSPC;
434 			return -1;
435 		}
436 	}
437 	free(nextra);
438 	if (dlen && *dlen == 0) {
439 		errno = ENOSPC;
440 		return -1;
441 	}
442 	*dst = '\0';
443 	return (int)(dst - start);
444 }
445 
/*
 * strsvisx - encode exactly len bytes of csrc into dst, also escaping
 * the characters in `extra'; the output length is not limited.
 */
int
strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra)
{
	size_t *const nolimit = NULL;	/* NULL length pointer: no bound */

	return istrsnvisx(dst, nolimit, csrc, len, flag, extra);
}
451 
/*
 * strsnvisx - like strsvisx, but dst is dlen bytes long and the encoding
 * fails instead of writing past it.
 */
int
strsnvisx(char *dst, size_t dlen, const char *csrc, size_t len, int flag,
    const char *extra)
{
	size_t room = dlen;	/* working copy consumed by the encoder */

	return istrsnvisx(dst, &room, csrc, len, flag, extra);
}
458 #endif
459 
460 #if !HAVE_VIS
461 /*
462  * vis - visually encode characters
463  */
464 static char *
465 invis(char *dst, size_t *dlen, int c, int flag, int nextc)
466 {
467 	char *extra = NULL;
468 	unsigned char uc = (unsigned char)c;
469 	visfun_t f;
470 
471 	_DIAGASSERT(dst != NULL);
472 
473 	MAKEEXTRALIST(flag, extra, "");
474 	if (! extra) {
475 		if (dlen && *dlen == 0) {
476 			errno = ENOSPC;
477 			return NULL;
478 		}
479 		*dst = '\0';		/* can't create extra, return "" */
480 		return dst;
481 	}
482 	f = getvisfun(flag);
483 	dst = (*f)(dst, dlen, uc, flag, nextc, extra);
484 	free(extra);
485 	if (dst == NULL || (dlen && *dlen == 0)) {
486 		errno = ENOSPC;
487 		return NULL;
488 	}
489 	*dst = '\0';
490 	return dst;
491 }
492 
/*
 * vis - encode the single character c into dst without any limit on the
 * output length.
 */
char *
vis(char *dst, int c, int flag, int nextc)
{
	size_t *const nolimit = NULL;	/* NULL length pointer: no bound */

	return invis(dst, nolimit, c, flag, nextc);
}
498 
/*
 * nvis - like vis, but dst is dlen bytes long and the encoding fails
 * instead of writing past it.
 */
char *
nvis(char *dst, size_t dlen, int c, int flag, int nextc)
{
	size_t room = dlen;	/* working copy consumed by the encoder */

	return invis(dst, &room, c, flag, nextc);
}
504 
505 
/*
 * strvis, strvisx - visually encode characters from src into dst
 *
 *	When no length is given, dst must provide four bytes per byte
 *	of src plus one for the terminating NUL to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Strvisx encodes exactly len bytes from src into dst.
 *	This is useful for encoding a block of data.
 *
 * istrnvis is the common worker for strvis and strnvis.  dlen, if
 * non-NULL, is the size of dst; if NULL no bound is enforced.  Returns
 * -1 with errno set to ENOSPC when the result does not fit.
 */
static int
istrnvis(char *dst, size_t *dlen, const char *src, int flag)
{
	/* A zero-sized buffer cannot even hold the terminating NUL. */
	if (dlen && *dlen == 0) {
		errno = ENOSPC;
		return -1;
	}
	/*
	 * istrsnvis() itself appends the characters selected by flag to the
	 * list it is given, so an empty list is all that is needed here;
	 * building the same list first would only add an allocation.
	 */
	return istrsnvis(dst, dlen, src, flag, "");
}
535 
/*
 * strvis - encode the NUL-terminated string src into dst; the output
 * length is not limited.
 */
int
strvis(char *dst, const char *src, int flag)
{
	size_t *const nolimit = NULL;	/* NULL length pointer: no bound */

	return istrnvis(dst, nolimit, src, flag);
}
541 
/*
 * strnvis - like strvis, but dst is dlen bytes long and the encoding
 * fails instead of writing past it.
 */
int
strnvis(char *dst, size_t dlen, const char *src, int flag)
{
	size_t room = dlen;	/* working copy consumed by the encoder */

	return istrnvis(dst, &room, src, flag);
}
547 
/*
 * istrnvisx - common worker for strvisx and strnvisx: encode exactly len
 * bytes of src into dst, escaping only the characters selected by flag.
 * dlen, if non-NULL, is the size of dst; if NULL no bound is enforced.
 * Returns the length of the encoding, not counting the terminating NUL,
 * or -1 with errno set to ENOSPC when the result does not fit.
 */
static int
istrnvisx(char *dst, size_t *dlen, const char *src, size_t len, int flag)
{
	/* A zero-sized buffer cannot even hold the terminating NUL. */
	if (dlen && *dlen == 0) {
		errno = ENOSPC;
		return -1;
	}
	/*
	 * istrsnvisx() itself appends the characters selected by flag to
	 * the list it is given, so an empty list is all that is needed
	 * here; building the same list first would only add an allocation.
	 */
	return istrsnvisx(dst, dlen, src, len, flag, "");
}
567 
/*
 * strvisx - encode exactly len bytes of src into dst; the output length
 * is not limited.
 */
int
strvisx(char *dst, const char *src, size_t len, int flag)
{
	size_t *const nolimit = NULL;	/* NULL length pointer: no bound */

	return istrnvisx(dst, nolimit, src, len, flag);
}
573 
/*
 * strnvisx - like strvisx, but dst is dlen bytes long and the encoding
 * fails instead of writing past it.
 */
int
strnvisx(char *dst, size_t dlen, const char *src, size_t len, int flag)
{
	size_t room = dlen;	/* working copy consumed by the encoder */

	return istrnvisx(dst, &room, src, len, flag);
}
579 
580 #endif
581