xref: /netbsd-src/lib/libc/gen/vis.c (revision 5bbd2a12505d72a8177929a37b5cee489d0a1cfd)
1 /*	$NetBSD: vis.c,v 1.44 2011/03/12 19:52:48 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55  * POSSIBILITY OF SUCH DAMAGE.
56  */
57 
58 #include <sys/cdefs.h>
59 #if defined(LIBC_SCCS) && !defined(lint)
60 __RCSID("$NetBSD: vis.c,v 1.44 2011/03/12 19:52:48 christos Exp $");
61 #endif /* LIBC_SCCS and not lint */
62 
63 #include "namespace.h"
64 #include <sys/types.h>
65 
66 #include <assert.h>
67 #include <vis.h>
68 #include <errno.h>
69 #include <stdlib.h>
70 
71 #ifdef __weak_alias
72 __weak_alias(strvisx,_strvisx)
73 #endif
74 
75 #if !HAVE_VIS || !HAVE_SVIS
76 #include <ctype.h>
77 #include <limits.h>
78 #include <stdio.h>
79 #include <string.h>
80 
/* Forward declaration: do_hvis() and do_mvis() are defined first and call it. */
static char *do_svis(char *, size_t *, int, int, int, const char *);

/* Drop any earlier definition of BELL and use the C alert character. */
#undef BELL
#define BELL '\a'

/*
 * Character classification and hex-digit helpers.
 *
 * Every use of the argument is parenthesized so that an expression
 * argument (for instance a conditional expression) is evaluated as a
 * whole before it is compared or used as an index.  The classification
 * macros evaluate their argument more than once, so it must not have
 * side effects.
 *
 * isoctal(c):	true if c is one of the digits '0'..'7'
 * iswhite(c):	true if c is space, tab or newline
 * issafe(c):	true if c is backspace, bell or carriage return
 * xtoa(c):	lowercase hex digit for a value in 0..15
 * XTOA(c):	uppercase hex digit for a value in 0..15
 */
#define isoctal(c)	(((unsigned char)(c)) >= '0' && ((unsigned char)(c)) <= '7')
#define iswhite(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
#define issafe(c)	((c) == '\b' || (c) == BELL || (c) == '\r')
#define xtoa(c)		("0123456789abcdef"[(c)])
#define XTOA(c)		("0123456789ABCDEF"[(c)])
91 
/* Room for the characters MAKEEXTRALIST may append (it adds at most four). */
#define MAXEXTRAS	5

/*
 * MAKEEXTRALIST(flag, extra, orig_str)
 *
 * Allocate, with malloc(), a NUL-terminated copy of orig_str followed by
 * the characters selected by flag, and store the result in extra:
 *	VIS_SP		appends ' '
 *	VIS_TAB		appends '\t'
 *	VIS_NL		appends '\n'
 *	VIS_NOSLASH	when NOT set, appends '\\'
 * If the allocation fails, extra is left NULL and nothing else is done.
 * The caller owns the result and must free() it.
 *
 * flag and extra are parenthesized at every use so that expression
 * arguments such as (A | B) are tested as a whole; the temporaries carry
 * a mel_ prefix so they are unlikely to capture a caller's identifier.
 */
#define MAKEEXTRALIST(flag, extra, orig_str)				      \
do {									      \
	const char *mel_orig = (orig_str);				      \
	size_t mel_len = strlen(mel_orig);				      \
	char *mel_end;							      \
	(extra) = malloc(mel_len + 1 + MAXEXTRAS);			      \
	if (!(extra)) break;						      \
	memcpy((extra), mel_orig, mel_len);				      \
	mel_end = (extra) + mel_len;					      \
	if ((flag) & VIS_SP) *mel_end++ = ' ';				      \
	if ((flag) & VIS_TAB) *mel_end++ = '\t';			      \
	if ((flag) & VIS_NL) *mel_end++ = '\n';				      \
	if (((flag) & VIS_NOSLASH) == 0) *mel_end++ = '\\';		      \
	*mel_end = '\0';						      \
} while (/*CONSTCOND*/0)
112 
/*
 * do_hvis - encode one character HTTP style (RFC 1808).
 *
 * ASCII alphanumerics and the characters $ - _ . + ! * ' ( ) , are handed
 * to do_svis().  Any other character is written as '%' followed by two
 * lowercase hexadecimal digits taken from the low eight bits of c.
 *
 * If dlen is non-NULL, *dlen is the space left in dst: it is reduced by
 * the three bytes written, and NULL is returned (nothing written) when
 * fewer than three bytes remain.  Otherwise the advanced destination
 * pointer is returned.
 */
static char *
do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
{
	unsigned int code;

	if (isascii(c) && isalnum(c))
		return do_svis(dst, dlen, c, flag, nextc, extra);

	switch (c) {
	/* safe */
	case '$': case '-': case '_': case '.': case '+':
	/* extra */
	case '!': case '*': case '\'': case '(': case ')': case ',':
		return do_svis(dst, dlen, c, flag, nextc, extra);
	default:
		break;
	}

	/* Everything else becomes a %xx escape. */
	if (dlen != NULL) {
		if (*dlen < 3)
			return NULL;
		*dlen -= 3;
	}
	code = (unsigned int)c;
	dst[0] = '%';
	dst[1] = xtoa((code >> 4) & 0xf);
	dst[2] = xtoa(code & 0xf);
	return dst + 3;
}
140 
/*
 * do_mvis - encode one character as Quoted-Printable MIME (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A character other than '\n' is written as '=' followed by two uppercase
 * hexadecimal digits when any of the following holds:
 *   - it is white space and the next character is '\r' or '\n';
 *   - it is not white space and is below 33, equal to 61, or above 126;
 *   - strchr() finds it in the string "#$@[\]^`{|}~".
 * Every other character is handed to do_svis().
 *
 * If dlen is non-NULL, *dlen is the space left in dst: it is reduced by
 * the three bytes written, and NULL is returned (nothing written) when
 * fewer than three bytes remain.
 */
static char *
do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
{
	int quote = 0;

	if (c != '\n') {
		if (isspace(c)) {
			/* Space at the end of the line */
			quote = (nextc == '\r' || nextc == '\n');
		} else {
			/* Out of range */
			quote = (c < 33 || c == 61 || c > 126);
		}
		/* Specific char to be escaped */
		if (!quote)
			quote = (strchr("#$@[\\]^`{|}~", c) != NULL);
	}

	if (!quote)
		return do_svis(dst, dlen, c, flag, nextc, extra);

	if (dlen != NULL) {
		if (*dlen < 3)
			return NULL;
		*dlen -= 3;
	}
	dst[0] = '=';
	dst[1] = XTOA(((unsigned int)c >> 4) & 0xf);
	dst[2] = XTOA((unsigned int)c & 0xf);
	return dst + 3;
}
168 
169 /*
170  * This is do_vis, the central code of vis.
171  * dst:	      Pointer to the destination buffer
172  * c:	      Character to encode
173  * flag:      Flag word
174  * nextc:     The character following 'c'
175  * extra:     Pointer to the list of extra characters to be
176  *	      backslash-protected.
177  */
178 static char *
179 do_svis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
180 {
181 	int isextra;
182 	size_t odlen = dlen ? *dlen : 0;
183 
184 	isextra = strchr(extra, c) != NULL;
185 #define HAVE(x) \
186 	do { \
187 		if (dlen) { \
188 			if (*dlen < (x)) \
189 				goto out; \
190 			*dlen -= (x); \
191 		} \
192 	} while (/*CONSTCOND*/0)
193 	if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) ||
194 	    ((flag & VIS_SAFE) && issafe(c)))) {
195 		HAVE(1);
196 		*dst++ = c;
197 		return dst;
198 	}
199 	if (flag & VIS_CSTYLE) {
200 		HAVE(2);
201 		switch (c) {
202 		case '\n':
203 			*dst++ = '\\'; *dst++ = 'n';
204 			return dst;
205 		case '\r':
206 			*dst++ = '\\'; *dst++ = 'r';
207 			return dst;
208 		case '\b':
209 			*dst++ = '\\'; *dst++ = 'b';
210 			return dst;
211 		case BELL:
212 			*dst++ = '\\'; *dst++ = 'a';
213 			return dst;
214 		case '\v':
215 			*dst++ = '\\'; *dst++ = 'v';
216 			return dst;
217 		case '\t':
218 			*dst++ = '\\'; *dst++ = 't';
219 			return dst;
220 		case '\f':
221 			*dst++ = '\\'; *dst++ = 'f';
222 			return dst;
223 		case ' ':
224 			*dst++ = '\\'; *dst++ = 's';
225 			return dst;
226 		case '\0':
227 			*dst++ = '\\'; *dst++ = '0';
228 			if (isoctal(nextc)) {
229 				HAVE(2);
230 				*dst++ = '0';
231 				*dst++ = '0';
232 			}
233 			return dst;
234 		default:
235 			if (isgraph(c)) {
236 				*dst++ = '\\'; *dst++ = c;
237 				return dst;
238 			}
239 			if (dlen)
240 				*dlen = odlen;
241 		}
242 	}
243 	if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) {
244 		HAVE(4);
245 		*dst++ = '\\';
246 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0';
247 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0';
248 		*dst++ =			     (c	      & 07) + '0';
249 	} else {
250 		if ((flag & VIS_NOSLASH) == 0) {
251 			HAVE(1);
252 			*dst++ = '\\';
253 		}
254 
255 		if (c & 0200) {
256 			HAVE(1);
257 			c &= 0177; *dst++ = 'M';
258 		}
259 
260 		if (iscntrl(c)) {
261 			HAVE(2);
262 			*dst++ = '^';
263 			if (c == 0177)
264 				*dst++ = '?';
265 			else
266 				*dst++ = c + '@';
267 		} else {
268 			HAVE(2);
269 			*dst++ = '-'; *dst++ = c;
270 		}
271 	}
272 	return dst;
273 out:
274 	*dlen = odlen;
275 	return NULL;
276 }
277 
278 typedef char *(*visfun_t)(char *, size_t *, int, int, int, const char *);
279 
280 /*
281  * Return the appropriate encoding function depending on the flags given.
282  */
283 static visfun_t
284 getvisfun(int flag)
285 {
286 	if (flag & VIS_HTTPSTYLE)
287 		return do_hvis;
288 	if (flag & VIS_MIMESTYLE)
289 		return do_mvis;
290 	return do_svis;
291 }
292 
293 /*
294  * isnvis - visually encode characters, also encoding the characters
295  *	  pointed to by `extra'
296  */
297 static char *
298 isnvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
299 {
300 	char *nextra = NULL;
301 	visfun_t f;
302 
303 	_DIAGASSERT(dst != NULL);
304 	_DIAGASSERT(extra != NULL);
305 	MAKEEXTRALIST(flag, nextra, extra);
306 	if (!nextra) {
307 		if (dlen && *dlen == 0) {
308 			errno = ENOSPC;
309 			return NULL;
310 		}
311 		*dst = '\0';		/* can't create nextra, return "" */
312 		return dst;
313 	}
314 	f = getvisfun(flag);
315 	dst = (*f)(dst, dlen, c, flag, nextc, nextra);
316 	free(nextra);
317 	if (dst == NULL || (dlen && *dlen == 0)) {
318 		errno = ENOSPC;
319 		return NULL;
320 	}
321 	*dst = '\0';
322 	return dst;
323 }
324 
/*
 * svis - encode one character into dst with no destination size limit
 * (isnvis is called with a NULL length pointer).
 */
char *
svis(char *dst, int c, int flag, int nextc, const char *extra)
{
	size_t *const unbounded = NULL;

	return isnvis(dst, unbounded, c, flag, nextc, extra);
}
330 
/*
 * snvis - encode one character into dst, which holds dlen bytes.
 * The result is whatever isnvis returns for that bound.
 */
char *
snvis(char *dst, size_t dlen, int c, int flag, int nextc, const char *extra)
{
	size_t room = dlen;

	return isnvis(dst, &room, c, flag, nextc, extra);
}
336 
337 
338 /*
339  * strsvis, strsvisx - visually encode characters from src into dst
340  *
341  *	Extra is a pointer to a \0-terminated list of characters to
342  *	be encoded, too. These functions are useful e. g. to
343  *	encode strings in such a way so that they are not interpreted
344  *	by a shell.
345  *
346  *	Dst must be 4 times the size of src to account for possible
347  *	expansion.  The length of dst, not including the trailing NULL,
348  *	is returned.
349  *
350  *	Strsvisx encodes exactly len bytes from src into dst.
351  *	This is useful for encoding a block of data.
352  */
353 static int
354 istrsnvis(char *dst, size_t *dlen, const char *csrc, int flag, const char *extra)
355 {
356 	int c;
357 	char *start;
358 	char *nextra = NULL;
359 	const unsigned char *src = (const unsigned char *)csrc;
360 	visfun_t f;
361 
362 	_DIAGASSERT(dst != NULL);
363 	_DIAGASSERT(src != NULL);
364 	_DIAGASSERT(extra != NULL);
365 	MAKEEXTRALIST(flag, nextra, extra);
366 	if (!nextra) {
367 		*dst = '\0';		/* can't create nextra, return "" */
368 		return 0;
369 	}
370 	f = getvisfun(flag);
371 	for (start = dst; (c = *src++) != '\0'; /* empty */) {
372 		dst = (*f)(dst, dlen, c, flag, *src, nextra);
373 		if (dst == NULL) {
374 			errno = ENOSPC;
375 			return -1;
376 		}
377 	}
378 	free(nextra);
379 	if (dlen && *dlen == 0) {
380 		errno = ENOSPC;
381 		return -1;
382 	}
383 	*dst = '\0';
384 	return (int)(dst - start);
385 }
386 
/*
 * strsvis - encode the NUL-terminated string csrc into dst with no
 * destination size limit (istrsnvis is called with a NULL length pointer).
 */
int
strsvis(char *dst, const char *csrc, int flag, const char *extra)
{
	size_t *const unbounded = NULL;

	return istrsnvis(dst, unbounded, csrc, flag, extra);
}
392 
/*
 * strsnvis - encode the NUL-terminated string csrc into dst, which holds
 * dlen bytes.  The result is whatever istrsnvis returns for that bound.
 */
int
strsnvis(char *dst, size_t dlen, const char *csrc, int flag, const char *extra)
{
	size_t room = dlen;

	return istrsnvis(dst, &room, csrc, flag, extra);
}
398 
399 static int
400 istrsnvisx(char *dst, size_t *dlen, const char *csrc, size_t len, int flag,
401     const char *extra)
402 {
403 	unsigned char c;
404 	char *start;
405 	char *nextra = NULL;
406 	const unsigned char *src = (const unsigned char *)csrc;
407 	visfun_t f;
408 
409 	_DIAGASSERT(dst != NULL);
410 	_DIAGASSERT(src != NULL);
411 	_DIAGASSERT(extra != NULL);
412 	MAKEEXTRALIST(flag, nextra, extra);
413 	if (! nextra) {
414 		if (dlen && *dlen == 0) {
415 			errno = ENOSPC;
416 			return -1;
417 		}
418 		*dst = '\0';		/* can't create nextra, return "" */
419 		return 0;
420 	}
421 
422 	f = getvisfun(flag);
423 	for (start = dst; len > 0; len--) {
424 		c = *src++;
425 		dst = (*f)(dst, dlen, c, flag, len > 1 ? *src : '\0', nextra);
426 		if (dst == NULL) {
427 			errno = ENOSPC;
428 			return -1;
429 		}
430 	}
431 	free(nextra);
432 	if (dlen && *dlen == 0) {
433 		errno = ENOSPC;
434 		return -1;
435 	}
436 	*dst = '\0';
437 	return (int)(dst - start);
438 }
439 
/*
 * strsvisx - encode exactly len bytes of csrc into dst with no
 * destination size limit (istrsnvisx is called with a NULL length pointer).
 */
int
strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra)
{
	size_t *const unbounded = NULL;

	return istrsnvisx(dst, unbounded, csrc, len, flag, extra);
}
445 
/*
 * strsnvisx - encode exactly len bytes of csrc into dst, which holds dlen
 * bytes.  The result is whatever istrsnvisx returns for that bound.
 */
int
strsnvisx(char *dst, size_t dlen, const char *csrc, size_t len, int flag,
    const char *extra)
{
	size_t room = dlen;

	return istrsnvisx(dst, &room, csrc, len, flag, extra);
}
452 #endif
453 
454 #if !HAVE_VIS
455 /*
456  * vis - visually encode characters
457  */
458 static char *
459 invis(char *dst, size_t *dlen, int c, int flag, int nextc)
460 {
461 	char *extra = NULL;
462 	unsigned char uc = (unsigned char)c;
463 	visfun_t f;
464 
465 	_DIAGASSERT(dst != NULL);
466 
467 	MAKEEXTRALIST(flag, extra, "");
468 	if (! extra) {
469 		if (dlen && *dlen == 0) {
470 			errno = ENOSPC;
471 			return NULL;
472 		}
473 		*dst = '\0';		/* can't create extra, return "" */
474 		return dst;
475 	}
476 	f = getvisfun(flag);
477 	dst = (*f)(dst, dlen, uc, flag, nextc, extra);
478 	free(extra);
479 	if (dst == NULL || (dlen && *dlen == 0)) {
480 		errno = ENOSPC;
481 		return NULL;
482 	}
483 	*dst = '\0';
484 	return dst;
485 }
486 
/*
 * vis - encode one character into dst with no destination size limit
 * (invis is called with a NULL length pointer).
 */
char *
vis(char *dst, int c, int flag, int nextc)
{
	size_t *const unbounded = NULL;

	return invis(dst, unbounded, c, flag, nextc);
}
492 
/*
 * nvis - encode one character into dst, which holds dlen bytes.
 * The result is whatever invis returns for that bound.
 */
char *
nvis(char *dst, size_t dlen, int c, int flag, int nextc)
{
	size_t room = dlen;

	return invis(dst, &room, c, flag, nextc);
}
498 
499 
/*
 * strvis, strvisx - visually encode characters from src into dst
 *
 *	Each source character expands to at most four bytes, so an
 *	unbounded dst must be 4 times the size of src plus one byte
 *	for the trailing NUL.  The length of dst, not including the
 *	trailing NUL, is returned.
 *
 *	Strvisx encodes exactly len bytes from src into dst.
 *	This is useful for encoding a block of data.
 *
 * istrnvis is the common worker for strvis and strnvis.  dlen is NULL
 * for an unbounded destination, otherwise it points at the number of
 * bytes available in dst.  Returns -1 with errno set to ENOSPC when the
 * result does not fit.
 */
static int
istrnvis(char *dst, size_t *dlen, const char *src, int flag)
{
	/* A zero-sized buffer cannot even hold the terminating NUL. */
	if (dlen && *dlen == 0) {
		errno = ENOSPC;
		return -1;
	}
	/*
	 * istrsnvis() appends the flag-selected characters to the extra
	 * list it is given, so an empty list is all that is needed here;
	 * building the same list first would only duplicate its entries.
	 */
	return istrsnvis(dst, dlen, src, flag, "");
}
529 
/*
 * strvis - encode the NUL-terminated string src into dst with no
 * destination size limit (istrnvis is called with a NULL length pointer).
 */
int
strvis(char *dst, const char *src, int flag)
{
	size_t *const unbounded = NULL;

	return istrnvis(dst, unbounded, src, flag);
}
535 
/*
 * strnvis - encode the NUL-terminated string src into dst, which holds
 * dlen bytes.  The result is whatever istrnvis returns for that bound.
 */
int
strnvis(char *dst, size_t dlen, const char *src, int flag)
{
	size_t room = dlen;

	return istrnvis(dst, &room, src, flag);
}
541 
/*
 * istrnvisx - common worker for strvisx and strnvisx: visually encode
 * exactly len bytes from src into dst.
 *
 * dlen is NULL for an unbounded destination, otherwise it points at the
 * number of bytes available in dst.  Returns the length of the encoding,
 * not counting the trailing NUL, or -1 with errno set to ENOSPC when the
 * result does not fit.
 */
static int
istrnvisx(char *dst, size_t *dlen, const char *src, size_t len, int flag)
{
	/* A zero-sized buffer cannot even hold the terminating NUL. */
	if (dlen && *dlen == 0) {
		errno = ENOSPC;
		return -1;
	}
	/*
	 * istrsnvisx() appends the flag-selected characters to the extra
	 * list it is given, so an empty list is all that is needed here;
	 * building the same list first would only duplicate its entries.
	 */
	return istrsnvisx(dst, dlen, src, len, flag, "");
}
561 
/*
 * strvisx - encode exactly len bytes of src into dst with no destination
 * size limit (istrnvisx is called with a NULL length pointer).
 */
int
strvisx(char *dst, const char *src, size_t len, int flag)
{
	size_t *const unbounded = NULL;

	return istrnvisx(dst, unbounded, src, len, flag);
}
567 
/*
 * strnvisx - encode exactly len bytes of src into dst, which holds dlen
 * bytes.  The result is whatever istrnvisx returns for that bound.
 */
int
strnvisx(char *dst, size_t dlen, const char *src, size_t len, int flag)
{
	size_t room = dlen;

	return istrnvisx(dst, &room, src, len, flag);
}
573 
574 #endif
575