xref: /netbsd-src/usr.bin/printf/printf.c (revision 711626f8b9dff33a9c33b0b2bf232f323bfc5e49)
1 /*	$NetBSD: printf.c,v 1.42 2018/07/25 15:35:27 kre Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 #if !defined(BUILTIN) && !defined(SHELL)
35 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\
36  The Regents of the University of California.  All rights reserved.");
37 #endif
38 #endif
39 
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)printf.c	8.2 (Berkeley) 3/22/95";
43 #else
44 __RCSID("$NetBSD: printf.c,v 1.42 2018/07/25 15:35:27 kre Exp $");
45 #endif
46 #endif /* not lint */
47 
48 #include <sys/types.h>
49 
50 #include <ctype.h>
51 #include <err.h>
52 #include <errno.h>
53 #include <inttypes.h>
54 #include <limits.h>
55 #include <locale.h>
56 #include <stdarg.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 
/*
 * Value substituted for the \e escape (the ESC control character, octal 033).
 * The '\e' character constant is a GNU C extension, so any other compiler is
 * given the numeric value instead.
 */
#ifdef __GNUC__
#define ESCAPE '\e'
#else
#define ESCAPE 033
#endif
67 
/* Escape-sequence decoding (conv_escape*) and encoding (conv_expand) */
static void	 conv_escape_str(char *, void (*)(int), int);
static char	*conv_escape(char *, char *, int);
static char	*conv_expand(const char *);
/* Fetch the next operand from gargv, converted to the requested type */
static char	 getchr(void);
static double	 getdouble(void);
static int	 getwidth(void);
static intmax_t	 getintmax(void);
static uintmax_t getuintmax(void);
static char	*getstr(void);
/* Miscellaneous helpers */
static char	*mklong(const char *, char);
static void      check_conversion(const char *, const char *);
static void	 usage(void);

/*
 * State for the %b conversion: b_count() tallies output bytes in b_length,
 * and b_output() walks b_fmt, a formatted template string, emitting one
 * real byte for every non-space template character.
 */
static void	b_count(int);
static void	b_output(int);
static size_t	b_length;
static char	*b_fmt;

/*
 * rval is the exit status being accumulated; bit 0x100 is set when a \c
 * escape is seen in a %b operand and is masked off before main() returns.
 */
static int	rval;
/* Cursor over the operands that follow the format string */
static char  **gargv;
88 
89 #ifdef BUILTIN		/* csh builtin */
90 #define main progprintf
91 #endif
92 
93 #ifdef SHELL		/* sh (aka ash) builtin */
94 #define main printfcmd
95 #include "../../bin/sh/bltin/bltin.h"
96 #endif /* SHELL */
97 
/*
 * PF(f, func): print the value `func' with printf(3) using the conversion
 * specification `f'.
 *
 * The macro reads the locals `fieldwidth' and `precision' of the expanding
 * scope; a value other than -1 means the specification contained a '*' for
 * that field and the value must be passed as an extra int argument.  The
 * printf() result is stored in the local `error'.
 *
 * The body is wrapped in do { } while (0) so that "PF(a, b);" is a single
 * statement and can safely be used as the body of an if/else.
 */
#define PF(f, func) do { \
	if (fieldwidth != -1) { \
		if (precision != -1) \
			error = printf(f, fieldwidth, precision, func); \
		else \
			error = printf(f, fieldwidth, func); \
	} else if (precision != -1) \
		error = printf(f, precision, func); \
	else \
		error = printf(f, func); \
} while (0)
109 
/*
 * APF(cpp, f, func): like PF(), but format into a freshly allocated string
 * with asprintf(3); the result pointer is stored through `cpp'.
 *
 * Uses the locals `fieldwidth' and `precision' of the expanding scope (-1
 * means "not supplied via '*'") and stores the asprintf() result in the
 * local `error'.
 *
 * The body is wrapped in do { } while (0) so that "APF(p, a, b);" is a
 * single statement and can safely be used as the body of an if/else.
 */
#define APF(cpp, f, func) do { \
	if (fieldwidth != -1) { \
		if (precision != -1) \
			error = asprintf(cpp, f, fieldwidth, precision, func); \
		else \
			error = asprintf(cpp, f, fieldwidth, func); \
	} else if (precision != -1) \
		error = asprintf(cpp, f, precision, func); \
	else \
		error = asprintf(cpp, f, func); \
} while (0)
121 
122 #ifdef main
123 int main(int, char *[]);
124 #endif
125 int main(int argc, char *argv[])
126 {
127 	char *fmt, *start;
128 	int fieldwidth, precision;
129 	char nextch;
130 	char *format;
131 	char ch;
132 	int error, o;
133 
134 #if !defined(SHELL) && !defined(BUILTIN)
135 	(void)setlocale (LC_ALL, "");
136 #endif
137 
138 	while ((o = getopt(argc, argv, "")) != -1) {
139 		switch (o) {
140 		case '?':
141 		default:
142 			usage();
143 			return 1;
144 		}
145 	}
146 	argc -= optind;
147 	argv += optind;
148 
149 	if (argc < 1) {
150 		usage();
151 		return 1;
152 	}
153 
154 	format = *argv;
155 	gargv = ++argv;
156 
157 #define SKIP1	"#-+ 0'"
158 #define SKIP2	"0123456789"
159 	do {
160 		/*
161 		 * Basic algorithm is to scan the format string for conversion
162 		 * specifications -- once one is found, find out if the field
163 		 * width or precision is a '*'; if it is, gather up value.
164 		 * Note, format strings are reused as necessary to use up the
165 		 * provided arguments, arguments of zero/null string are
166 		 * provided to use up the format string.
167 		 */
168 
169 		/* find next format specification */
170 		for (fmt = format; (ch = *fmt++) != '\0';) {
171 			if (ch == '\\') {
172 				char c_ch;
173 				fmt = conv_escape(fmt, &c_ch, 0);
174 				putchar(c_ch);
175 				continue;
176 			}
177 			if (ch != '%' || (*fmt == '%' && ++fmt)) {
178 				(void)putchar(ch);
179 				continue;
180 			}
181 
182 			/*
183 			 * Ok - we've found a format specification,
184 			 * Save its address for a later printf().
185 			 */
186 			start = fmt - 1;
187 
188 			/* skip to field width */
189 			fmt += strspn(fmt, SKIP1);
190 			if (*fmt == '*') {
191 				fmt++;
192 				fieldwidth = getwidth();
193 			} else
194 				fieldwidth = -1;
195 
196 			/* skip to possible '.', get following precision */
197 			fmt += strspn(fmt, SKIP2);
198 			if (*fmt == '.') {
199 				fmt++;
200 				if (*fmt == '*') {
201 					fmt++;
202 					precision = getwidth();
203 				} else
204 					precision = -1;
205 			} else
206 				precision = -1;
207 
208 			fmt += strspn(fmt, SKIP2);
209 
210 			ch = *fmt;
211 			if (!ch) {
212 				warnx("missing format character");
213 				return 1;
214 			}
215 			/*
216 			 * null terminate format string to we can use it
217 			 * as an argument to printf.
218 			 */
219 			nextch = fmt[1];
220 			fmt[1] = 0;
221 			switch (ch) {
222 
223 			case 'B': {
224 				const char *p = conv_expand(getstr());
225 
226 				if (p == NULL)
227 					goto out;
228 				*fmt = 's';
229 				PF(start, p);
230 				if (error < 0)
231 					goto out;
232 				break;
233 			}
234 			case 'b': {
235 				/*
236 				 * There has to be a better way to do this,
237 				 * but the string we generate might have
238 				 * embedded nulls
239 				 */
240 				static char *a, *t;
241 				char *cp = getstr();
242 
243 				/* Free on entry in case shell longjumped out */
244 				if (a != NULL)
245 					free(a);
246 				a = NULL;
247 				if (t != NULL)
248 					free(t);
249 				t = NULL;
250 
251 				/* Count number of bytes we want to output */
252 				b_length = 0;
253 				conv_escape_str(cp, b_count, 0);
254 				t = malloc(b_length + 1);
255 				if (t == NULL)
256 					goto out;
257 				(void)memset(t, 'x', b_length);
258 				t[b_length] = 0;
259 
260 				/* Get printf to calculate the lengths */
261 				*fmt = 's';
262 				APF(&a, start, t);
263 				if (error == -1)
264 					goto out;
265 				b_fmt = a;
266 
267 				/* Output leading spaces and data bytes */
268 				conv_escape_str(cp, b_output, 1);
269 
270 				/* Add any trailing spaces */
271 				printf("%s", b_fmt);
272 				break;
273 			}
274 			case 'c': {
275 				char p = getchr();
276 
277 				PF(start, p);
278 				if (error < 0)
279 					goto out;
280 				break;
281 			}
282 			case 's': {
283 				char *p = getstr();
284 
285 				PF(start, p);
286 				if (error < 0)
287 					goto out;
288 				break;
289 			}
290 			case 'd':
291 			case 'i': {
292 				intmax_t p = getintmax();
293 				char *f = mklong(start, ch);
294 
295 				PF(f, p);
296 				if (error < 0)
297 					goto out;
298 				break;
299 			}
300 			case 'o':
301 			case 'u':
302 			case 'x':
303 			case 'X': {
304 				uintmax_t p = getuintmax();
305 				char *f = mklong(start, ch);
306 
307 				PF(f, p);
308 				if (error < 0)
309 					goto out;
310 				break;
311 			}
312 			case 'a':
313 			case 'A':
314 			case 'e':
315 			case 'E':
316 			case 'f':
317 			case 'F':
318 			case 'g':
319 			case 'G': {
320 				double p = getdouble();
321 
322 				PF(start, p);
323 				if (error < 0)
324 					goto out;
325 				break;
326 			}
327 			default:
328 				warnx("%s: invalid directive", start);
329 				return 1;
330 			}
331 			*fmt++ = ch;
332 			*fmt = nextch;
333 			/* escape if a \c was encountered */
334 			if (rval & 0x100)
335 				return rval & ~0x100;
336 		}
337 	} while (gargv != argv && *gargv);
338 
339 	return rval & ~0x100;
340   out:
341 	warn("print failed");
342 	return 1;
343 }
344 
345 /* helper functions for conv_escape_str */
346 
347 static void
348 /*ARGSUSED*/
349 b_count(int ch)
350 {
351 	b_length++;
352 }
353 
354 /* Output one converted character for every 'x' in the 'format' */
355 
356 static void
357 b_output(int ch)
358 {
359 	for (;;) {
360 		switch (*b_fmt++) {
361 		case 0:
362 			b_fmt--;
363 			return;
364 		case ' ':
365 			putchar(' ');
366 			break;
367 		default:
368 			putchar(ch);
369 			return;
370 		}
371 	}
372 }
373 
374 
375 /*
376  * Print SysV echo(1) style escape string
377  *	Halts processing string if a \c escape is encountered.
378  */
379 static void
380 conv_escape_str(char *str, void (*do_putchar)(int), int quiet)
381 {
382 	int value;
383 	int ch;
384 	char c;
385 
386 	while ((ch = *str++) != '\0') {
387 		if (ch != '\\') {
388 			do_putchar(ch);
389 			continue;
390 		}
391 
392 		ch = *str++;
393 		if (ch == 'c') {
394 			/* \c as in SYSV echo - abort all processing.... */
395 			rval |= 0x100;
396 			break;
397 		}
398 
399 		/*
400 		 * %b string octal constants are not like those in C.
401 		 * They start with a \0, and are followed by 0, 1, 2,
402 		 * or 3 octal digits.
403 		 */
404 		if (ch == '0') {
405 			int octnum = 0, i;
406 			for (i = 0; i < 3; i++) {
407 				if (!isdigit((unsigned char)*str) || *str > '7')
408 					break;
409 				octnum = (octnum << 3) | (*str++ - '0');
410 			}
411 			do_putchar(octnum);
412 			continue;
413 		}
414 
415 		/* \[M][^|-]C as defined by vis(3) */
416 		if (ch == 'M' && *str == '-') {
417 			do_putchar(0200 | str[1]);
418 			str += 2;
419 			continue;
420 		}
421 		if (ch == 'M' && *str == '^') {
422 			str++;
423 			value = 0200;
424 			ch = '^';
425 		} else
426 			value = 0;
427 		if (ch == '^') {
428 			ch = *str++;
429 			if (ch == '?')
430 				value |= 0177;
431 			else
432 				value |= ch & 037;
433 			do_putchar(value);
434 			continue;
435 		}
436 
437 		/* Finally test for sequences valid in the format string */
438 		str = conv_escape(str - 1, &c, quiet);
439 		do_putchar(c);
440 	}
441 }
442 
443 /*
444  * Print "standard" escape characters
445  */
446 static char *
447 conv_escape(char *str, char *conv_ch, int quiet)
448 {
449 	char value;
450 	char ch;
451 	char num_buf[4], *num_end;
452 
453 	ch = *str++;
454 
455 	switch (ch) {
456 	case '\0':
457 		if (!quiet)
458 			warnx("incomplete escape sequence");
459 		rval = 1;
460 		value = '\\';
461 		--str;
462 		break;
463 
464 	case '0': case '1': case '2': case '3':
465 	case '4': case '5': case '6': case '7':
466 		num_buf[0] = ch;
467 		ch = str[0];
468 		num_buf[1] = ch;
469 		num_buf[2] = (char)(ch != '\0' ? str[1] : '\0');
470 		num_buf[3] = '\0';
471 		value = (char)strtoul(num_buf, &num_end, 8);
472 		str += num_end  - (num_buf + 1);
473 		break;
474 
475 	case 'x':
476 		/*
477 		 * Hexadecimal character constants are not required to be
478 		 * supported (by SuS v1) because there is no consistent
479 		 * way to detect the end of the constant.
480 		 * Supporting 2 byte constants is a compromise.
481 		 */
482 		ch = str[0];
483 		num_buf[0] = ch;
484 		num_buf[1] = (char)(ch != '\0' ? str[1] : '\0');
485 		num_buf[2] = '\0';
486 		value = (char)strtoul(num_buf, &num_end, 16);
487 		str += num_end - num_buf;
488 		break;
489 
490 	case '\\':	value = '\\';	break;	/* backslash */
491 	case '\'':	value = '\'';	break;	/* single quote */
492 	case '"':	value = '"';	break;	/* double quote */
493 	case 'a':	value = '\a';	break;	/* alert */
494 	case 'b':	value = '\b';	break;	/* backspace */
495 	case 'e':	value = ESCAPE;	break;	/* escape */
496 	case 'f':	value = '\f';	break;	/* form-feed */
497 	case 'n':	value = '\n';	break;	/* newline */
498 	case 'r':	value = '\r';	break;	/* carriage-return */
499 	case 't':	value = '\t';	break;	/* tab */
500 	case 'v':	value = '\v';	break;	/* vertical-tab */
501 
502 	default:
503 		if (!quiet)
504 			warnx("unknown escape sequence `\\%c'", ch);
505 		rval = 1;
506 		value = ch;
507 		break;
508 	}
509 
510 	*conv_ch = value;
511 	return str;
512 }
513 
514 /* expand a string so that everything is printable */
515 
516 static char *
517 conv_expand(const char *str)
518 {
519 	static char *conv_str;
520 	char *cp;
521 	char ch;
522 
523 	if (conv_str)
524 		free(conv_str);
525 	/* get a buffer that is definitely large enough.... */
526 	conv_str = malloc(4 * strlen(str) + 1);
527 	if (!conv_str)
528 		return NULL;
529 	cp = conv_str;
530 
531 	while ((ch = *(const char *)str++) != '\0') {
532 		switch (ch) {
533 		/* Use C escapes for expected control characters */
534 		case '\\':	ch = '\\';	break;	/* backslash */
535 		case '\'':	ch = '\'';	break;	/* single quote */
536 		case '"':	ch = '"';	break;	/* double quote */
537 		case '\a':	ch = 'a';	break;	/* alert */
538 		case '\b':	ch = 'b';	break;	/* backspace */
539 		case ESCAPE:	ch = 'e';	break;	/* escape */
540 		case '\f':	ch = 'f';	break;	/* form-feed */
541 		case '\n':	ch = 'n';	break;	/* newline */
542 		case '\r':	ch = 'r';	break;	/* carriage-return */
543 		case '\t':	ch = 't';	break;	/* tab */
544 		case '\v':	ch = 'v';	break;	/* vertical-tab */
545 		default:
546 			/* Copy anything printable */
547 			if (isprint((unsigned char)ch)) {
548 				*cp++ = ch;
549 				continue;
550 			}
551 			/* Use vis(3) encodings for the rest */
552 			*cp++ = '\\';
553 			if (ch & 0200) {
554 				*cp++ = 'M';
555 				ch &= (char)~0200;
556 			}
557 			if (ch == 0177) {
558 				*cp++ = '^';
559 				*cp++ = '?';
560 				continue;
561 			}
562 			if (ch < 040) {
563 				*cp++ = '^';
564 				*cp++ = ch | 0100;
565 				continue;
566 			}
567 			*cp++ = '-';
568 			*cp++ = ch;
569 			continue;
570 		}
571 		*cp++ = '\\';
572 		*cp++ = ch;
573 	}
574 
575 	*cp = 0;
576 	return conv_str;
577 }
578 
/*
 * Build a copy of the conversion specification str with a 'j' length
 * modifier inserted in front of its final character, which is replaced by
 * ch (so "%5d" with ch 'd' becomes "%5jd").
 *
 * The result is held in a static buffer.  A specification that does not
 * fit is reported and reduced to its first character followed by 'j' and
 * ch.
 */
static char *
mklong(const char *str, char ch)
{
	static char copy[64];
	size_t total, prefix;

	/* original length, plus the inserted 'j', plus the terminator */
	total = strlen(str) + 2;
	if (total > sizeof copy) {
		warnx("format %s too complex", str);
		total = 4;
	}

	/* everything except the conversion character is kept */
	prefix = total - 3;
	(void)memmove(copy, str, prefix);
	copy[prefix] = 'j';
	copy[prefix + 1] = ch;
	copy[prefix + 2] = '\0';
	return copy;
}
596 
597 static char
598 getchr(void)
599 {
600 	if (!*gargv)
601 		return 0;
602 	return **gargv++;
603 }
604 
605 static char *
606 getstr(void)
607 {
608 	static char empty[] = "";
609 	if (!*gargv)
610 		return empty;
611 	return *gargv++;
612 }
613 
614 static int
615 getwidth(void)
616 {
617 	unsigned long val;
618 	char *s, *ep;
619 
620 	s = *gargv;
621 	if (!*gargv)
622 		return 0;
623 	gargv++;
624 
625 	errno = 0;
626 	val = strtoul(s, &ep, 0);
627 	check_conversion(s, ep);
628 
629 	/* Arbitrarily 'restrict' field widths to 1Mbyte */
630 	if (val > 1 << 20) {
631 		warnx("%s: invalid field width", s);
632 		return 0;
633 	}
634 
635 	return (int)val;
636 }
637 
638 static intmax_t
639 getintmax(void)
640 {
641 	intmax_t val;
642 	char *cp, *ep;
643 
644 	cp = *gargv;
645 	if (cp == NULL)
646 		return 0;
647 	gargv++;
648 
649 	if (*cp == '\"' || *cp == '\'')
650 		return *(cp + 1);
651 
652 	errno = 0;
653 	val = strtoimax(cp, &ep, 0);
654 	check_conversion(cp, ep);
655 	return val;
656 }
657 
658 static uintmax_t
659 getuintmax(void)
660 {
661 	uintmax_t val;
662 	char *cp, *ep;
663 
664 	cp = *gargv;
665 	if (cp == NULL)
666 		return 0;
667 	gargv++;
668 
669 	if (*cp == '\"' || *cp == '\'')
670 		return (uintmax_t)*(cp + 1);
671 
672 	/* strtoumax won't error -ve values */
673 	while (isspace(*(unsigned char *)cp))
674 		cp++;
675 	if (*cp == '-') {
676 		warnx("%s: expected positive numeric value", cp);
677 		rval = 1;
678 		return 0;
679 	}
680 
681 	errno = 0;
682 	val = strtoumax(cp, &ep, 0);
683 	check_conversion(cp, ep);
684 	return val;
685 }
686 
687 static double
688 getdouble(void)
689 {
690 	double val;
691 	char *ep;
692 
693 	if (!*gargv)
694 		return 0.0;
695 
696 	if (**gargv == '\"' || **gargv == '\'')
697 		return (double) *((*gargv++)+1);
698 
699 	errno = 0;
700 	val = strtod(*gargv, &ep);
701 	check_conversion(*gargv++, ep);
702 	return val;
703 }
704 
705 static void
706 check_conversion(const char *s, const char *ep)
707 {
708 	if (*ep) {
709 		if (ep == s)
710 			warnx("%s: expected numeric value", s);
711 		else
712 			warnx("%s: not completely converted", s);
713 		rval = 1;
714 	} else if (errno == ERANGE) {
715 		warnx("%s: %s", s, strerror(ERANGE));
716 		rval = 1;
717 	}
718 }
719 
/* Write the one-line synopsis, prefixed by the program name, to stderr. */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr, "Usage: %s format [arg ...]\n", prog);
}
725