xref: /dflybsd-src/usr.bin/printf/printf.c (revision dff616f71d2401d2d44f647f79744eb1ebb3f189)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
5  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
6  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
7  * Copyright (c) 1989, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * @(#) Copyright (c) 1989, 1993 The Regents of the University of California.  All rights reserved.
35  * @(#)printf.c	8.1 (Berkeley) 7/20/93
36  * $FreeBSD: head/usr.bin/printf/printf.c 337618 2018-08-11 11:13:34Z jilles $
37  */
38 /*
39  * Important: This file is used both as a standalone program /usr/bin/printf
40  * and as a builtin for /bin/sh (#define SHELL).
41  */
42 
43 #include <sys/types.h>
44 
45 #include <ctype.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <inttypes.h>
49 #include <limits.h>
50 #include <locale.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 #include <wchar.h>
56 
57 #ifdef SHELL
58 #define	main printfcmd
59 #include "bltin/bltin.h"
60 #include "error.h"
61 #include "options.h"
62 #endif
63 
/*
 * PF(f, func): print the single value `func' with conversion spec `f'.
 *
 * The macro reads the caller's locals havewidth/haveprec to decide whether
 * fieldwidth and/or precision must be passed ahead of the value, matching
 * the '*' placeholders that were copied into `f'.
 */
#define	PF(f, func) do {						\
	if (havewidth && haveprec)					\
		printf(f, fieldwidth, precision, func);		\
	else if (havewidth)						\
		printf(f, fieldwidth, func);				\
	else if (haveprec)						\
		printf(f, precision, func);				\
	else								\
		printf(f, func);					\
} while (0)
75 
/* Format processing. */
static char	*printf_doformat(char *fmt, int *rval);
static char	*mknum(char *str, char ch);
static int	 escape(char *fmt, int percent, size_t *len);

/* Argument fetchers; each one reads the operand at gargv. */
static int	 asciicode(void);
static int	 getchr(void);
static const char
		*getstr(void);
static int	 getint(int *ip);
static int	 getnum(intmax_t *ip, uintmax_t *uip, int signedconv);
static int	 getfloating(long double *dp, int mod_ldbl);

static void	 usage(void);

/* Accept set for strspn() when measuring the digits of an "n$" index. */
static const char digits[] = "0123456789";

/*
 * Sentinel returned by printf_doformat() when a %b operand contained \c;
 * main() recognises it by address and stops producing output.
 */
static char end_fmt[1];

/* Number of operands available to the current pass over the format. */
static int  myargc;
/* First operand of the current pass; "n$" indexes are relative to it. */
static char **myargv;
/* Next operand to be consumed. */
static char **gargv;
/* Furthest position gargv has reached during the current pass. */
static char **maxargv;
96 
97 int
98 main(int argc, char *argv[])
99 {
100 	size_t len;
101 	int end, rval;
102 	char *format, *fmt, *start;
103 
104 #ifndef SHELL
105 	setlocale(LC_ALL, "");
106 #endif
107 
108 	/*
109 	 * We may not use getopt(3) because calling
110 	 * "printf -f%s oo" may not result in an invalid
111 	 * option error.
112 	 * However common usage and other implementations seem
113 	 * to indicate that we need to allow -- as a discardable
114 	 * option separator.
115 	 */
116 	if (argc > 1 && strcmp(argv[1], "--") == 0) {
117 		argc--;
118 		argv++;
119 	}
120 
121 	if (argc < 2) {
122 		usage();
123 		return (1);
124 	}
125 
126 	argv++;
127 
128 #ifdef SHELL
129 	INTOFF;
130 #endif
131 	/*
132 	 * Basic algorithm is to scan the format string for conversion
133 	 * specifications -- once one is found, find out if the field
134 	 * width or precision is a '*'; if it is, gather up value.  Note,
135 	 * format strings are reused as necessary to use up the provided
136 	 * arguments, arguments of zero/null string are provided to use
137 	 * up the format string.
138 	 */
139 	fmt = format = *argv;
140 	escape(fmt, 1, &len);		/* backslash interpretation */
141 	rval = end = 0;
142 	gargv = ++argv;
143 
144 	for (;;) {
145 		maxargv = gargv;
146 
147 		myargv = gargv;
148 		for (myargc = 0; gargv[myargc]; myargc++)
149 			/* nop */;
150 		start = fmt;
151 		while (fmt < format + len) {
152 			if (fmt[0] == '%') {
153 				fwrite(start, 1, fmt - start, stdout);
154 				if (fmt[1] == '%') {
155 					/* %% prints a % */
156 					putchar('%');
157 					fmt += 2;
158 				} else {
159 					fmt = printf_doformat(fmt, &rval);
160 					if (fmt == NULL || fmt == end_fmt) {
161 #ifdef SHELL
162 						INTON;
163 #endif
164 						return (fmt == NULL ? 1 : rval);
165 					}
166 					end = 0;
167 				}
168 				start = fmt;
169 			} else
170 				fmt++;
171 			if (gargv > maxargv)
172 				maxargv = gargv;
173 		}
174 		gargv = maxargv;
175 
176 		if (end == 1) {
177 			warnx("missing format character");
178 #ifdef SHELL
179 			INTON;
180 #endif
181 			return (1);
182 		}
183 		fwrite(start, 1, fmt - start, stdout);
184 		if (!*gargv) {
185 #ifdef SHELL
186 			INTON;
187 #endif
188 			return (rval);
189 		}
190 		/* Restart at the beginning of the format string. */
191 		fmt = format;
192 		end = 1;
193 	}
194 	/* NOTREACHED */
195 }
196 
197 
198 static char *
199 printf_doformat(char *fmt, int *rval)
200 {
201 	static const char skip1[] = "#'-+ 0";
202 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
203 	char convch, nextch;
204 	char start[strlen(fmt) + 1];
205 	char **fargv;
206 	char *dptr;
207 	int l;
208 
209 	dptr = start;
210 	*dptr++ = '%';
211 	*dptr = 0;
212 
213 	fmt++;
214 
215 	/* look for "n$" field index specifier */
216 	l = strspn(fmt, digits);
217 	if ((l > 0) && (fmt[l] == '$')) {
218 		int idx = atoi(fmt);
219 		if (idx <= myargc) {
220 			gargv = &myargv[idx - 1];
221 		} else {
222 			gargv = &myargv[myargc];
223 		}
224 		if (gargv > maxargv)
225 			maxargv = gargv;
226 		fmt += l + 1;
227 
228 		/* save format argument */
229 		fargv = gargv;
230 	} else {
231 		fargv = NULL;
232 	}
233 
234 	/* skip to field width */
235 	while (*fmt && strchr(skip1, *fmt) != NULL) {
236 		*dptr++ = *fmt++;
237 		*dptr = 0;
238 	}
239 
240 	if (*fmt == '*') {
241 
242 		fmt++;
243 		l = strspn(fmt, digits);
244 		if ((l > 0) && (fmt[l] == '$')) {
245 			int idx = atoi(fmt);
246 			if (fargv == NULL) {
247 				warnx("incomplete use of n$");
248 				return (NULL);
249 			}
250 			if (idx <= myargc) {
251 				gargv = &myargv[idx - 1];
252 			} else {
253 				gargv = &myargv[myargc];
254 			}
255 			fmt += l + 1;
256 		} else if (fargv != NULL) {
257 			warnx("incomplete use of n$");
258 			return (NULL);
259 		}
260 
261 		if (getint(&fieldwidth))
262 			return (NULL);
263 		if (gargv > maxargv)
264 			maxargv = gargv;
265 		havewidth = 1;
266 
267 		*dptr++ = '*';
268 		*dptr = 0;
269 	} else {
270 		havewidth = 0;
271 
272 		/* skip to possible '.', get following precision */
273 		while (isdigit(*fmt)) {
274 			*dptr++ = *fmt++;
275 			*dptr = 0;
276 		}
277 	}
278 
279 	if (*fmt == '.') {
280 		/* precision present? */
281 		fmt++;
282 		*dptr++ = '.';
283 
284 		if (*fmt == '*') {
285 
286 			fmt++;
287 			l = strspn(fmt, digits);
288 			if ((l > 0) && (fmt[l] == '$')) {
289 				int idx = atoi(fmt);
290 				if (fargv == NULL) {
291 					warnx("incomplete use of n$");
292 					return (NULL);
293 				}
294 				if (idx <= myargc) {
295 					gargv = &myargv[idx - 1];
296 				} else {
297 					gargv = &myargv[myargc];
298 				}
299 				fmt += l + 1;
300 			} else if (fargv != NULL) {
301 				warnx("incomplete use of n$");
302 				return (NULL);
303 			}
304 
305 			if (getint(&precision))
306 				return (NULL);
307 			if (gargv > maxargv)
308 				maxargv = gargv;
309 			haveprec = 1;
310 			*dptr++ = '*';
311 			*dptr = 0;
312 		} else {
313 			haveprec = 0;
314 
315 			/* skip to conversion char */
316 			while (isdigit(*fmt)) {
317 				*dptr++ = *fmt++;
318 				*dptr = 0;
319 			}
320 		}
321 	} else
322 		haveprec = 0;
323 	if (!*fmt) {
324 		warnx("missing format character");
325 		return (NULL);
326 	}
327 	*dptr++ = *fmt;
328 	*dptr = 0;
329 
330 	/*
331 	 * Look for a length modifier.  POSIX doesn't have these, so
332 	 * we only support them for floating-point conversions, which
333 	 * are extensions.  This is useful because the L modifier can
334 	 * be used to gain extra range and precision, while omitting
335 	 * it is more likely to produce consistent results on different
336 	 * architectures.  This is not so important for integers
337 	 * because overflow is the only bad thing that can happen to
338 	 * them, but consider the command  printf %a 1.1
339 	 */
340 	if (*fmt == 'L') {
341 		mod_ldbl = 1;
342 		fmt++;
343 		if (!strchr("aAeEfFgG", *fmt)) {
344 			warnx("bad modifier L for %%%c", *fmt);
345 			return (NULL);
346 		}
347 	} else {
348 		mod_ldbl = 0;
349 	}
350 
351 	/* save the current arg offset, and set to the format arg */
352 	if (fargv != NULL) {
353 		gargv = fargv;
354 	}
355 
356 	convch = *fmt;
357 	nextch = *++fmt;
358 
359 	*fmt = '\0';
360 	switch (convch) {
361 	case 'b': {
362 		size_t len;
363 		char *p;
364 		int getout;
365 
366 		/* Convert "b" to "s" for output. */
367 		start[strlen(start) - 1] = 's';
368 		if ((p = strdup(getstr())) == NULL) {
369 			warnx("%s", strerror(ENOMEM));
370 			return (NULL);
371 		}
372 		getout = escape(p, 0, &len);
373 		PF(start, p);
374 		/* Restore format for next loop. */
375 
376 		free(p);
377 		if (getout)
378 			return (end_fmt);
379 		break;
380 	}
381 	case 'c': {
382 		char p;
383 
384 		p = getchr();
385 		if (p != '\0')
386 			PF(start, p);
387 		break;
388 	}
389 	case 's': {
390 		const char *p;
391 
392 		p = getstr();
393 		PF(start, p);
394 		break;
395 	}
396 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
397 		char *f;
398 		intmax_t val;
399 		uintmax_t uval;
400 		int signedconv;
401 
402 		signedconv = (convch == 'd' || convch == 'i');
403 		if ((f = mknum(start, convch)) == NULL)
404 			return (NULL);
405 		if (getnum(&val, &uval, signedconv))
406 			*rval = 1;
407 		if (signedconv)
408 			PF(f, val);
409 		else
410 			PF(f, uval);
411 		break;
412 	}
413 	case 'e': case 'E':
414 	case 'f': case 'F':
415 	case 'g': case 'G':
416 	case 'a': case 'A': {
417 		long double p;
418 
419 		if (getfloating(&p, mod_ldbl))
420 			*rval = 1;
421 		if (mod_ldbl)
422 			PF(start, p);
423 		else
424 			PF(start, (double)p);
425 		break;
426 	}
427 	default:
428 		warnx("illegal format character %c", convch);
429 		return (NULL);
430 	}
431 	*fmt = nextch;
432 	/* return the gargv to the next element */
433 	return (fmt);
434 }
435 
/*
 * Build an intmax_t flavour of an integer conversion spec.
 *
 * str is a complete spec ending in its conversion character.  The result
 * is the same spec with that last character replaced by 'j' followed by
 * ch.  It lives in a static buffer that is grown in 1 KiB steps and
 * reused by later calls, so the caller must not free it.
 *
 * Returns the buffer, or NULL (after a warning) if it cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	char *grown;
	size_t slen, need, rounded;

	slen = strlen(str);
	need = slen + 2;		/* extra 'j' plus the terminator */
	if (need > copy_size) {
		/* Round the request up to the next multiple of 1024. */
		rounded = (need + 1023) & ~(size_t)1023;
		grown = realloc(copy, rounded);
		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	/* Everything but the old conversion character, then "j<ch>". */
	memmove(copy, str, slen - 1);
	copy[slen - 1] = 'j';
	copy[slen] = ch;
	copy[slen + 1] = '\0';
	return (copy);
}
461 
/*
 * Expand backslash escapes in fmt, in place.
 *
 * percent is non-zero for the format operand and zero for a %b operand;
 * the two differ as follows:
 *   - "\c" ends processing for a %b operand (the string is cut there and
 *     1 is returned) but yields a plain 'c' in the format;
 *   - an octal escape takes up to three digits, or up to four for a %b
 *     operand when the first digit is '0';
 *   - in the format, an octal escape that evaluates to '%' is stored as
 *     "%%" so that it is later printed literally.
 * A backslash before any other character yields that character, and a
 * lone trailing backslash is kept as is.
 *
 * The length of the result, which may contain NUL bytes, is stored in
 * *len.  Returns 1 if "\c" ended the string, 0 otherwise.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	char *in, *out;
	int value, ndigits;

	in = out = fmt;
	while (*in != '\0') {
		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		in++;			/* look at the escaped character */
		switch (*in) {
		case '\0':		/* EOS, user error */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		case 'a':		/* bell/alert */
			*out = '\a';
			break;
		case 'b':		/* backspace */
			*out = '\b';
			break;
		case 'c':
			if (!percent) {
				*out = '\0';
				*len = out - base;
				return (1);
			}
			*out = 'c';
			break;
		case 'f':		/* form-feed */
			*out = '\f';
			break;
		case 'n':		/* newline */
			*out = '\n';
			break;
		case 'r':		/* carriage-return */
			*out = '\r';
			break;
		case 't':		/* horizontal tab */
			*out = '\t';
			break;
		case 'v':		/* vertical tab */
			*out = '\v';
			break;
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
			/* octal constant */
			ndigits = (!percent && *in == '0') ? 4 : 3;
			value = 0;
			while (ndigits-- > 0 && *in >= '0' && *in <= '7') {
				value = (value << 3) + (*in - '0');
				in++;
			}
			in--;		/* leave `in' on the last digit */
			if (percent && value == '%') {
				*out++ = '%';
				*out = '%';
			} else
				*out = (char)value;
			break;
		default:		/* includes \\ and \' */
			*out = *in;
			break;
		}
		in++;
		out++;
	}
	*out = '\0';
	*len = out - base;
	return (0);
}
537 
538 static int
539 getchr(void)
540 {
541 	if (!*gargv)
542 		return ('\0');
543 	return ((int)**gargv++);
544 }
545 
546 static const char *
547 getstr(void)
548 {
549 	if (!*gargv)
550 		return ("");
551 	return (*gargv++);
552 }
553 
554 static int
555 getint(int *ip)
556 {
557 	intmax_t val;
558 	uintmax_t uval;
559 	int rval;
560 
561 	if (getnum(&val, &uval, 1))
562 		return (1);
563 	rval = 0;
564 	if (val < INT_MIN || val > INT_MAX) {
565 		warnx("%s: %s", *gargv, strerror(ERANGE));
566 		rval = 1;
567 	}
568 	*ip = (int)val;
569 	return (rval);
570 }
571 
572 static int
573 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
574 {
575 	char *ep;
576 	int rval;
577 
578 	if (!*gargv) {
579 		*ip = *uip = 0;
580 		return (0);
581 	}
582 	if (**gargv == '"' || **gargv == '\'') {
583 		if (signedconv)
584 			*ip = asciicode();
585 		else
586 			*uip = asciicode();
587 		return (0);
588 	}
589 	rval = 0;
590 	errno = 0;
591 	if (signedconv)
592 		*ip = strtoimax(*gargv, &ep, 0);
593 	else
594 		*uip = strtoumax(*gargv, &ep, 0);
595 	if (ep == *gargv) {
596 		warnx("%s: expected numeric value", *gargv);
597 		rval = 1;
598 	}
599 	else if (*ep != '\0') {
600 		warnx("%s: not completely converted", *gargv);
601 		rval = 1;
602 	}
603 	if (errno == ERANGE) {
604 		warnx("%s: %s", *gargv, strerror(ERANGE));
605 		rval = 1;
606 	}
607 	++gargv;
608 	return (rval);
609 }
610 
611 static int
612 getfloating(long double *dp, int mod_ldbl)
613 {
614 	char *ep;
615 	int rval;
616 
617 	if (!*gargv) {
618 		*dp = 0.0;
619 		return (0);
620 	}
621 	if (**gargv == '"' || **gargv == '\'') {
622 		*dp = asciicode();
623 		return (0);
624 	}
625 	rval = 0;
626 	errno = 0;
627 	if (mod_ldbl)
628 		*dp = strtold(*gargv, &ep);
629 	else
630 		*dp = strtod(*gargv, &ep);
631 	if (ep == *gargv) {
632 		warnx("%s: expected numeric value", *gargv);
633 		rval = 1;
634 	} else if (*ep != '\0') {
635 		warnx("%s: not completely converted", *gargv);
636 		rval = 1;
637 	}
638 	if (errno == ERANGE) {
639 		warnx("%s: %s", *gargv, strerror(ERANGE));
640 		rval = 1;
641 	}
642 	++gargv;
643 	return (rval);
644 }
645 
646 static int
647 asciicode(void)
648 {
649 	int ch;
650 	wchar_t wch;
651 	mbstate_t mbs;
652 
653 	ch = (unsigned char)**gargv;
654 	if (ch == '\'' || ch == '"') {
655 		memset(&mbs, 0, sizeof(mbs));
656 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
657 		case (size_t)-2:
658 		case (size_t)-1:
659 			wch = (unsigned char)gargv[0][1];
660 			break;
661 		case 0:
662 			wch = 0;
663 			break;
664 		}
665 		ch = wch;
666 	}
667 	++gargv;
668 	return (ch);
669 }
670 
/* Print the one-line synopsis on stderr; the caller picks the exit status. */
static void
usage(void)
{
	fputs("usage: printf format [arguments ...]\n", stderr);
}
676