1 /* $NetBSD: printf.c,v 1.59 2024/11/24 12:33:00 kre Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 #if !defined(BUILTIN) && !defined(SHELL) 35 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 36 The Regents of the University of California. All rights reserved."); 37 #endif 38 #endif 39 40 #ifndef lint 41 #if 0 42 static char sccsid[] = "@(#)printf.c 8.2 (Berkeley) 3/22/95"; 43 #else 44 __RCSID("$NetBSD: printf.c,v 1.59 2024/11/24 12:33:00 kre Exp $"); 45 #endif 46 #endif /* not lint */ 47 48 #include <sys/types.h> 49 50 #include <ctype.h> 51 #include <err.h> 52 #include <errno.h> 53 #include <inttypes.h> 54 #include <limits.h> 55 #include <locale.h> 56 #include <stdarg.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <unistd.h> 61 62 #ifdef __GNUC__ 63 #define ESCAPE '\e' 64 #else 65 #define ESCAPE 033 66 #endif 67 68 static void conv_escape_str(char *, void (*)(int), int); 69 static char *conv_escape(char *, char *, int); 70 static char *conv_expand(const char *); 71 static wchar_t getchr(void); 72 static long double getdouble(void); 73 static int getwidth(void); 74 static intmax_t getintmax(void); 75 static char *getstr(void); 76 static char *mklong(const char *, char, char); 77 static intmax_t wide_char(const char *, int); 78 static void check_conversion(const char *, const char *); 79 static void usage(void); 80 81 static void b_count(int); 82 static void b_output(int); 83 static size_t b_length; 84 static char *b_fmt; 85 86 static int rval; 87 static char ** gargv, ** firstarg; 88 static int long_double; 89 90 #define ARGNUM ((int)(gargv - firstarg)) 91 92 #ifdef BUILTIN /* csh builtin */ 93 #define main progprintf 94 #endif 95 96 #ifdef SHELL /* sh (aka ash) builtin */ 97 #define main printfcmd 98 #include "../../bin/sh/bltin/bltin.h" 99 #endif /* SHELL */ 100 101 #define PF(f, func) { \ 102 if (fieldwidth != -1) { \ 103 if (precision != -1) \ 104 error = printf(f, fieldwidth, precision, func); \ 105 else \ 106 error = printf(f, fieldwidth, func); \ 107 } else if (precision != -1) \ 108 error = printf(f, precision, func); \ 109 else \ 110 error = printf(f, func); \ 111 } 112 113 #define APF(cpp, f, func) { \ 114 if (fieldwidth != -1) { \ 115 if (precision != -1) \ 116 error = asprintf(cpp, f, fieldwidth, precision, func); \ 117 else \ 118 error = asprintf(cpp, f, fieldwidth, func); \ 119 } else if (precision != -1) \ 120 error = asprintf(cpp, f, precision, func); \ 121 else \ 122 error = asprintf(cpp, f, func); \ 123 } 124 125 #define isodigit(c) ((c) >= '0' && (c) <= '7') 126 #define octtobin(c) ((c) - '0') 127 #define check(c, a) (c) >= (a) && (c) <= (a) + 5 ? (c) - (a) + 10 128 #define hextobin(c) (check(c, 'a') : check(c, 'A') : (c) - '0') 129 130 #ifdef main 131 int main(int, char *[]); 132 #endif 133 134 int 135 main(int argc, char *argv[]) 136 { 137 char *fmt, *start; 138 int fieldwidth, precision; 139 char nextch; 140 char *format; 141 char ch; 142 int error; 143 144 #if !defined(SHELL) && !defined(BUILTIN) 145 (void)setlocale (LC_ALL, ""); 146 #endif 147 148 rval = 0; /* clear for builtin versions (avoid holdover) */ 149 long_double = 0; 150 clearerr(stdout); /* for the builtin version */ 151 152 if (argc > 2 && strchr(argv[1], '%') == NULL) { 153 int o; 154 155 /* 156 * We only do this for argc > 2, as: 157 * 158 * for argc <= 1 159 * at best we have a bare "printf" so there cannot be 160 * any options, thus getopts() would be a waste of time. 161 * The usage() below is assured. 162 * 163 * for argc == 2 164 * There is only one arg (argv[1]) which logically must 165 * be intended to be the (required) format string for 166 * printf, without which we can do nothing so rather 167 * than usage() if it happens to start with a '-' we 168 * just avoid getopts() and treat it as a format string. 169 * 170 * Then, for argc > 2, we also skip this if there is a '%' 171 * anywhere in argv[1] as it is likely that would be intended 172 * to be the format string, rather than options, even if it 173 * starts with a '-' so we skip getopts() in that case as well. 174 * 175 * Note that this would fail should there ever be an option 176 * which takes an arbitrary string value, which could be given 177 * as -Oabc%def so should that ever become possible, remove 178 * the strchr() test above. 179 */ 180 181 while ((o = getopt(argc, argv, "L")) != -1) { 182 switch (o) { 183 case 'L': 184 long_double = 1; 185 break; 186 case '?': 187 default: 188 usage(); 189 return 1; 190 } 191 } 192 argc -= optind; 193 argv += optind; 194 } else { 195 argc -= 1; /* drop argv[0] (the program name) */ 196 argv += 1; 197 } 198 199 if (argc < 1) { /* Nothing left at all? */ 200 usage(); 201 return 1; 202 } 203 204 format = *argv; /* First remaining arg is the format string */ 205 firstarg = gargv = ++argv; /* remaining args are for that to consume */ 206 207 #define SKIP1 "#-+ 0'" 208 #define SKIP2 "0123456789" 209 do { 210 /* 211 * Basic algorithm is to scan the format string for conversion 212 * specifications -- once one is found, find out if the field 213 * width or precision is a '*'; if it is, gather up value. 214 * Note, format strings are reused as necessary to use up the 215 * provided arguments, arguments of zero/null string are 216 * provided to use up the format string. 217 */ 218 219 /* find next format specification */ 220 for (fmt = format; (ch = *fmt++) != '\0';) { 221 if (ch == '\\') { 222 char c_ch; 223 224 fmt = conv_escape(fmt, &c_ch, 0); 225 putchar(c_ch); 226 continue; 227 } 228 if (ch != '%' || (*fmt == '%' && ++fmt)) { 229 (void)putchar(ch); 230 continue; 231 } 232 233 /* 234 * Ok - we've found a format specification, 235 * Save its address for a later printf(). 236 */ 237 start = fmt - 1; 238 239 /* skip to field width */ 240 fmt += strspn(fmt, SKIP1); 241 if (*fmt == '*') { 242 fmt++; 243 fieldwidth = getwidth(); 244 } else { 245 fieldwidth = -1; 246 247 /* skip to possible '.' for precision */ 248 fmt += strspn(fmt, SKIP2); 249 } 250 251 if (*fmt == '.') { 252 /* get following precision */ 253 fmt++; 254 if (*fmt == '*') { 255 fmt++; 256 precision = getwidth(); 257 } else { 258 precision = -1; 259 fmt += strspn(fmt, SKIP2); 260 } 261 } else 262 precision = -1; 263 264 ch = *fmt; 265 if (!ch) { 266 warnx("%s: missing format character", start); 267 return 1; 268 } 269 270 /* 271 * null terminate format string to we can use it 272 * as an argument to printf. 273 */ 274 nextch = fmt[1]; 275 fmt[1] = 0; 276 277 switch (ch) { 278 279 case 'B': { 280 const char *p = conv_expand(getstr()); 281 282 if (p == NULL) 283 goto out; 284 *fmt = 's'; 285 PF(start, p); 286 if (error < 0) 287 goto out; 288 break; 289 } 290 case 'b': { 291 /* 292 * There has to be a better way to do this, 293 * but the string we generate might have 294 * embedded nulls 295 */ 296 static char *a, *t; 297 char *cp = getstr(); 298 299 /* Free on entry in case shell longjumped out */ 300 if (a != NULL) 301 free(a); 302 a = NULL; 303 if (t != NULL) 304 free(t); 305 t = NULL; 306 307 /* Count number of bytes we want to output */ 308 b_length = 0; 309 conv_escape_str(cp, b_count, 0); 310 t = malloc(b_length + 1); 311 if (t == NULL) 312 goto out; 313 (void)memset(t, 'x', b_length); 314 t[b_length] = 0; 315 316 /* Get printf to calculate the lengths */ 317 *fmt = 's'; 318 APF(&a, start, t); 319 if (error == -1) 320 goto out; 321 b_fmt = a; 322 323 /* Output leading spaces and data bytes */ 324 conv_escape_str(cp, b_output, 1); 325 326 /* Add any trailing spaces */ 327 printf("%s", b_fmt); 328 break; 329 } 330 case 'C': { 331 wchar_t p = (wchar_t)getintmax(); 332 char *f = mklong(start, 'c', 'l'); 333 334 PF(f, p); 335 if (error < 0) 336 goto out; 337 break; 338 } 339 case 'c': { 340 wchar_t p = getchr(); 341 char *f = mklong(start, ch, 'l'); 342 343 PF(f, p); 344 if (error < 0) 345 goto out; 346 break; 347 } 348 case 's': { 349 char *p = getstr(); 350 351 PF(start, p); 352 if (error < 0) 353 goto out; 354 break; 355 } 356 case 'd': 357 case 'i': { 358 intmax_t p = getintmax(); 359 char *f = mklong(start, ch, 'j'); 360 361 PF(f, p); 362 if (error < 0) 363 goto out; 364 break; 365 } 366 case 'o': 367 case 'u': 368 case 'x': 369 case 'X': { 370 uintmax_t p = (uintmax_t)getintmax(); 371 char *f = mklong(start, ch, 'j'); 372 373 PF(f, p); 374 if (error < 0) 375 goto out; 376 break; 377 } 378 case 'a': 379 case 'A': 380 case 'e': 381 case 'E': 382 case 'f': 383 case 'F': 384 case 'g': 385 case 'G': { 386 long double p = getdouble(); 387 388 if (long_double) { 389 char * f = mklong(start, ch, 'L'); 390 PF(f, p); 391 } else { 392 double pp = (double)p; 393 PF(start, pp); 394 } 395 if (error < 0) 396 goto out; 397 break; 398 } 399 case '%': 400 /* Don't ask, but this is useful ... */ 401 if (fieldwidth == 'N' && precision == 'B') 402 return 0; 403 /* FALLTHROUGH */ 404 default: 405 warnx("%s: invalid directive", start); 406 return 1; 407 } 408 *fmt++ = ch; 409 *fmt = nextch; 410 /* escape if a \c was encountered */ 411 if (rval & 0x100) 412 goto done; 413 } 414 } while (gargv != argv && *gargv); 415 416 done:; 417 (void)fflush(stdout); 418 if (ferror(stdout)) { 419 clearerr(stdout); 420 err(1, "write error"); 421 } 422 return rval & ~0x100; 423 out:; 424 warn("print failed"); 425 return 1; 426 } 427 428 /* helper functions for conv_escape_str */ 429 430 static void 431 /*ARGSUSED*/ 432 b_count(int ch) 433 { 434 b_length++; 435 } 436 437 /* Output one converted character for every 'x' in the 'format' */ 438 439 static void 440 b_output(int ch) 441 { 442 for (;;) { 443 switch (*b_fmt++) { 444 case 0: 445 b_fmt--; 446 return; 447 case ' ': 448 putchar(' '); 449 break; 450 default: 451 putchar(ch); 452 return; 453 } 454 } 455 } 456 457 458 /* 459 * Print SysV echo(1) style escape string 460 * Halts processing string if a \c escape is encountered. 461 */ 462 static void 463 conv_escape_str(char *str, void (*do_putchar)(int), int quiet) 464 { 465 int value; 466 int ch; 467 char c; 468 469 while ((ch = *str++) != '\0') { 470 if (ch != '\\') { 471 do_putchar(ch); 472 continue; 473 } 474 475 ch = *str++; 476 if (ch == 'c') { 477 /* \c as in SYSV echo - abort all processing.... */ 478 rval |= 0x100; 479 break; 480 } 481 482 /* 483 * %b string octal constants are not like those in C. 484 * They start with a \0, and are followed by 0, 1, 2, 485 * or 3 octal digits. 486 */ 487 if (ch == '0') { 488 int octnum = 0, i; 489 490 for (i = 0; i < 3; i++) { 491 if (!isdigit((unsigned char)*str) || *str > '7') 492 break; 493 octnum = (octnum << 3) | (*str++ - '0'); 494 } 495 do_putchar(octnum); 496 continue; 497 } 498 499 /* \[M][^|-]C as defined by vis(3) */ 500 if (ch == 'M' && *str == '-') { 501 do_putchar(0200 | str[1]); 502 str += 2; 503 continue; 504 } 505 if (ch == 'M' && *str == '^') { 506 str++; 507 value = 0200; 508 ch = '^'; 509 } else 510 value = 0; 511 if (ch == '^') { 512 ch = *str++; 513 if (ch == '?') 514 value |= 0177; 515 else 516 value |= ch & 037; 517 do_putchar(value); 518 continue; 519 } 520 521 /* Finally test for sequences valid in the format string */ 522 str = conv_escape(str - 1, &c, quiet); 523 do_putchar(c); 524 } 525 } 526 527 /* 528 * Print "standard" escape characters 529 */ 530 static char * 531 conv_escape(char *str, char *conv_ch, int quiet) 532 { 533 int value = 0; 534 char ch, *begin; 535 int c; 536 537 ch = *str++; 538 539 switch (ch) { 540 case '\0': 541 if (!quiet) 542 warnx("incomplete escape sequence"); 543 rval = 1; 544 value = '\\'; 545 --str; 546 break; 547 548 case '0': case '1': case '2': case '3': 549 case '4': case '5': case '6': case '7': 550 str--; 551 for (c = 3; c-- && isodigit(*str); str++) { 552 value <<= 3; 553 value += octtobin(*str); 554 } 555 break; 556 557 case 'x': 558 /* 559 * Hexadecimal character constants are not required to be 560 * supported (by SuS v1) because there is no consistent 561 * way to detect the end of the constant. 562 * Supporting 2 byte constants is a compromise. 563 */ 564 begin = str; 565 for (c = 2; c-- && isxdigit((unsigned char)*str); str++) { 566 value <<= 4; 567 value += hextobin(*str); 568 } 569 if (str == begin) { 570 if (!quiet) 571 warnx("\\x%s: missing hexadecimal number " 572 "in escape", begin); 573 rval = 1; 574 } 575 break; 576 577 case '\\': value = '\\'; break; /* backslash */ 578 case '\'': value = '\''; break; /* single quote */ 579 case '"': value = '"'; break; /* double quote */ 580 case 'a': value = '\a'; break; /* alert */ 581 case 'b': value = '\b'; break; /* backspace */ 582 case 'e': value = ESCAPE; break; /* escape */ 583 case 'E': value = ESCAPE; break; /* escape */ 584 case 'f': value = '\f'; break; /* form-feed */ 585 case 'n': value = '\n'; break; /* newline */ 586 case 'r': value = '\r'; break; /* carriage-return */ 587 case 't': value = '\t'; break; /* tab */ 588 case 'v': value = '\v'; break; /* vertical-tab */ 589 590 default: 591 if (!quiet) 592 warnx("unknown escape sequence `\\%c'", ch); 593 rval = 1; 594 value = ch; 595 break; 596 } 597 598 *conv_ch = (char)value; 599 return str; 600 } 601 602 /* expand a string so that everything is printable */ 603 604 static char * 605 conv_expand(const char *str) 606 { 607 static char *conv_str; 608 char *cp; 609 char ch; 610 611 if (conv_str) 612 free(conv_str); 613 /* get a buffer that is definitely large enough.... */ 614 conv_str = malloc(4 * strlen(str) + 1); 615 if (!conv_str) 616 return NULL; 617 cp = conv_str; 618 619 while ((ch = *(const char *)str++) != '\0') { 620 switch (ch) { 621 /* Use C escapes for expected control characters */ 622 case '\\': ch = '\\'; break; /* backslash */ 623 case '\'': ch = '\''; break; /* single quote */ 624 case '"': ch = '"'; break; /* double quote */ 625 case '\a': ch = 'a'; break; /* alert */ 626 case '\b': ch = 'b'; break; /* backspace */ 627 case ESCAPE: ch = 'e'; break; /* escape */ 628 case '\f': ch = 'f'; break; /* form-feed */ 629 case '\n': ch = 'n'; break; /* newline */ 630 case '\r': ch = 'r'; break; /* carriage-return */ 631 case '\t': ch = 't'; break; /* tab */ 632 case '\v': ch = 'v'; break; /* vertical-tab */ 633 default: 634 /* Copy anything printable */ 635 if (isprint((unsigned char)ch)) { 636 *cp++ = ch; 637 continue; 638 } 639 /* Use vis(3) encodings for the rest */ 640 *cp++ = '\\'; 641 if (ch & 0200) { 642 *cp++ = 'M'; 643 ch &= (char)~0200; 644 } 645 if (ch == 0177) { 646 *cp++ = '^'; 647 *cp++ = '?'; 648 continue; 649 } 650 if (ch < 040) { 651 *cp++ = '^'; 652 *cp++ = ch | 0100; 653 continue; 654 } 655 *cp++ = '-'; 656 *cp++ = ch; 657 continue; 658 } 659 *cp++ = '\\'; 660 *cp++ = ch; 661 } 662 663 *cp = 0; 664 return conv_str; 665 } 666 667 static char * 668 mklong(const char *str, char ch, char longer) 669 { 670 static char copy[64]; 671 size_t len; 672 673 len = strlen(str) + 2; 674 if (len > sizeof copy) { 675 warnx("format \"%s\" too complex", str); 676 len = 4; 677 rval = 1; 678 } 679 (void)memmove(copy, str, len - 3); 680 copy[len - 3] = longer; 681 copy[len - 2] = ch; 682 copy[len - 1] = '\0'; 683 return copy; 684 } 685 686 static wchar_t 687 getchr(void) 688 { 689 if (!*gargv) 690 return 0; 691 return (wchar_t)wide_char(*gargv++, 0); 692 } 693 694 static char * 695 getstr(void) 696 { 697 static char empty[] = ""; 698 699 if (!*gargv) 700 return empty; 701 return *gargv++; 702 } 703 704 static int 705 getwidth(void) 706 { 707 unsigned long val; 708 char *s, *ep; 709 710 s = *gargv; 711 if (s == NULL) 712 return 0; 713 gargv++; 714 715 errno = 0; 716 val = strtoul(s, &ep, 0); 717 if (!isdigit(*(unsigned char *)s)) { 718 warnx("Arg %d: '%s' value for '*' width/precision" 719 " must be an unsigned integer", ARGNUM, s); 720 rval = 1; 721 val = 0; 722 } else 723 check_conversion(s, ep); 724 725 /* Arbitrarily 'restrict' field widths to 1Mbyte */ 726 if (val > 1 << 20) { 727 warnx("Arg %d: %s: invalid field width/precision", ARGNUM, s); 728 rval = 1; 729 return 0; 730 } 731 732 return (int)val; 733 } 734 735 static intmax_t 736 getintmax(void) 737 { 738 intmax_t val; 739 char *cp, *ep; 740 741 cp = *gargv; 742 if (cp == NULL) 743 return 0; 744 gargv++; 745 746 if (*cp == '\"' || *cp == '\'') 747 return wide_char(cp, 1); 748 749 errno = 0; 750 val = strtoimax(cp, &ep, 0); 751 if (*cp != '+' && *cp != '-' && !isdigit(*(unsigned char *)cp)) { 752 warnx("Arg %d: '%s' numeric value required", ARGNUM, cp); 753 rval = 1; 754 } else 755 check_conversion(cp, ep); 756 return val; 757 } 758 759 static long double 760 getdouble(void) 761 { 762 long double val; 763 char *ep; 764 765 if (!*gargv) 766 return 0.0; 767 768 /* This is a NetBSD extension, not required by POSIX (it is useless) */ 769 if (*(ep = *gargv) == '\"' || *ep == '\'') 770 return (long double)wide_char(ep, 1); 771 772 errno = 0; 773 val = strtold(*gargv, &ep); 774 check_conversion(*gargv++, ep); 775 return val; 776 } 777 778 /* 779 * Fetch a wide character from the string given 780 * 781 * if all that character must consume the entire string 782 * after an initial leading byte (ascii char) is ignored, 783 * (used for parsing intger args using the 'X syntax) 784 * 785 * if !all then there is no requirement that the whole 786 * string be consumed (remaining characters are just ignored) 787 * but the character is to start at *p. 788 * (used for fetching the first chartacter of a string arg for %c) 789 */ 790 static intmax_t 791 wide_char(const char *p, int all) 792 { 793 wchar_t wch; 794 size_t len; 795 int n; 796 797 (void)mbtowc(NULL, NULL, 0); 798 n = mbtowc(&wch, p + all, (len = strlen(p + all)) + 1); 799 if (n < 0) { 800 warn("Arg %d: %s", ARGNUM, p); 801 rval = 1; 802 } else if (all && (size_t)n != len) { 803 warnx("Arg %d: %s: not completely converted", 804 ARGNUM, p); 805 rval = 1; 806 } 807 808 return (intmax_t) wch; 809 } 810 811 static void 812 check_conversion(const char *s, const char *ep) 813 { 814 if (!*s) { 815 warnx("Arg %d: unexpected empty value ('')", ARGNUM); 816 rval = 1; 817 return; 818 } 819 820 if (*ep) { 821 if (ep == s) 822 warnx("Arg %d: %s: numeric value expected", ARGNUM, s); 823 else 824 warnx("Arg %d: %s: not completely converted", 825 ARGNUM, s); 826 rval = 1; 827 return; 828 } 829 830 if (errno == ERANGE) { 831 warnx("Arg %d: %s: %s", ARGNUM, s, strerror(ERANGE)); 832 rval = 1; 833 } 834 } 835 836 static void 837 usage(void) 838 { 839 (void)fprintf(stderr, 840 "Usage: %s [-L] format [arg ...]\n", getprogname()); 841 } 842