1 /* $NetBSD: printf.c,v 1.58 2024/08/07 15:40:03 kre Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 #if !defined(BUILTIN) && !defined(SHELL) 35 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 36 The Regents of the University of California. All rights reserved."); 37 #endif 38 #endif 39 40 #ifndef lint 41 #if 0 42 static char sccsid[] = "@(#)printf.c 8.2 (Berkeley) 3/22/95"; 43 #else 44 __RCSID("$NetBSD: printf.c,v 1.58 2024/08/07 15:40:03 kre Exp $"); 45 #endif 46 #endif /* not lint */ 47 48 #include <sys/types.h> 49 50 #include <ctype.h> 51 #include <err.h> 52 #include <errno.h> 53 #include <inttypes.h> 54 #include <limits.h> 55 #include <locale.h> 56 #include <stdarg.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <unistd.h> 61 62 #ifdef __GNUC__ 63 #define ESCAPE '\e' 64 #else 65 #define ESCAPE 033 66 #endif 67 68 static void conv_escape_str(char *, void (*)(int), int); 69 static char *conv_escape(char *, char *, int); 70 static char *conv_expand(const char *); 71 static wchar_t getchr(void); 72 static long double getdouble(void); 73 static int getwidth(void); 74 static intmax_t getintmax(void); 75 static char *getstr(void); 76 static char *mklong(const char *, char, char); 77 static intmax_t wide_char(const char *, int); 78 static void check_conversion(const char *, const char *); 79 static void usage(void); 80 81 static void b_count(int); 82 static void b_output(int); 83 static size_t b_length; 84 static char *b_fmt; 85 86 static int rval; 87 static char **gargv; 88 static int long_double; 89 90 #ifdef BUILTIN /* csh builtin */ 91 #define main progprintf 92 #endif 93 94 #ifdef SHELL /* sh (aka ash) builtin */ 95 #define main printfcmd 96 #include "../../bin/sh/bltin/bltin.h" 97 #endif /* SHELL */ 98 99 #define PF(f, func) { \ 100 if (fieldwidth != -1) { \ 101 if (precision != -1) \ 102 error = printf(f, fieldwidth, precision, func); \ 103 else \ 104 error = printf(f, fieldwidth, func); \ 105 } else if (precision != -1) \ 106 error = printf(f, precision, func); \ 107 else \ 108 error = printf(f, func); \ 109 } 110 111 #define APF(cpp, f, func) { \ 112 if (fieldwidth != -1) { \ 113 if (precision != -1) \ 114 error = asprintf(cpp, f, fieldwidth, precision, func); \ 115 else \ 116 error = asprintf(cpp, f, fieldwidth, func); \ 117 } else if (precision != -1) \ 118 error = asprintf(cpp, f, precision, func); \ 119 else \ 120 error = asprintf(cpp, f, func); \ 121 } 122 123 #define isodigit(c) ((c) >= '0' && (c) <= '7') 124 #define octtobin(c) ((c) - '0') 125 #define check(c, a) (c) >= (a) && (c) <= (a) + 5 ? (c) - (a) + 10 126 #define hextobin(c) (check(c, 'a') : check(c, 'A') : (c) - '0') 127 #ifdef main 128 int main(int, char *[]); 129 #endif 130 131 int 132 main(int argc, char *argv[]) 133 { 134 char *fmt, *start; 135 int fieldwidth, precision; 136 char nextch; 137 char *format; 138 char ch; 139 int error; 140 141 #if !defined(SHELL) && !defined(BUILTIN) 142 (void)setlocale (LC_ALL, ""); 143 #endif 144 145 rval = 0; /* clear for builtin versions (avoid holdover) */ 146 long_double = 0; 147 clearerr(stdout); /* for the builtin version */ 148 149 if (argc > 2 && strchr(argv[1], '%') == NULL) { 150 int o; 151 152 /* 153 * We only do this for argc > 2, as: 154 * 155 * for argc <= 1 156 * at best we have a bare "printf" so there cannot be 157 * any options, thus getopts() would be a waste of time. 158 * The usage() below is assured. 159 * 160 * for argc == 2 161 * There is only one arg (argv[1]) which logically must 162 * be intended to be the (required) format string for 163 * printf, without which we can do nothing so rather 164 * than usage() if it happens to start with a '-' we 165 * just avoid getopts() and treat it as a format string. 166 * 167 * Then, for argc > 2, we also skip this if there is a '%' 168 * anywhere in argv[1] as it is likely that would be intended 169 * to be the format string, rather than options, even if it 170 * starts with a '-' so we skip getopts() in that case as well. 171 * 172 * Note that this would fail should there ever be an option 173 * which takes an arbitrary string value, which could be given 174 * as -Oabc%def so should that ever become possible, remove 175 * the strchr() test above. 176 */ 177 178 while ((o = getopt(argc, argv, "L")) != -1) { 179 switch (o) { 180 case 'L': 181 long_double = 1; 182 break; 183 case '?': 184 default: 185 usage(); 186 return 1; 187 } 188 } 189 argc -= optind; 190 argv += optind; 191 } else { 192 argc -= 1; /* drop argv[0] (the program name) */ 193 argv += 1; 194 } 195 196 if (argc < 1) { /* Nothing left at all? */ 197 usage(); 198 return 1; 199 } 200 201 format = *argv; /* First remaining arg is the format string */ 202 gargv = ++argv; /* remaining args are for that to consume */ 203 204 #define SKIP1 "#-+ 0'" 205 #define SKIP2 "0123456789" 206 do { 207 /* 208 * Basic algorithm is to scan the format string for conversion 209 * specifications -- once one is found, find out if the field 210 * width or precision is a '*'; if it is, gather up value. 211 * Note, format strings are reused as necessary to use up the 212 * provided arguments, arguments of zero/null string are 213 * provided to use up the format string. 214 */ 215 216 /* find next format specification */ 217 for (fmt = format; (ch = *fmt++) != '\0';) { 218 if (ch == '\\') { 219 char c_ch; 220 fmt = conv_escape(fmt, &c_ch, 0); 221 putchar(c_ch); 222 continue; 223 } 224 if (ch != '%' || (*fmt == '%' && ++fmt)) { 225 (void)putchar(ch); 226 continue; 227 } 228 229 /* 230 * Ok - we've found a format specification, 231 * Save its address for a later printf(). 232 */ 233 start = fmt - 1; 234 235 /* skip to field width */ 236 fmt += strspn(fmt, SKIP1); 237 if (*fmt == '*') { 238 fmt++; 239 fieldwidth = getwidth(); 240 } else { 241 fieldwidth = -1; 242 243 /* skip to possible '.' for precision */ 244 fmt += strspn(fmt, SKIP2); 245 } 246 247 if (*fmt == '.') { 248 /* get following precision */ 249 fmt++; 250 if (*fmt == '*') { 251 fmt++; 252 precision = getwidth(); 253 } else { 254 precision = -1; 255 fmt += strspn(fmt, SKIP2); 256 } 257 } else 258 precision = -1; 259 260 ch = *fmt; 261 if (!ch) { 262 warnx("%s: missing format character", start); 263 return 1; 264 } 265 266 /* 267 * null terminate format string to we can use it 268 * as an argument to printf. 269 */ 270 nextch = fmt[1]; 271 fmt[1] = 0; 272 273 switch (ch) { 274 275 case 'B': { 276 const char *p = conv_expand(getstr()); 277 278 if (p == NULL) 279 goto out; 280 *fmt = 's'; 281 PF(start, p); 282 if (error < 0) 283 goto out; 284 break; 285 } 286 case 'b': { 287 /* 288 * There has to be a better way to do this, 289 * but the string we generate might have 290 * embedded nulls 291 */ 292 static char *a, *t; 293 char *cp = getstr(); 294 295 /* Free on entry in case shell longjumped out */ 296 if (a != NULL) 297 free(a); 298 a = NULL; 299 if (t != NULL) 300 free(t); 301 t = NULL; 302 303 /* Count number of bytes we want to output */ 304 b_length = 0; 305 conv_escape_str(cp, b_count, 0); 306 t = malloc(b_length + 1); 307 if (t == NULL) 308 goto out; 309 (void)memset(t, 'x', b_length); 310 t[b_length] = 0; 311 312 /* Get printf to calculate the lengths */ 313 *fmt = 's'; 314 APF(&a, start, t); 315 if (error == -1) 316 goto out; 317 b_fmt = a; 318 319 /* Output leading spaces and data bytes */ 320 conv_escape_str(cp, b_output, 1); 321 322 /* Add any trailing spaces */ 323 printf("%s", b_fmt); 324 break; 325 } 326 case 'C': { 327 wchar_t p = (wchar_t)getintmax(); 328 char *f = mklong(start, 'c', 'l'); 329 330 PF(f, p); 331 if (error < 0) 332 goto out; 333 break; 334 } 335 case 'c': { 336 wchar_t p = getchr(); 337 char *f = mklong(start, ch, 'l'); 338 339 PF(f, p); 340 if (error < 0) 341 goto out; 342 break; 343 } 344 case 's': { 345 char *p = getstr(); 346 347 PF(start, p); 348 if (error < 0) 349 goto out; 350 break; 351 } 352 case 'd': 353 case 'i': { 354 intmax_t p = getintmax(); 355 char *f = mklong(start, ch, 'j'); 356 357 PF(f, p); 358 if (error < 0) 359 goto out; 360 break; 361 } 362 case 'o': 363 case 'u': 364 case 'x': 365 case 'X': { 366 uintmax_t p = (uintmax_t)getintmax(); 367 char *f = mklong(start, ch, 'j'); 368 369 PF(f, p); 370 if (error < 0) 371 goto out; 372 break; 373 } 374 case 'a': 375 case 'A': 376 case 'e': 377 case 'E': 378 case 'f': 379 case 'F': 380 case 'g': 381 case 'G': { 382 long double p = getdouble(); 383 384 if (long_double) { 385 char * f = mklong(start, ch, 'L'); 386 PF(f, p); 387 } else { 388 double pp = (double)p; 389 PF(start, pp); 390 } 391 if (error < 0) 392 goto out; 393 break; 394 } 395 case '%': 396 /* Don't ask, but this is useful ... */ 397 if (fieldwidth == 'N' && precision == 'B') 398 return 0; 399 /* FALLTHROUGH */ 400 default: 401 warnx("%s: invalid directive", start); 402 return 1; 403 } 404 *fmt++ = ch; 405 *fmt = nextch; 406 /* escape if a \c was encountered */ 407 if (rval & 0x100) 408 goto done; 409 } 410 } while (gargv != argv && *gargv); 411 412 done: 413 (void)fflush(stdout); 414 if (ferror(stdout)) { 415 clearerr(stdout); 416 err(1, "write error"); 417 } 418 return rval & ~0x100; 419 out: 420 warn("print failed"); 421 return 1; 422 } 423 424 /* helper functions for conv_escape_str */ 425 426 static void 427 /*ARGSUSED*/ 428 b_count(int ch) 429 { 430 b_length++; 431 } 432 433 /* Output one converted character for every 'x' in the 'format' */ 434 435 static void 436 b_output(int ch) 437 { 438 for (;;) { 439 switch (*b_fmt++) { 440 case 0: 441 b_fmt--; 442 return; 443 case ' ': 444 putchar(' '); 445 break; 446 default: 447 putchar(ch); 448 return; 449 } 450 } 451 } 452 453 454 /* 455 * Print SysV echo(1) style escape string 456 * Halts processing string if a \c escape is encountered. 457 */ 458 static void 459 conv_escape_str(char *str, void (*do_putchar)(int), int quiet) 460 { 461 int value; 462 int ch; 463 char c; 464 465 while ((ch = *str++) != '\0') { 466 if (ch != '\\') { 467 do_putchar(ch); 468 continue; 469 } 470 471 ch = *str++; 472 if (ch == 'c') { 473 /* \c as in SYSV echo - abort all processing.... */ 474 rval |= 0x100; 475 break; 476 } 477 478 /* 479 * %b string octal constants are not like those in C. 480 * They start with a \0, and are followed by 0, 1, 2, 481 * or 3 octal digits. 482 */ 483 if (ch == '0') { 484 int octnum = 0, i; 485 for (i = 0; i < 3; i++) { 486 if (!isdigit((unsigned char)*str) || *str > '7') 487 break; 488 octnum = (octnum << 3) | (*str++ - '0'); 489 } 490 do_putchar(octnum); 491 continue; 492 } 493 494 /* \[M][^|-]C as defined by vis(3) */ 495 if (ch == 'M' && *str == '-') { 496 do_putchar(0200 | str[1]); 497 str += 2; 498 continue; 499 } 500 if (ch == 'M' && *str == '^') { 501 str++; 502 value = 0200; 503 ch = '^'; 504 } else 505 value = 0; 506 if (ch == '^') { 507 ch = *str++; 508 if (ch == '?') 509 value |= 0177; 510 else 511 value |= ch & 037; 512 do_putchar(value); 513 continue; 514 } 515 516 /* Finally test for sequences valid in the format string */ 517 str = conv_escape(str - 1, &c, quiet); 518 do_putchar(c); 519 } 520 } 521 522 /* 523 * Print "standard" escape characters 524 */ 525 static char * 526 conv_escape(char *str, char *conv_ch, int quiet) 527 { 528 int value = 0; 529 char ch, *begin; 530 int c; 531 532 ch = *str++; 533 534 switch (ch) { 535 case '\0': 536 if (!quiet) 537 warnx("incomplete escape sequence"); 538 rval = 1; 539 value = '\\'; 540 --str; 541 break; 542 543 case '0': case '1': case '2': case '3': 544 case '4': case '5': case '6': case '7': 545 str--; 546 for (c = 3; c-- && isodigit(*str); str++) { 547 value <<= 3; 548 value += octtobin(*str); 549 } 550 break; 551 552 case 'x': 553 /* 554 * Hexadecimal character constants are not required to be 555 * supported (by SuS v1) because there is no consistent 556 * way to detect the end of the constant. 557 * Supporting 2 byte constants is a compromise. 558 */ 559 begin = str; 560 for (c = 2; c-- && isxdigit((unsigned char)*str); str++) { 561 value <<= 4; 562 value += hextobin(*str); 563 } 564 if (str == begin) { 565 if (!quiet) 566 warnx("\\x%s: missing hexadecimal number " 567 "in escape", begin); 568 rval = 1; 569 } 570 break; 571 572 case '\\': value = '\\'; break; /* backslash */ 573 case '\'': value = '\''; break; /* single quote */ 574 case '"': value = '"'; break; /* double quote */ 575 case 'a': value = '\a'; break; /* alert */ 576 case 'b': value = '\b'; break; /* backspace */ 577 case 'e': value = ESCAPE; break; /* escape */ 578 case 'E': value = ESCAPE; break; /* escape */ 579 case 'f': value = '\f'; break; /* form-feed */ 580 case 'n': value = '\n'; break; /* newline */ 581 case 'r': value = '\r'; break; /* carriage-return */ 582 case 't': value = '\t'; break; /* tab */ 583 case 'v': value = '\v'; break; /* vertical-tab */ 584 585 default: 586 if (!quiet) 587 warnx("unknown escape sequence `\\%c'", ch); 588 rval = 1; 589 value = ch; 590 break; 591 } 592 593 *conv_ch = (char)value; 594 return str; 595 } 596 597 /* expand a string so that everything is printable */ 598 599 static char * 600 conv_expand(const char *str) 601 { 602 static char *conv_str; 603 char *cp; 604 char ch; 605 606 if (conv_str) 607 free(conv_str); 608 /* get a buffer that is definitely large enough.... */ 609 conv_str = malloc(4 * strlen(str) + 1); 610 if (!conv_str) 611 return NULL; 612 cp = conv_str; 613 614 while ((ch = *(const char *)str++) != '\0') { 615 switch (ch) { 616 /* Use C escapes for expected control characters */ 617 case '\\': ch = '\\'; break; /* backslash */ 618 case '\'': ch = '\''; break; /* single quote */ 619 case '"': ch = '"'; break; /* double quote */ 620 case '\a': ch = 'a'; break; /* alert */ 621 case '\b': ch = 'b'; break; /* backspace */ 622 case ESCAPE: ch = 'e'; break; /* escape */ 623 case '\f': ch = 'f'; break; /* form-feed */ 624 case '\n': ch = 'n'; break; /* newline */ 625 case '\r': ch = 'r'; break; /* carriage-return */ 626 case '\t': ch = 't'; break; /* tab */ 627 case '\v': ch = 'v'; break; /* vertical-tab */ 628 default: 629 /* Copy anything printable */ 630 if (isprint((unsigned char)ch)) { 631 *cp++ = ch; 632 continue; 633 } 634 /* Use vis(3) encodings for the rest */ 635 *cp++ = '\\'; 636 if (ch & 0200) { 637 *cp++ = 'M'; 638 ch &= (char)~0200; 639 } 640 if (ch == 0177) { 641 *cp++ = '^'; 642 *cp++ = '?'; 643 continue; 644 } 645 if (ch < 040) { 646 *cp++ = '^'; 647 *cp++ = ch | 0100; 648 continue; 649 } 650 *cp++ = '-'; 651 *cp++ = ch; 652 continue; 653 } 654 *cp++ = '\\'; 655 *cp++ = ch; 656 } 657 658 *cp = 0; 659 return conv_str; 660 } 661 662 static char * 663 mklong(const char *str, char ch, char longer) 664 { 665 static char copy[64]; 666 size_t len; 667 668 len = strlen(str) + 2; 669 if (len > sizeof copy) { 670 warnx("format \"%s\" too complex", str); 671 len = 4; 672 rval = 1; 673 } 674 (void)memmove(copy, str, len - 3); 675 copy[len - 3] = longer; 676 copy[len - 2] = ch; 677 copy[len - 1] = '\0'; 678 return copy; 679 } 680 681 static wchar_t 682 getchr(void) 683 { 684 if (!*gargv) 685 return 0; 686 return (wchar_t)wide_char(*gargv++, 0); 687 } 688 689 static char * 690 getstr(void) 691 { 692 static char empty[] = ""; 693 if (!*gargv) 694 return empty; 695 return *gargv++; 696 } 697 698 static int 699 getwidth(void) 700 { 701 unsigned long val; 702 char *s, *ep; 703 704 s = *gargv; 705 if (s == NULL) 706 return 0; 707 gargv++; 708 709 errno = 0; 710 val = strtoul(s, &ep, 0); 711 check_conversion(s, ep); 712 713 /* Arbitrarily 'restrict' field widths to 1Mbyte */ 714 if (val > 1 << 20) { 715 warnx("%s: invalid field width", s); 716 return 0; 717 } 718 719 return (int)val; 720 } 721 722 static intmax_t 723 getintmax(void) 724 { 725 intmax_t val; 726 char *cp, *ep; 727 728 cp = *gargv; 729 if (cp == NULL) 730 return 0; 731 gargv++; 732 733 if (*cp == '\"' || *cp == '\'') 734 return wide_char(cp, 1); 735 736 errno = 0; 737 val = strtoimax(cp, &ep, 0); 738 check_conversion(cp, ep); 739 return val; 740 } 741 742 static long double 743 getdouble(void) 744 { 745 long double val; 746 char *ep; 747 748 if (!*gargv) 749 return 0.0; 750 751 /* This is a NetBSD extension, not required by POSIX (it is useless) */ 752 if (*(ep = *gargv) == '\"' || *ep == '\'') 753 return (long double)wide_char(ep, 1); 754 755 errno = 0; 756 val = strtold(*gargv, &ep); 757 check_conversion(*gargv++, ep); 758 return val; 759 } 760 761 /* 762 * Fetch a wide character from the string given 763 * 764 * if all that character must consume the entire string 765 * after an initial leading byte (ascii char) is ignored, 766 * (used for parsing intger args using the 'X syntax) 767 * 768 * if !all then there is no requirement that the whole 769 * string be consumed (remaining characters are just ignored) 770 * but the character is to start at *p. 771 * (used for fetching the first chartacter of a string arg for %c) 772 */ 773 static intmax_t 774 wide_char(const char *p, int all) 775 { 776 wchar_t wch; 777 size_t len; 778 int n; 779 780 (void)mbtowc(NULL, NULL, 0); 781 n = mbtowc(&wch, p + all, (len = strlen(p + all)) + 1); 782 if (n < 0) { 783 warn("%s", p); 784 rval = -1; 785 } else if (all && (size_t)n != len) { 786 warnx("%s: not completely converted", p); 787 rval = 1; 788 } 789 790 return (intmax_t) wch; 791 } 792 793 static void 794 check_conversion(const char *s, const char *ep) 795 { 796 if (*ep) { 797 if (ep == s) 798 warnx("%s: expected numeric value", s); 799 else 800 warnx("%s: not completely converted", s); 801 rval = 1; 802 } else if (errno == ERANGE) { 803 warnx("%s: %s", s, strerror(ERANGE)); 804 rval = 1; 805 } 806 } 807 808 static void 809 usage(void) 810 { 811 (void)fprintf(stderr, 812 "Usage: %s [-L] format [arg ...]\n", getprogname()); 813 } 814