1 /* $NetBSD: vfscanf.c,v 1.26 2000/01/21 23:12:33 wrstuden Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Chris Torek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #if defined(LIBC_SCCS) && !defined(lint) 41 #if 0 42 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; 43 #else 44 __RCSID("$NetBSD: vfscanf.c,v 1.26 2000/01/21 23:12:33 wrstuden Exp $"); 45 #endif 46 #endif /* LIBC_SCCS and not lint */ 47 48 #include "namespace.h" 49 50 #include <assert.h> 51 #include <errno.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <ctype.h> 55 #if __STDC__ 56 #include <stdarg.h> 57 #else 58 #include <varargs.h> 59 #endif 60 61 #include "local.h" 62 #include "reentrant.h" 63 64 #ifdef FLOATING_POINT 65 #include "floatio.h" 66 #endif 67 68 #define BUF 513 /* Maximum length of numeric string. */ 69 70 /* 71 * Flags used during conversion. 72 */ 73 #define LONG 0x01 /* l: long or double */ 74 #define LONGDBL 0x02 /* L: long double; unimplemented */ 75 #define SHORT 0x04 /* h: short */ 76 #define QUAD 0x08 /* q: quad */ 77 #define SUPPRESS 0x10 /* suppress assignment */ 78 #define POINTER 0x20 /* weird %p pointer (`fake hex') */ 79 #define NOSKIP 0x40 /* do not skip blanks */ 80 81 /* 82 * The following are used in numeric conversions only: 83 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 84 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 85 */ 86 #define SIGNOK 0x080 /* +/- is (still) legal */ 87 #define NDIGITS 0x100 /* no digits detected */ 88 89 #define DPTOK 0x200 /* (float) decimal point is still legal */ 90 #define EXPOK 0x400 /* (float) exponent (e+3, etc) still legal */ 91 92 #define PFXOK 0x200 /* 0x prefix is (still) legal */ 93 #define NZDIGITS 0x400 /* no zero digits detected */ 94 95 /* 96 * Conversion types. 97 */ 98 #define CT_CHAR 0 /* %c conversion */ 99 #define CT_CCL 1 /* %[...] conversion */ 100 #define CT_STRING 2 /* %s conversion */ 101 #define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 102 #define CT_FLOAT 4 /* floating, i.e., strtod */ 103 104 #define u_char unsigned char 105 #define u_long unsigned long 106 107 static const u_char *__sccl __P((char *, const u_char *)); 108 109 /* 110 * vfscanf 111 */ 112 int 113 __svfscanf(fp, fmt0, ap) 114 FILE *fp; 115 const char *fmt0; 116 _BSD_VA_LIST_ ap; 117 { 118 const u_char *fmt = (const u_char *)fmt0; 119 int c; /* character from format, or conversion */ 120 size_t width; /* field width, or 0 */ 121 char *p; /* points into all kinds of strings */ 122 int n; /* handy integer */ 123 int flags; /* flags as defined above */ 124 char *p0; /* saves original value of p when necessary */ 125 int nassigned; /* number of fields assigned */ 126 int nread; /* number of characters consumed from fp */ 127 int base; /* base argument to strtoq/strtouq */ 128 u_quad_t (*ccfn) __P((const char *, char **, int)); 129 /* conversion function (strtoq/strtouq) */ 130 char ccltab[256]; /* character class table for %[...] */ 131 char buf[BUF]; /* buffer for numeric conversions */ 132 133 /* `basefix' is used to avoid `if' tests in the integer scanner */ 134 static const short basefix[17] = 135 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 136 137 _DIAGASSERT(fp != NULL); 138 _DIAGASSERT(fmt0 != NULL); 139 140 FLOCKFILE(fp); 141 142 nassigned = 0; 143 nread = 0; 144 base = 0; /* XXX just to keep gcc happy */ 145 ccfn = NULL; /* XXX just to keep gcc happy */ 146 for (;;) { 147 c = *fmt++; 148 if (c == 0) { 149 FUNLOCKFILE(fp); 150 return (nassigned); 151 } 152 if (isspace(c)) { 153 while ((fp->_r > 0 || __srefill(fp) == 0) && 154 isspace(*fp->_p)) 155 nread++, fp->_r--, fp->_p++; 156 continue; 157 } 158 if (c != '%') 159 goto literal; 160 width = 0; 161 flags = 0; 162 /* 163 * switch on the format. continue if done; 164 * break once format type is derived. 165 */ 166 again: c = *fmt++; 167 switch (c) { 168 case '%': 169 literal: 170 if (fp->_r <= 0 && __srefill(fp)) 171 goto input_failure; 172 if (*fp->_p != c) 173 goto match_failure; 174 fp->_r--, fp->_p++; 175 nread++; 176 continue; 177 178 case '*': 179 flags |= SUPPRESS; 180 goto again; 181 case 'L': 182 flags |= LONGDBL; 183 goto again; 184 case 'h': 185 flags |= SHORT; 186 goto again; 187 case 'l': 188 if (*fmt == 'l') { 189 fmt++; 190 flags |= QUAD; 191 } else { 192 flags |= LONG; 193 } 194 goto again; 195 case 'q': 196 flags |= QUAD; 197 goto again; 198 199 case '0': case '1': case '2': case '3': case '4': 200 case '5': case '6': case '7': case '8': case '9': 201 width = width * 10 + c - '0'; 202 goto again; 203 204 /* 205 * Conversions. 206 * Those marked `compat' are for 4.[123]BSD compatibility. 207 * 208 * (According to ANSI, E and X formats are supposed 209 * to the same as e and x. Sorry about that.) 210 */ 211 case 'D': /* compat */ 212 flags |= LONG; 213 /* FALLTHROUGH */ 214 case 'd': 215 c = CT_INT; 216 ccfn = (u_quad_t (*) __P((const char *, char **, int)))strtoq; 217 base = 10; 218 break; 219 220 case 'i': 221 c = CT_INT; 222 ccfn = (u_quad_t (*) __P((const char *, char **, int)))strtoq; 223 base = 0; 224 break; 225 226 case 'O': /* compat */ 227 flags |= LONG; 228 /* FALLTHROUGH */ 229 case 'o': 230 c = CT_INT; 231 ccfn = strtouq; 232 base = 8; 233 break; 234 235 case 'u': 236 c = CT_INT; 237 ccfn = strtouq; 238 base = 10; 239 break; 240 241 case 'X': 242 case 'x': 243 flags |= PFXOK; /* enable 0x prefixing */ 244 c = CT_INT; 245 ccfn = strtouq; 246 base = 16; 247 break; 248 249 #ifdef FLOATING_POINT 250 case 'E': 251 case 'G': 252 case 'e': 253 case 'f': 254 case 'g': 255 c = CT_FLOAT; 256 break; 257 #endif 258 259 case 's': 260 c = CT_STRING; 261 break; 262 263 case '[': 264 fmt = __sccl(ccltab, fmt); 265 flags |= NOSKIP; 266 c = CT_CCL; 267 break; 268 269 case 'c': 270 flags |= NOSKIP; 271 c = CT_CHAR; 272 break; 273 274 case 'p': /* pointer format is like hex */ 275 flags |= POINTER | PFXOK; 276 c = CT_INT; 277 ccfn = strtouq; 278 base = 16; 279 break; 280 281 case 'n': 282 if (flags & SUPPRESS) /* ??? */ 283 continue; 284 if (flags & SHORT) 285 *va_arg(ap, short *) = nread; 286 else if (flags & LONG) 287 *va_arg(ap, long *) = nread; 288 else 289 *va_arg(ap, int *) = nread; 290 continue; 291 292 /* 293 * Disgusting backwards compatibility hacks. XXX 294 */ 295 case '\0': /* compat */ 296 FUNLOCKFILE(fp); 297 return (EOF); 298 299 default: /* compat */ 300 if (isupper(c)) 301 flags |= LONG; 302 c = CT_INT; 303 ccfn = (u_quad_t (*) __P((const char *, char **, int)))strtoq; 304 base = 10; 305 break; 306 } 307 308 /* 309 * We have a conversion that requires input. 310 */ 311 if (fp->_r <= 0 && __srefill(fp)) 312 goto input_failure; 313 314 /* 315 * Consume leading white space, except for formats 316 * that suppress this. 317 */ 318 if ((flags & NOSKIP) == 0) { 319 while (isspace(*fp->_p)) { 320 nread++; 321 if (--fp->_r > 0) 322 fp->_p++; 323 else if (__srefill(fp)) 324 goto input_failure; 325 } 326 /* 327 * Note that there is at least one character in 328 * the buffer, so conversions that do not set NOSKIP 329 * ca no longer result in an input failure. 330 */ 331 } 332 333 /* 334 * Do the conversion. 335 */ 336 switch (c) { 337 338 case CT_CHAR: 339 /* scan arbitrary characters (sets NOSKIP) */ 340 if (width == 0) 341 width = 1; 342 if (flags & SUPPRESS) { 343 size_t sum = 0; 344 for (;;) { 345 if ((n = fp->_r) < width) { 346 sum += n; 347 width -= n; 348 fp->_p += n; 349 if (__srefill(fp)) { 350 if (sum == 0) 351 goto input_failure; 352 break; 353 } 354 } else { 355 sum += width; 356 fp->_r -= width; 357 fp->_p += width; 358 break; 359 } 360 } 361 nread += sum; 362 } else { 363 size_t r = fread((void *)va_arg(ap, char *), 1, 364 width, fp); 365 366 if (r == 0) 367 goto input_failure; 368 nread += r; 369 nassigned++; 370 } 371 break; 372 373 case CT_CCL: 374 /* scan a (nonempty) character class (sets NOSKIP) */ 375 if (width == 0) 376 width = ~0U; /* `infinity' */ 377 /* take only those things in the class */ 378 if (flags & SUPPRESS) { 379 n = 0; 380 while (ccltab[*fp->_p]) { 381 n++, fp->_r--, fp->_p++; 382 if (--width == 0) 383 break; 384 if (fp->_r <= 0 && __srefill(fp)) { 385 if (n == 0) 386 goto input_failure; 387 break; 388 } 389 } 390 if (n == 0) 391 goto match_failure; 392 } else { 393 p0 = p = va_arg(ap, char *); 394 while (ccltab[*fp->_p]) { 395 fp->_r--; 396 *p++ = *fp->_p++; 397 if (--width == 0) 398 break; 399 if (fp->_r <= 0 && __srefill(fp)) { 400 if (p == p0) 401 goto input_failure; 402 break; 403 } 404 } 405 n = p - p0; 406 if (n == 0) 407 goto match_failure; 408 *p = 0; 409 nassigned++; 410 } 411 nread += n; 412 break; 413 414 case CT_STRING: 415 /* like CCL, but zero-length string OK, & no NOSKIP */ 416 if (width == 0) 417 width = ~0U; 418 if (flags & SUPPRESS) { 419 n = 0; 420 while (!isspace(*fp->_p)) { 421 n++, fp->_r--, fp->_p++; 422 if (--width == 0) 423 break; 424 if (fp->_r <= 0 && __srefill(fp)) 425 break; 426 } 427 nread += n; 428 } else { 429 p0 = p = va_arg(ap, char *); 430 while (!isspace(*fp->_p)) { 431 fp->_r--; 432 *p++ = *fp->_p++; 433 if (--width == 0) 434 break; 435 if (fp->_r <= 0 && __srefill(fp)) 436 break; 437 } 438 *p = 0; 439 nread += p - p0; 440 nassigned++; 441 } 442 continue; 443 444 case CT_INT: 445 /* scan an integer as if by strtoq/strtouq */ 446 #ifdef hardway 447 if (width == 0 || width > sizeof(buf) - 1) 448 width = sizeof(buf) - 1; 449 #else 450 /* size_t is unsigned, hence this optimisation */ 451 if (--width > sizeof(buf) - 2) 452 width = sizeof(buf) - 2; 453 width++; 454 #endif 455 flags |= SIGNOK | NDIGITS | NZDIGITS; 456 for (p = buf; width; width--) { 457 c = *fp->_p; 458 /* 459 * Switch on the character; `goto ok' 460 * if we accept it as a part of number. 461 */ 462 switch (c) { 463 464 /* 465 * The digit 0 is always legal, but is 466 * special. For %i conversions, if no 467 * digits (zero or nonzero) have been 468 * scanned (only signs), we will have 469 * base==0. In that case, we should set 470 * it to 8 and enable 0x prefixing. 471 * Also, if we have not scanned zero digits 472 * before this, do not turn off prefixing 473 * (someone else will turn it off if we 474 * have scanned any nonzero digits). 475 */ 476 case '0': 477 if (base == 0) { 478 base = 8; 479 flags |= PFXOK; 480 } 481 if (flags & NZDIGITS) 482 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 483 else 484 flags &= ~(SIGNOK|PFXOK|NDIGITS); 485 goto ok; 486 487 /* 1 through 7 always legal */ 488 case '1': case '2': case '3': 489 case '4': case '5': case '6': case '7': 490 base = basefix[base]; 491 flags &= ~(SIGNOK | PFXOK | NDIGITS); 492 goto ok; 493 494 /* digits 8 and 9 ok iff decimal or hex */ 495 case '8': case '9': 496 base = basefix[base]; 497 if (base <= 8) 498 break; /* not legal here */ 499 flags &= ~(SIGNOK | PFXOK | NDIGITS); 500 goto ok; 501 502 /* letters ok iff hex */ 503 case 'A': case 'B': case 'C': 504 case 'D': case 'E': case 'F': 505 case 'a': case 'b': case 'c': 506 case 'd': case 'e': case 'f': 507 /* no need to fix base here */ 508 if (base <= 10) 509 break; /* not legal here */ 510 flags &= ~(SIGNOK | PFXOK | NDIGITS); 511 goto ok; 512 513 /* sign ok only as first character */ 514 case '+': case '-': 515 if (flags & SIGNOK) { 516 flags &= ~SIGNOK; 517 goto ok; 518 } 519 break; 520 521 /* x ok iff flag still set & 2nd char */ 522 case 'x': case 'X': 523 if (flags & PFXOK && p == buf + 1) { 524 base = 16; /* if %i */ 525 flags &= ~PFXOK; 526 goto ok; 527 } 528 break; 529 } 530 531 /* 532 * If we got here, c is not a legal character 533 * for a number. Stop accumulating digits. 534 */ 535 break; 536 ok: 537 /* 538 * c is legal: store it and look at the next. 539 */ 540 *p++ = c; 541 if (--fp->_r > 0) 542 fp->_p++; 543 else if (__srefill(fp)) 544 break; /* EOF */ 545 } 546 /* 547 * If we had only a sign, it is no good; push 548 * back the sign. If the number ends in `x', 549 * it was [sign] '0' 'x', so push back the x 550 * and treat it as [sign] '0'. 551 */ 552 if (flags & NDIGITS) { 553 if (p > buf) 554 (void) ungetc(*(u_char *)--p, fp); 555 goto match_failure; 556 } 557 c = ((u_char *)p)[-1]; 558 if (c == 'x' || c == 'X') { 559 --p; 560 (void) ungetc(c, fp); 561 } 562 if ((flags & SUPPRESS) == 0) { 563 u_quad_t res; 564 565 *p = 0; 566 res = (*ccfn)(buf, (char **)NULL, base); 567 if (flags & POINTER) 568 *va_arg(ap, void **) = 569 (void *)(long)res; 570 else if (flags & QUAD) 571 *va_arg(ap, quad_t *) = res; 572 else if (flags & LONG) 573 *va_arg(ap, long *) = (long)res; 574 else if (flags & SHORT) 575 *va_arg(ap, short *) = (short)res; 576 else 577 *va_arg(ap, int *) = (int)res; 578 nassigned++; 579 } 580 nread += p - buf; 581 break; 582 583 #ifdef FLOATING_POINT 584 case CT_FLOAT: 585 /* scan a floating point number as if by strtod */ 586 #ifdef hardway 587 if (width == 0 || width > sizeof(buf) - 1) 588 width = sizeof(buf) - 1; 589 #else 590 /* size_t is unsigned, hence this optimisation */ 591 if (--width > sizeof(buf) - 2) 592 width = sizeof(buf) - 2; 593 width++; 594 #endif 595 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 596 for (p = buf; width; width--) { 597 c = *fp->_p; 598 /* 599 * This code mimicks the integer conversion 600 * code, but is much simpler. 601 */ 602 switch (c) { 603 604 case '0': case '1': case '2': case '3': 605 case '4': case '5': case '6': case '7': 606 case '8': case '9': 607 flags &= ~(SIGNOK | NDIGITS); 608 goto fok; 609 610 case '+': case '-': 611 if (flags & SIGNOK) { 612 flags &= ~SIGNOK; 613 goto fok; 614 } 615 break; 616 case '.': 617 if (flags & DPTOK) { 618 flags &= ~(SIGNOK | DPTOK); 619 goto fok; 620 } 621 break; 622 case 'e': case 'E': 623 /* no exponent without some digits */ 624 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 625 flags = 626 (flags & ~(EXPOK|DPTOK)) | 627 SIGNOK | NDIGITS; 628 goto fok; 629 } 630 break; 631 } 632 break; 633 fok: 634 *p++ = c; 635 if (--fp->_r > 0) 636 fp->_p++; 637 else if (__srefill(fp)) 638 break; /* EOF */ 639 } 640 /* 641 * If no digits, might be missing exponent digits 642 * (just give back the exponent) or might be missing 643 * regular digits, but had sign and/or decimal point. 644 */ 645 if (flags & NDIGITS) { 646 if (flags & EXPOK) { 647 /* no digits at all */ 648 while (p > buf) 649 ungetc(*(u_char *)--p, fp); 650 goto match_failure; 651 } 652 /* just a bad exponent (e and maybe sign) */ 653 c = *(u_char *)--p; 654 if (c != 'e' && c != 'E') { 655 (void) ungetc(c, fp);/* sign */ 656 c = *(u_char *)--p; 657 } 658 (void) ungetc(c, fp); 659 } 660 if ((flags & SUPPRESS) == 0) { 661 double res; 662 663 *p = 0; 664 res = strtod(buf, (char **) NULL); 665 if (flags & LONGDBL) 666 *va_arg(ap, long double *) = res; 667 else if (flags & LONG) 668 *va_arg(ap, double *) = res; 669 else 670 *va_arg(ap, float *) = res; 671 nassigned++; 672 } 673 nread += p - buf; 674 break; 675 #endif /* FLOATING_POINT */ 676 } 677 } 678 input_failure: 679 FUNLOCKFILE(fp); 680 return (nassigned ? nassigned : EOF); 681 match_failure: 682 FUNLOCKFILE(fp); 683 return (nassigned); 684 } 685 686 /* 687 * Fill in the given table from the scanset at the given format 688 * (just after `['). Return a pointer to the character past the 689 * closing `]'. The table has a 1 wherever characters should be 690 * considered part of the scanset. 691 */ 692 static const u_char * 693 __sccl(tab, fmt) 694 char *tab; 695 const u_char *fmt; 696 { 697 int c, n, v; 698 699 _DIAGASSERT(tab != NULL); 700 _DIAGASSERT(fmt != NULL); 701 702 /* first `clear' the whole table */ 703 c = *fmt++; /* first char hat => negated scanset */ 704 if (c == '^') { 705 v = 1; /* default => accept */ 706 c = *fmt++; /* get new first char */ 707 } else 708 v = 0; /* default => reject */ 709 /* should probably use memset here */ 710 for (n = 0; n < 256; n++) 711 tab[n] = v; 712 if (c == 0) 713 return (fmt - 1);/* format ended before closing ] */ 714 715 /* 716 * Now set the entries corresponding to the actual scanset 717 * to the opposite of the above. 718 * 719 * The first character may be ']' (or '-') without being special; 720 * the last character may be '-'. 721 */ 722 v = 1 - v; 723 for (;;) { 724 tab[c] = v; /* take character c */ 725 doswitch: 726 n = *fmt++; /* and examine the next */ 727 switch (n) { 728 729 case 0: /* format ended too soon */ 730 return (fmt - 1); 731 732 case '-': 733 /* 734 * A scanset of the form 735 * [01+-] 736 * is defined as `the digit 0, the digit 1, 737 * the character +, the character -', but 738 * the effect of a scanset such as 739 * [a-zA-Z0-9] 740 * is implementation defined. The V7 Unix 741 * scanf treats `a-z' as `the letters a through 742 * z', but treats `a-a' as `the letter a, the 743 * character -, and the letter a'. 744 * 745 * For compatibility, the `-' is not considerd 746 * to define a range if the character following 747 * it is either a close bracket (required by ANSI) 748 * or is not numerically greater than the character 749 * we just stored in the table (c). 750 */ 751 n = *fmt; 752 if (n == ']' || n < c) { 753 c = '-'; 754 break; /* resume the for(;;) */ 755 } 756 fmt++; 757 do { /* fill in the range */ 758 tab[++c] = v; 759 } while (c < n); 760 #if 1 /* XXX another disgusting compatibility hack */ 761 /* 762 * Alas, the V7 Unix scanf also treats formats 763 * such as [a-c-e] as `the letters a through e'. 764 * This too is permitted by the standard.... 765 */ 766 goto doswitch; 767 #else 768 c = *fmt++; 769 if (c == 0) 770 return (fmt - 1); 771 if (c == ']') 772 return (fmt); 773 break; 774 #endif 775 776 case ']': /* end of scanset */ 777 return (fmt); 778 779 default: /* just another character */ 780 c = n; 781 break; 782 } 783 } 784 /* NOTREACHED */ 785 } 786