1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)vfscanf.c 5.4 (Berkeley) 02/01/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/cdefs.h> 16 #include <stdio.h> 17 #include <ctype.h> 18 #include <stdlib.h> 19 #if __STDC__ 20 #include <stdarg.h> 21 #else 22 #include <varargs.h> 23 #endif 24 #include "local.h" 25 26 #define FLOATING_POINT 27 28 #ifdef FLOATING_POINT 29 #include "floatio.h" 30 #define BUF (MAXEXP+MAXFRACT+3) /* 3 = sign + decimal point + NUL */ 31 #else 32 #define BUF 40 33 #endif 34 35 /* 36 * Flags used during conversion. 37 */ 38 #define LONG 0x01 /* l: long or double */ 39 #define LONGDBL 0x02 /* L: long double; unimplemented */ 40 #define SHORT 0x04 /* h: short */ 41 #define SUPPRESS 0x08 /* suppress assignment */ 42 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 43 #define NOSKIP 0x20 /* do not skip blanks */ 44 45 /* 46 * The following are used in numeric conversions only: 47 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 48 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 49 */ 50 #define SIGNOK 0x40 /* +/- is (still) legal */ 51 #define NDIGITS 0x80 /* no digits detected */ 52 53 #define DPTOK 0x100 /* (float) decimal point is still legal */ 54 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 55 56 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 57 #define NZDIGITS 0x200 /* no zero digits detected */ 58 59 /* 60 * Conversion types. 61 */ 62 #define CT_CHAR 0 /* %c conversion */ 63 #define CT_CCL 1 /* %[...] 
conversion */ 64 #define CT_STRING 2 /* %s conversion */ 65 #define CT_INT 3 /* integer, i.e., strtol or strtoul */ 66 #define CT_FLOAT 4 /* floating, i.e., strtod */ 67 68 #define u_char unsigned char 69 #define u_long unsigned long 70 71 static u_char *__sccl(); 72 73 /* 74 * vfscanf 75 */ 76 __svfscanf(fp, fmt0, ap) 77 register FILE *fp; 78 char const *fmt0; 79 va_list ap; 80 { 81 register u_char *fmt = (u_char *)fmt0; 82 register int c; /* character from format, or conversion */ 83 register size_t width; /* field width, or 0 */ 84 register char *p; /* points into all kinds of strings */ 85 register int n; /* handy integer */ 86 register int flags; /* flags as defined above */ 87 register char *p0; /* saves original value of p when necessary */ 88 int nassigned; /* number of fields assigned */ 89 int nread; /* number of characters consumed from fp */ 90 int base; /* base argument to strtol/strtoul */ 91 u_long (*ccfn)(); /* conversion function (strtol/strtoul) */ 92 char ccltab[256]; /* character class table for %[...] */ 93 char buf[BUF]; /* buffer for numeric conversions */ 94 95 /* `basefix' is used to avoid `if' tests in the integer scanner */ 96 static short basefix[17] = 97 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 98 99 nassigned = 0; 100 nread = 0; 101 for (;;) { 102 c = *fmt++; 103 if (c == 0) 104 return (nassigned); 105 if (isspace(c)) { 106 for (;;) { 107 if (fp->_r <= 0 && __srefill(fp)) 108 return (nassigned); 109 if (!isspace(*fp->_p)) 110 break; 111 nread++, fp->_r--, fp->_p++; 112 } 113 continue; 114 } 115 if (c != '%') 116 goto literal; 117 width = 0; 118 flags = 0; 119 /* 120 * switch on the format. continue if done; 121 * break once format type is derived. 
122 */ 123 again: c = *fmt++; 124 switch (c) { 125 case '%': 126 literal: 127 if (fp->_r <= 0 && __srefill(fp)) 128 goto input_failure; 129 if (*fp->_p != c) 130 goto match_failure; 131 fp->_r--, fp->_p++; 132 nread++; 133 continue; 134 135 case '*': 136 flags |= SUPPRESS; 137 goto again; 138 case 'l': 139 flags |= LONG; 140 goto again; 141 case 'L': 142 flags |= LONGDBL; 143 goto again; 144 case 'h': 145 flags |= SHORT; 146 goto again; 147 148 case '0': case '1': case '2': case '3': case '4': 149 case '5': case '6': case '7': case '8': case '9': 150 width = width * 10 + c - '0'; 151 goto again; 152 153 /* 154 * Conversions. 155 * Those marked `compat' are for 4.[123]BSD compatibility. 156 * 157 * (According to ANSI, E and X formats are supposed 158 * to the same as e and x. Sorry about that.) 159 */ 160 case 'D': /* compat */ 161 flags |= LONG; 162 /* FALLTHROUGH */ 163 case 'd': 164 c = CT_INT; 165 ccfn = (u_long (*)())strtol; 166 base = 10; 167 break; 168 169 case 'i': 170 c = CT_INT; 171 ccfn = (u_long (*)())strtol; 172 base = 0; 173 break; 174 175 case 'O': /* compat */ 176 flags |= LONG; 177 /* FALLTHROUGH */ 178 case 'o': 179 c = CT_INT; 180 ccfn = strtoul; 181 base = 8; 182 break; 183 184 case 'u': 185 c = CT_INT; 186 ccfn = strtoul; 187 base = 10; 188 break; 189 190 case 'X': /* compat XXX */ 191 flags |= LONG; 192 /* FALLTHROUGH */ 193 case 'x': 194 flags |= PFXOK; /* enable 0x prefixing */ 195 c = CT_INT; 196 ccfn = strtoul; 197 base = 16; 198 break; 199 200 #ifdef FLOATING_POINT 201 case 'E': /* compat XXX */ 202 case 'F': /* compat */ 203 flags |= LONG; 204 /* FALLTHROUGH */ 205 case 'e': case 'f': case 'g': 206 c = CT_FLOAT; 207 break; 208 #endif 209 210 case 's': 211 c = CT_STRING; 212 break; 213 214 case '[': 215 fmt = __sccl(ccltab, fmt); 216 flags |= NOSKIP; 217 c = CT_CCL; 218 break; 219 220 case 'c': 221 flags |= NOSKIP; 222 c = CT_CHAR; 223 break; 224 225 case 'p': /* pointer format is like hex */ 226 flags |= POINTER | PFXOK; 227 c = CT_INT; 
228 ccfn = strtoul; 229 base = 16; 230 break; 231 232 case 'n': 233 if (flags & SUPPRESS) /* ??? */ 234 continue; 235 if (flags & SHORT) 236 *va_arg(ap, short *) = nread; 237 else if (flags & LONG) 238 *va_arg(ap, long *) = nread; 239 else 240 *va_arg(ap, int *) = nread; 241 continue; 242 243 /* 244 * Disgusting backwards compatibility hacks. XXX 245 */ 246 case '\0': /* compat */ 247 return (EOF); 248 249 default: /* compat */ 250 if (isupper(c)) 251 flags |= LONG; 252 c = CT_INT; 253 ccfn = (u_long (*)())strtol; 254 base = 10; 255 break; 256 } 257 258 /* 259 * We have a conversion that requires input. 260 */ 261 if (fp->_r <= 0 && __srefill(fp)) 262 goto input_failure; 263 264 /* 265 * Consume leading white space, except for formats 266 * that suppress this. 267 */ 268 if ((flags & NOSKIP) == 0) { 269 while (isspace(*fp->_p)) { 270 nread++; 271 if (--fp->_r > 0) 272 fp->_p++; 273 else if (__srefill(fp)) 274 goto input_failure; 275 } 276 /* 277 * Note that there is at least one character in 278 * the buffer, so conversions that do not set NOSKIP 279 * ca no longer result in an input failure. 280 */ 281 } 282 283 /* 284 * Do the conversion. 
285 */ 286 switch (c) { 287 288 case CT_CHAR: 289 /* scan arbitrary characters (sets NOSKIP) */ 290 if (width == 0) 291 width = 1; 292 if (flags & SUPPRESS) { 293 size_t sum = 0; 294 for (;;) { 295 if ((n = fp->_r) < width) { 296 sum += n; 297 width -= n; 298 fp->_p += n; 299 if (__srefill(fp)) { 300 if (sum == 0) 301 goto input_failure; 302 break; 303 } 304 } else { 305 sum += width; 306 fp->_r -= width; 307 fp->_p += width; 308 break; 309 } 310 } 311 nread += sum; 312 } else { 313 size_t r = fread((void *)va_arg(ap, char *), 1, 314 width, fp); 315 316 if (r == 0) 317 goto input_failure; 318 nread += r; 319 nassigned++; 320 } 321 break; 322 323 case CT_CCL: 324 /* scan a (nonempty) character class (sets NOSKIP) */ 325 if (width == 0) 326 width = ~0; /* `infinity' */ 327 /* take only those things in the class */ 328 if (flags & SUPPRESS) { 329 n = 0; 330 while (ccltab[*fp->_p]) { 331 n++, fp->_r--, fp->_p++; 332 if (--width == 0) 333 break; 334 if (fp->_r <= 0 && __srefill(fp)) { 335 if (n == 0) 336 goto input_failure; 337 break; 338 } 339 } 340 if (n == 0) 341 goto match_failure; 342 } else { 343 p0 = p = va_arg(ap, char *); 344 while (ccltab[*fp->_p]) { 345 fp->_r--; 346 *p++ = *fp->_p++; 347 if (--width == 0) 348 break; 349 if (fp->_r <= 0 && __srefill(fp)) { 350 if (p == p0) 351 goto input_failure; 352 break; 353 } 354 } 355 n = p - p0; 356 if (n == 0) 357 goto match_failure; 358 *p = 0; 359 nassigned++; 360 } 361 nread += n; 362 break; 363 364 case CT_STRING: 365 /* like CCL, but zero-length string OK, & no NOSKIP */ 366 if (width == 0) 367 width = ~0; 368 if (flags & SUPPRESS) { 369 n = 0; 370 while (!isspace(*fp->_p)) { 371 n++, fp->_r--, fp->_p++; 372 if (--width == 0) 373 break; 374 if (fp->_r <= 0 && __srefill(fp)) 375 break; 376 } 377 nread += n; 378 } else { 379 p0 = p = va_arg(ap, char *); 380 while (!isspace(*fp->_p)) { 381 fp->_r--; 382 *p++ = *fp->_p++; 383 if (--width == 0) 384 break; 385 if (fp->_r <= 0 && __srefill(fp)) 386 break; 387 } 388 *p = 
0; 389 nread += p - p0; 390 nassigned++; 391 } 392 continue; 393 394 case CT_INT: 395 /* scan an integer as if by strtol/strtoul */ 396 #ifdef hardway 397 if (width == 0 || width > sizeof(buf) - 1) 398 width = sizeof(buf) - 1; 399 #else 400 /* size_t is unsigned, hence this optimisation */ 401 if (--width > sizeof(buf) - 2) 402 width = sizeof(buf) - 2; 403 width++; 404 #endif 405 flags |= SIGNOK | NDIGITS | NZDIGITS; 406 for (p = buf; width; width--) { 407 c = *fp->_p; 408 /* 409 * Switch on the character; `goto ok' 410 * if we accept it as a part of number. 411 */ 412 switch (c) { 413 414 /* 415 * The digit 0 is always legal, but is 416 * special. For %i conversions, if no 417 * digits (zero or nonzero) have been 418 * scanned (only signs), we will have 419 * base==0. In that case, we should set 420 * it to 8 and enable 0x prefixing. 421 * Also, if we have not scanned zero digits 422 * before this, do not turn off prefixing 423 * (someone else will turn it off if we 424 * have scanned any nonzero digits). 
425 */ 426 case '0': 427 if (base == 0) { 428 base = 8; 429 flags |= PFXOK; 430 } 431 if (flags & NZDIGITS) 432 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 433 else 434 flags &= ~(SIGNOK|PFXOK|NDIGITS); 435 goto ok; 436 437 /* 1 through 7 always legal */ 438 case '1': case '2': case '3': 439 case '4': case '5': case '6': case '7': 440 base = basefix[base]; 441 flags &= ~(SIGNOK | PFXOK | NDIGITS); 442 goto ok; 443 444 /* digits 8 and 9 ok iff decimal or hex */ 445 case '8': case '9': 446 base = basefix[base]; 447 if (base <= 8) 448 break; /* not legal here */ 449 flags &= ~(SIGNOK | PFXOK | NDIGITS); 450 goto ok; 451 452 /* letters ok iff hex */ 453 case 'A': case 'B': case 'C': 454 case 'D': case 'E': case 'F': 455 case 'a': case 'b': case 'c': 456 case 'd': case 'e': case 'f': 457 /* no need to fix base here */ 458 if (base <= 10) 459 break; /* not legal here */ 460 flags &= ~(SIGNOK | PFXOK | NDIGITS); 461 goto ok; 462 463 /* sign ok only as first character */ 464 case '+': case '-': 465 if (flags & SIGNOK) { 466 flags &= ~SIGNOK; 467 goto ok; 468 } 469 break; 470 471 /* x ok iff flag still set & 2nd char */ 472 case 'x': case 'X': 473 if (flags & PFXOK && p == buf + 1) { 474 base = 16; /* if %i */ 475 flags &= ~PFXOK; 476 goto ok; 477 } 478 break; 479 } 480 481 /* 482 * If we got here, c is not a legal character 483 * for a number. Stop accumulating digits. 484 */ 485 break; 486 ok: 487 /* 488 * c is legal: store it and look at the next. 489 */ 490 *p++ = c; 491 if (--fp->_r > 0) 492 fp->_p++; 493 else if (__srefill(fp)) 494 break; /* EOF */ 495 } 496 /* 497 * If we had only a sign, it is no good; push 498 * back the sign. If the number ends in `x', 499 * it was [sign] '0' 'x', so push back the x 500 * and treat it as [sign] '0'. 
501 */ 502 if (flags & NDIGITS) { 503 if (p > buf) 504 (void) ungetc(*(u_char *)--p, fp); 505 goto match_failure; 506 } 507 c = ((u_char *)p)[-1]; 508 if (c == 'x' || c == 'X') { 509 --p; 510 (void) ungetc(c, fp); 511 } 512 if ((flags & SUPPRESS) == 0) { 513 u_long res; 514 515 *p = 0; 516 res = (*ccfn)(buf, (char **)NULL, base); 517 if (flags & POINTER) 518 *va_arg(ap, void **) = (void *)res; 519 else if (flags & SHORT) 520 *va_arg(ap, short *) = res; 521 else if (flags & LONG) 522 *va_arg(ap, long *) = res; 523 else 524 *va_arg(ap, int *) = res; 525 nassigned++; 526 } 527 nread += p - buf; 528 break; 529 530 #ifdef FLOATING_POINT 531 case CT_FLOAT: 532 /* scan a floating point number as if by strtod */ 533 #ifdef hardway 534 if (width == 0 || width > sizeof(buf) - 1) 535 width = sizeof(buf) - 1; 536 #else 537 /* size_t is unsigned, hence this optimisation */ 538 if (--width > sizeof(buf) - 2) 539 width = sizeof(buf) - 2; 540 width++; 541 #endif 542 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 543 for (p = buf; width; width--) { 544 c = *fp->_p; 545 /* 546 * This code mimicks the integer conversion 547 * code, but is much simpler. 
548 */ 549 switch (c) { 550 551 case '0': case '1': case '2': case '3': 552 case '4': case '5': case '6': case '7': 553 case '8': case '9': 554 flags &= ~(SIGNOK | NDIGITS); 555 goto fok; 556 557 case '+': case '-': 558 if (flags & SIGNOK) { 559 flags &= ~SIGNOK; 560 goto fok; 561 } 562 break; 563 case '.': 564 if (flags & DPTOK) { 565 flags &= ~(SIGNOK | DPTOK); 566 goto fok; 567 } 568 break; 569 case 'e': case 'E': 570 /* no exponent without some digits */ 571 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 572 flags = 573 (flags & ~(EXPOK|DPTOK)) | 574 SIGNOK | NDIGITS; 575 goto fok; 576 } 577 break; 578 } 579 break; 580 fok: 581 *p++ = c; 582 if (--fp->_r > 0) 583 fp->_p++; 584 else if (__srefill(fp)) 585 break; /* EOF */ 586 } 587 /* 588 * If no digits, might be missing exponent digits 589 * (just give back the exponent) or might be missing 590 * regular digits, but had sign and/or decimal point. 591 */ 592 if (flags & NDIGITS) { 593 if (flags & EXPOK) { 594 /* no digits at all */ 595 while (p > buf) 596 ungetc(*(u_char *)--p, fp); 597 goto match_failure; 598 } 599 /* just a bad exponent (e and maybe sign) */ 600 c = *(u_char *)--p; 601 if (c != 'e' && c != 'E') { 602 (void) ungetc(c, fp);/* sign */ 603 c = *(u_char *)--p; 604 } 605 (void) ungetc(c, fp); 606 } 607 if ((flags & SUPPRESS) == 0) { 608 double res; 609 610 *p = 0; 611 res = atof(buf); 612 if (flags & LONG) 613 *va_arg(ap, double *) = res; 614 else 615 *va_arg(ap, float *) = res; 616 nassigned++; 617 } 618 nread += p - buf; 619 break; 620 #endif /* FLOATING_POINT */ 621 } 622 } 623 input_failure: 624 return (nassigned ? nassigned : -1); 625 match_failure: 626 return (nassigned); 627 } 628 629 /* 630 * Fill in the given table from the scanset at the given format 631 * (just after `['). Return a pointer to the character past the 632 * closing `]'. The table has a 1 wherever characters should be 633 * considered part of the scanset. 
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 * tab	table of 256 entries indexed by character value; every entry
 *	is written.
 * fmt	format text immediately following the `['.
 *
 * If the format ends before a closing `]' is seen, the pointer
 * returned addresses the terminating NUL instead.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	/* should probably use memset here */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must therefore be `<=': with a
			 * plain `<', `a-a' would reach the fill loop below,
			 * which always stores at least tab[c + 1] -- marking
			 * the wrong character and, for c == 255, writing
			 * past the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range */
				tab[++c] = v;
			} while (c < n);
#if 1	/* XXX another disgusting compatibility hack */
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;
#else
			c = *fmt++;
			if (c == 0)
				return (fmt - 1);
			if (c == ']')
				return (fmt);
#endif
			break;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}