1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)vfscanf.c 5.5 (Berkeley) 02/05/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/cdefs.h> 16 #include <stdio.h> 17 #include <ctype.h> 18 #include <stdlib.h> 19 #if __STDC__ 20 #include <stdarg.h> 21 #else 22 #include <varargs.h> 23 #endif 24 #include "local.h" 25 26 #define FLOATING_POINT 27 28 #ifdef FLOATING_POINT 29 #include "floatio.h" 30 #define BUF (MAXEXP+MAXFRACT+3) /* 3 = sign + decimal point + NUL */ 31 #else 32 #define BUF 40 33 #endif 34 35 /* 36 * Flags used during conversion. 37 */ 38 #define LONG 0x01 /* l: long or double */ 39 #define LONGDBL 0x02 /* L: long double; unimplemented */ 40 #define SHORT 0x04 /* h: short */ 41 #define SUPPRESS 0x08 /* suppress assignment */ 42 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 43 #define NOSKIP 0x20 /* do not skip blanks */ 44 45 /* 46 * The following are used in numeric conversions only: 47 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 48 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 49 */ 50 #define SIGNOK 0x40 /* +/- is (still) legal */ 51 #define NDIGITS 0x80 /* no digits detected */ 52 53 #define DPTOK 0x100 /* (float) decimal point is still legal */ 54 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 55 56 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 57 #define NZDIGITS 0x200 /* no zero digits detected */ 58 59 /* 60 * Conversion types. 61 */ 62 #define CT_CHAR 0 /* %c conversion */ 63 #define CT_CCL 1 /* %[...] 
conversion */ 64 #define CT_STRING 2 /* %s conversion */ 65 #define CT_INT 3 /* integer, i.e., strtol or strtoul */ 66 #define CT_FLOAT 4 /* floating, i.e., strtod */ 67 68 #define u_char unsigned char 69 #define u_long unsigned long 70 71 static u_char *__sccl(); 72 73 /* 74 * vfscanf 75 */ 76 __svfscanf(fp, fmt0, ap) 77 register FILE *fp; 78 char const *fmt0; 79 va_list ap; 80 { 81 register u_char *fmt = (u_char *)fmt0; 82 register int c; /* character from format, or conversion */ 83 register size_t width; /* field width, or 0 */ 84 register char *p; /* points into all kinds of strings */ 85 register int n; /* handy integer */ 86 register int flags; /* flags as defined above */ 87 register char *p0; /* saves original value of p when necessary */ 88 int nassigned; /* number of fields assigned */ 89 int nread; /* number of characters consumed from fp */ 90 int base; /* base argument to strtol/strtoul */ 91 u_long (*ccfn)(); /* conversion function (strtol/strtoul) */ 92 char ccltab[256]; /* character class table for %[...] */ 93 char buf[BUF]; /* buffer for numeric conversions */ 94 95 /* `basefix' is used to avoid `if' tests in the integer scanner */ 96 static short basefix[17] = 97 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 98 99 nassigned = 0; 100 nread = 0; 101 base = 0; /* XXX just to keep gcc happy */ 102 ccfn = NULL; /* XXX just to keep gcc happy */ 103 for (;;) { 104 c = *fmt++; 105 if (c == 0) 106 return (nassigned); 107 if (isspace(c)) { 108 for (;;) { 109 if (fp->_r <= 0 && __srefill(fp)) 110 return (nassigned); 111 if (!isspace(*fp->_p)) 112 break; 113 nread++, fp->_r--, fp->_p++; 114 } 115 continue; 116 } 117 if (c != '%') 118 goto literal; 119 width = 0; 120 flags = 0; 121 /* 122 * switch on the format. continue if done; 123 * break once format type is derived. 
124 */ 125 again: c = *fmt++; 126 switch (c) { 127 case '%': 128 literal: 129 if (fp->_r <= 0 && __srefill(fp)) 130 goto input_failure; 131 if (*fp->_p != c) 132 goto match_failure; 133 fp->_r--, fp->_p++; 134 nread++; 135 continue; 136 137 case '*': 138 flags |= SUPPRESS; 139 goto again; 140 case 'l': 141 flags |= LONG; 142 goto again; 143 case 'L': 144 flags |= LONGDBL; 145 goto again; 146 case 'h': 147 flags |= SHORT; 148 goto again; 149 150 case '0': case '1': case '2': case '3': case '4': 151 case '5': case '6': case '7': case '8': case '9': 152 width = width * 10 + c - '0'; 153 goto again; 154 155 /* 156 * Conversions. 157 * Those marked `compat' are for 4.[123]BSD compatibility. 158 * 159 * (According to ANSI, E and X formats are supposed 160 * to the same as e and x. Sorry about that.) 161 */ 162 case 'D': /* compat */ 163 flags |= LONG; 164 /* FALLTHROUGH */ 165 case 'd': 166 c = CT_INT; 167 ccfn = (u_long (*)())strtol; 168 base = 10; 169 break; 170 171 case 'i': 172 c = CT_INT; 173 ccfn = (u_long (*)())strtol; 174 base = 0; 175 break; 176 177 case 'O': /* compat */ 178 flags |= LONG; 179 /* FALLTHROUGH */ 180 case 'o': 181 c = CT_INT; 182 ccfn = strtoul; 183 base = 8; 184 break; 185 186 case 'u': 187 c = CT_INT; 188 ccfn = strtoul; 189 base = 10; 190 break; 191 192 case 'X': /* compat XXX */ 193 flags |= LONG; 194 /* FALLTHROUGH */ 195 case 'x': 196 flags |= PFXOK; /* enable 0x prefixing */ 197 c = CT_INT; 198 ccfn = strtoul; 199 base = 16; 200 break; 201 202 #ifdef FLOATING_POINT 203 case 'E': /* compat XXX */ 204 case 'F': /* compat */ 205 flags |= LONG; 206 /* FALLTHROUGH */ 207 case 'e': case 'f': case 'g': 208 c = CT_FLOAT; 209 break; 210 #endif 211 212 case 's': 213 c = CT_STRING; 214 break; 215 216 case '[': 217 fmt = __sccl(ccltab, fmt); 218 flags |= NOSKIP; 219 c = CT_CCL; 220 break; 221 222 case 'c': 223 flags |= NOSKIP; 224 c = CT_CHAR; 225 break; 226 227 case 'p': /* pointer format is like hex */ 228 flags |= POINTER | PFXOK; 229 c = CT_INT; 
230 ccfn = strtoul; 231 base = 16; 232 break; 233 234 case 'n': 235 if (flags & SUPPRESS) /* ??? */ 236 continue; 237 if (flags & SHORT) 238 *va_arg(ap, short *) = nread; 239 else if (flags & LONG) 240 *va_arg(ap, long *) = nread; 241 else 242 *va_arg(ap, int *) = nread; 243 continue; 244 245 /* 246 * Disgusting backwards compatibility hacks. XXX 247 */ 248 case '\0': /* compat */ 249 return (EOF); 250 251 default: /* compat */ 252 if (isupper(c)) 253 flags |= LONG; 254 c = CT_INT; 255 ccfn = (u_long (*)())strtol; 256 base = 10; 257 break; 258 } 259 260 /* 261 * We have a conversion that requires input. 262 */ 263 if (fp->_r <= 0 && __srefill(fp)) 264 goto input_failure; 265 266 /* 267 * Consume leading white space, except for formats 268 * that suppress this. 269 */ 270 if ((flags & NOSKIP) == 0) { 271 while (isspace(*fp->_p)) { 272 nread++; 273 if (--fp->_r > 0) 274 fp->_p++; 275 else if (__srefill(fp)) 276 goto input_failure; 277 } 278 /* 279 * Note that there is at least one character in 280 * the buffer, so conversions that do not set NOSKIP 281 * ca no longer result in an input failure. 282 */ 283 } 284 285 /* 286 * Do the conversion. 
287 */ 288 switch (c) { 289 290 case CT_CHAR: 291 /* scan arbitrary characters (sets NOSKIP) */ 292 if (width == 0) 293 width = 1; 294 if (flags & SUPPRESS) { 295 size_t sum = 0; 296 for (;;) { 297 if ((n = fp->_r) < width) { 298 sum += n; 299 width -= n; 300 fp->_p += n; 301 if (__srefill(fp)) { 302 if (sum == 0) 303 goto input_failure; 304 break; 305 } 306 } else { 307 sum += width; 308 fp->_r -= width; 309 fp->_p += width; 310 break; 311 } 312 } 313 nread += sum; 314 } else { 315 size_t r = fread((void *)va_arg(ap, char *), 1, 316 width, fp); 317 318 if (r == 0) 319 goto input_failure; 320 nread += r; 321 nassigned++; 322 } 323 break; 324 325 case CT_CCL: 326 /* scan a (nonempty) character class (sets NOSKIP) */ 327 if (width == 0) 328 width = ~0; /* `infinity' */ 329 /* take only those things in the class */ 330 if (flags & SUPPRESS) { 331 n = 0; 332 while (ccltab[*fp->_p]) { 333 n++, fp->_r--, fp->_p++; 334 if (--width == 0) 335 break; 336 if (fp->_r <= 0 && __srefill(fp)) { 337 if (n == 0) 338 goto input_failure; 339 break; 340 } 341 } 342 if (n == 0) 343 goto match_failure; 344 } else { 345 p0 = p = va_arg(ap, char *); 346 while (ccltab[*fp->_p]) { 347 fp->_r--; 348 *p++ = *fp->_p++; 349 if (--width == 0) 350 break; 351 if (fp->_r <= 0 && __srefill(fp)) { 352 if (p == p0) 353 goto input_failure; 354 break; 355 } 356 } 357 n = p - p0; 358 if (n == 0) 359 goto match_failure; 360 *p = 0; 361 nassigned++; 362 } 363 nread += n; 364 break; 365 366 case CT_STRING: 367 /* like CCL, but zero-length string OK, & no NOSKIP */ 368 if (width == 0) 369 width = ~0; 370 if (flags & SUPPRESS) { 371 n = 0; 372 while (!isspace(*fp->_p)) { 373 n++, fp->_r--, fp->_p++; 374 if (--width == 0) 375 break; 376 if (fp->_r <= 0 && __srefill(fp)) 377 break; 378 } 379 nread += n; 380 } else { 381 p0 = p = va_arg(ap, char *); 382 while (!isspace(*fp->_p)) { 383 fp->_r--; 384 *p++ = *fp->_p++; 385 if (--width == 0) 386 break; 387 if (fp->_r <= 0 && __srefill(fp)) 388 break; 389 } 390 *p = 
0; 391 nread += p - p0; 392 nassigned++; 393 } 394 continue; 395 396 case CT_INT: 397 /* scan an integer as if by strtol/strtoul */ 398 #ifdef hardway 399 if (width == 0 || width > sizeof(buf) - 1) 400 width = sizeof(buf) - 1; 401 #else 402 /* size_t is unsigned, hence this optimisation */ 403 if (--width > sizeof(buf) - 2) 404 width = sizeof(buf) - 2; 405 width++; 406 #endif 407 flags |= SIGNOK | NDIGITS | NZDIGITS; 408 for (p = buf; width; width--) { 409 c = *fp->_p; 410 /* 411 * Switch on the character; `goto ok' 412 * if we accept it as a part of number. 413 */ 414 switch (c) { 415 416 /* 417 * The digit 0 is always legal, but is 418 * special. For %i conversions, if no 419 * digits (zero or nonzero) have been 420 * scanned (only signs), we will have 421 * base==0. In that case, we should set 422 * it to 8 and enable 0x prefixing. 423 * Also, if we have not scanned zero digits 424 * before this, do not turn off prefixing 425 * (someone else will turn it off if we 426 * have scanned any nonzero digits). 
427 */ 428 case '0': 429 if (base == 0) { 430 base = 8; 431 flags |= PFXOK; 432 } 433 if (flags & NZDIGITS) 434 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 435 else 436 flags &= ~(SIGNOK|PFXOK|NDIGITS); 437 goto ok; 438 439 /* 1 through 7 always legal */ 440 case '1': case '2': case '3': 441 case '4': case '5': case '6': case '7': 442 base = basefix[base]; 443 flags &= ~(SIGNOK | PFXOK | NDIGITS); 444 goto ok; 445 446 /* digits 8 and 9 ok iff decimal or hex */ 447 case '8': case '9': 448 base = basefix[base]; 449 if (base <= 8) 450 break; /* not legal here */ 451 flags &= ~(SIGNOK | PFXOK | NDIGITS); 452 goto ok; 453 454 /* letters ok iff hex */ 455 case 'A': case 'B': case 'C': 456 case 'D': case 'E': case 'F': 457 case 'a': case 'b': case 'c': 458 case 'd': case 'e': case 'f': 459 /* no need to fix base here */ 460 if (base <= 10) 461 break; /* not legal here */ 462 flags &= ~(SIGNOK | PFXOK | NDIGITS); 463 goto ok; 464 465 /* sign ok only as first character */ 466 case '+': case '-': 467 if (flags & SIGNOK) { 468 flags &= ~SIGNOK; 469 goto ok; 470 } 471 break; 472 473 /* x ok iff flag still set & 2nd char */ 474 case 'x': case 'X': 475 if (flags & PFXOK && p == buf + 1) { 476 base = 16; /* if %i */ 477 flags &= ~PFXOK; 478 goto ok; 479 } 480 break; 481 } 482 483 /* 484 * If we got here, c is not a legal character 485 * for a number. Stop accumulating digits. 486 */ 487 break; 488 ok: 489 /* 490 * c is legal: store it and look at the next. 491 */ 492 *p++ = c; 493 if (--fp->_r > 0) 494 fp->_p++; 495 else if (__srefill(fp)) 496 break; /* EOF */ 497 } 498 /* 499 * If we had only a sign, it is no good; push 500 * back the sign. If the number ends in `x', 501 * it was [sign] '0' 'x', so push back the x 502 * and treat it as [sign] '0'. 
503 */ 504 if (flags & NDIGITS) { 505 if (p > buf) 506 (void) ungetc(*(u_char *)--p, fp); 507 goto match_failure; 508 } 509 c = ((u_char *)p)[-1]; 510 if (c == 'x' || c == 'X') { 511 --p; 512 (void) ungetc(c, fp); 513 } 514 if ((flags & SUPPRESS) == 0) { 515 u_long res; 516 517 *p = 0; 518 res = (*ccfn)(buf, (char **)NULL, base); 519 if (flags & POINTER) 520 *va_arg(ap, void **) = (void *)res; 521 else if (flags & SHORT) 522 *va_arg(ap, short *) = res; 523 else if (flags & LONG) 524 *va_arg(ap, long *) = res; 525 else 526 *va_arg(ap, int *) = res; 527 nassigned++; 528 } 529 nread += p - buf; 530 break; 531 532 #ifdef FLOATING_POINT 533 case CT_FLOAT: 534 /* scan a floating point number as if by strtod */ 535 #ifdef hardway 536 if (width == 0 || width > sizeof(buf) - 1) 537 width = sizeof(buf) - 1; 538 #else 539 /* size_t is unsigned, hence this optimisation */ 540 if (--width > sizeof(buf) - 2) 541 width = sizeof(buf) - 2; 542 width++; 543 #endif 544 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 545 for (p = buf; width; width--) { 546 c = *fp->_p; 547 /* 548 * This code mimicks the integer conversion 549 * code, but is much simpler. 
550 */ 551 switch (c) { 552 553 case '0': case '1': case '2': case '3': 554 case '4': case '5': case '6': case '7': 555 case '8': case '9': 556 flags &= ~(SIGNOK | NDIGITS); 557 goto fok; 558 559 case '+': case '-': 560 if (flags & SIGNOK) { 561 flags &= ~SIGNOK; 562 goto fok; 563 } 564 break; 565 case '.': 566 if (flags & DPTOK) { 567 flags &= ~(SIGNOK | DPTOK); 568 goto fok; 569 } 570 break; 571 case 'e': case 'E': 572 /* no exponent without some digits */ 573 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 574 flags = 575 (flags & ~(EXPOK|DPTOK)) | 576 SIGNOK | NDIGITS; 577 goto fok; 578 } 579 break; 580 } 581 break; 582 fok: 583 *p++ = c; 584 if (--fp->_r > 0) 585 fp->_p++; 586 else if (__srefill(fp)) 587 break; /* EOF */ 588 } 589 /* 590 * If no digits, might be missing exponent digits 591 * (just give back the exponent) or might be missing 592 * regular digits, but had sign and/or decimal point. 593 */ 594 if (flags & NDIGITS) { 595 if (flags & EXPOK) { 596 /* no digits at all */ 597 while (p > buf) 598 ungetc(*(u_char *)--p, fp); 599 goto match_failure; 600 } 601 /* just a bad exponent (e and maybe sign) */ 602 c = *(u_char *)--p; 603 if (c != 'e' && c != 'E') { 604 (void) ungetc(c, fp);/* sign */ 605 c = *(u_char *)--p; 606 } 607 (void) ungetc(c, fp); 608 } 609 if ((flags & SUPPRESS) == 0) { 610 double res; 611 612 *p = 0; 613 res = atof(buf); 614 if (flags & LONG) 615 *va_arg(ap, double *) = res; 616 else 617 *va_arg(ap, float *) = res; 618 nassigned++; 619 } 620 nread += p - buf; 621 break; 622 #endif /* FLOATING_POINT */ 623 } 624 } 625 input_failure: 626 return (nassigned ? nassigned : -1); 627 match_failure: 628 return (nassigned); 629 } 630 631 /* 632 * Fill in the given table from the scanset at the given format 633 * (just after `['). Return a pointer to the character past the 634 * closing `]'. The table has a 1 wherever characters should be 635 * considered part of the scanset. 
/*
 * __sccl: build the membership table for a %[...] scanset.
 *
 *	tab	table of 256 entries indexed by character value; every
 *		entry is overwritten.  On return tab[ch] is 1 when ch is
 *		in the scanset and 0 when it is not.
 *	fmt	the format string, positioned just after the `['.
 *
 * Returns a pointer to the character after the closing `]', or to the
 * terminating NUL when the format ends before the scanset is closed.
 *
 * A leading `^' negates the set.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=', not `<': with n == c the
			 * fill loop below would still run once and mark
			 * c + 1, which for c == 255 is one past the end
			 * of a 256-entry table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range; here c < n */
				tab[++c] = v;
			} while (c < n);
			/*
			 * XXX another disgusting compatibility hack:
			 * the V7 Unix scanf also treats formats such as
			 * [a-c-e] as `the letters a through e', so go
			 * straight back to look for another `-'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}