1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 /*static char *sccsid = "from: @(#)vfscanf.c 5.7 (Berkeley) 12/14/92";*/ 39 static char *rcsid = "$Id: vfscanf.c,v 1.8 1994/09/19 04:43:05 mycroft Exp $"; 40 #endif /* LIBC_SCCS and not lint */ 41 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <ctype.h> 45 #if __STDC__ 46 #include <stdarg.h> 47 #else 48 #include <varargs.h> 49 #endif 50 #include "local.h" 51 52 #ifdef FLOATING_POINT 53 #include "floatio.h" 54 #endif 55 56 #define BUF 513 /* Maximum length of numeric string. */ 57 58 /* 59 * Flags used during conversion. 60 */ 61 #define LONG 0x01 /* l: long or double */ 62 #define LONGDBL 0x02 /* L: long double; unimplemented */ 63 #define SHORT 0x04 /* h: short */ 64 #define SUPPRESS 0x08 /* suppress assignment */ 65 #define POINTER 0x10 /* weird %p pointer (`fake hex') */ 66 #define NOSKIP 0x20 /* do not skip blanks */ 67 68 /* 69 * The following are used in numeric conversions only: 70 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 71 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 72 */ 73 #define SIGNOK 0x40 /* +/- is (still) legal */ 74 #define NDIGITS 0x80 /* no digits detected */ 75 76 #define DPTOK 0x100 /* (float) decimal point is still legal */ 77 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 78 79 #define PFXOK 0x100 /* 0x prefix is (still) legal */ 80 #define NZDIGITS 0x200 /* no zero digits detected */ 81 82 /* 83 * Conversion types. 84 */ 85 #define CT_CHAR 0 /* %c conversion */ 86 #define CT_CCL 1 /* %[...] 
conversion */ 87 #define CT_STRING 2 /* %s conversion */ 88 #define CT_INT 3 /* integer, i.e., strtol or strtoul */ 89 #define CT_FLOAT 4 /* floating, i.e., strtod */ 90 91 #define u_char unsigned char 92 #define u_long unsigned long 93 94 static u_char *__sccl(); 95 96 /* 97 * vfscanf 98 */ 99 __svfscanf(fp, fmt0, ap) 100 register FILE *fp; 101 char const *fmt0; 102 _BSD_VA_LIST_ ap; 103 { 104 register u_char *fmt = (u_char *)fmt0; 105 register int c; /* character from format, or conversion */ 106 register size_t width; /* field width, or 0 */ 107 register char *p; /* points into all kinds of strings */ 108 register int n; /* handy integer */ 109 register int flags; /* flags as defined above */ 110 register char *p0; /* saves original value of p when necessary */ 111 int nassigned; /* number of fields assigned */ 112 int nread; /* number of characters consumed from fp */ 113 int base; /* base argument to strtol/strtoul */ 114 u_long (*ccfn)(); /* conversion function (strtol/strtoul) */ 115 char ccltab[256]; /* character class table for %[...] */ 116 char buf[BUF]; /* buffer for numeric conversions */ 117 118 /* `basefix' is used to avoid `if' tests in the integer scanner */ 119 static short basefix[17] = 120 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 121 122 nassigned = 0; 123 nread = 0; 124 base = 0; /* XXX just to keep gcc happy */ 125 ccfn = NULL; /* XXX just to keep gcc happy */ 126 for (;;) { 127 c = *fmt++; 128 if (c == 0) 129 return (nassigned); 130 if (isspace(c)) { 131 for (;;) { 132 if (fp->_r <= 0 && __srefill(fp)) 133 return (nassigned); 134 if (!isspace(*fp->_p)) 135 break; 136 nread++, fp->_r--, fp->_p++; 137 } 138 continue; 139 } 140 if (c != '%') 141 goto literal; 142 width = 0; 143 flags = 0; 144 /* 145 * switch on the format. continue if done; 146 * break once format type is derived. 
147 */ 148 again: c = *fmt++; 149 switch (c) { 150 case '%': 151 literal: 152 if (fp->_r <= 0 && __srefill(fp)) 153 goto input_failure; 154 if (*fp->_p != c) 155 goto match_failure; 156 fp->_r--, fp->_p++; 157 nread++; 158 continue; 159 160 case '*': 161 flags |= SUPPRESS; 162 goto again; 163 case 'l': 164 flags |= LONG; 165 goto again; 166 case 'L': 167 flags |= LONGDBL; 168 goto again; 169 case 'h': 170 flags |= SHORT; 171 goto again; 172 173 case '0': case '1': case '2': case '3': case '4': 174 case '5': case '6': case '7': case '8': case '9': 175 width = width * 10 + c - '0'; 176 goto again; 177 178 /* 179 * Conversions. 180 * Those marked `compat' are for 4.[123]BSD compatibility. 181 * 182 * (According to ANSI, E and X formats are supposed 183 * to the same as e and x. Sorry about that.) 184 */ 185 case 'D': /* compat */ 186 flags |= LONG; 187 /* FALLTHROUGH */ 188 case 'd': 189 c = CT_INT; 190 ccfn = (u_long (*)())strtol; 191 base = 10; 192 break; 193 194 case 'i': 195 c = CT_INT; 196 ccfn = (u_long (*)())strtol; 197 base = 0; 198 break; 199 200 case 'O': /* compat */ 201 flags |= LONG; 202 /* FALLTHROUGH */ 203 case 'o': 204 c = CT_INT; 205 ccfn = strtoul; 206 base = 8; 207 break; 208 209 case 'u': 210 c = CT_INT; 211 ccfn = strtoul; 212 base = 10; 213 break; 214 215 case 'X': /* compat XXX */ 216 flags |= LONG; 217 /* FALLTHROUGH */ 218 case 'x': 219 flags |= PFXOK; /* enable 0x prefixing */ 220 c = CT_INT; 221 ccfn = strtoul; 222 base = 16; 223 break; 224 225 #ifdef FLOATING_POINT 226 case 'E': /* compat XXX */ 227 case 'F': /* compat */ 228 flags |= LONG; 229 /* FALLTHROUGH */ 230 case 'e': case 'f': case 'g': 231 c = CT_FLOAT; 232 break; 233 #endif 234 235 case 's': 236 c = CT_STRING; 237 break; 238 239 case '[': 240 fmt = __sccl(ccltab, fmt); 241 flags |= NOSKIP; 242 c = CT_CCL; 243 break; 244 245 case 'c': 246 flags |= NOSKIP; 247 c = CT_CHAR; 248 break; 249 250 case 'p': /* pointer format is like hex */ 251 flags |= POINTER | PFXOK; 252 c = CT_INT; 
253 ccfn = strtoul; 254 base = 16; 255 break; 256 257 case 'n': 258 if (flags & SUPPRESS) /* ??? */ 259 continue; 260 if (flags & SHORT) 261 *va_arg(ap, short *) = nread; 262 else if (flags & LONG) 263 *va_arg(ap, long *) = nread; 264 else 265 *va_arg(ap, int *) = nread; 266 continue; 267 268 /* 269 * Disgusting backwards compatibility hacks. XXX 270 */ 271 case '\0': /* compat */ 272 return (EOF); 273 274 default: /* compat */ 275 if (isupper(c)) 276 flags |= LONG; 277 c = CT_INT; 278 ccfn = (u_long (*)())strtol; 279 base = 10; 280 break; 281 } 282 283 /* 284 * We have a conversion that requires input. 285 */ 286 if (fp->_r <= 0 && __srefill(fp)) 287 goto input_failure; 288 289 /* 290 * Consume leading white space, except for formats 291 * that suppress this. 292 */ 293 if ((flags & NOSKIP) == 0) { 294 while (isspace(*fp->_p)) { 295 nread++; 296 if (--fp->_r > 0) 297 fp->_p++; 298 else if (__srefill(fp)) 299 goto input_failure; 300 } 301 /* 302 * Note that there is at least one character in 303 * the buffer, so conversions that do not set NOSKIP 304 * ca no longer result in an input failure. 305 */ 306 } 307 308 /* 309 * Do the conversion. 
310 */ 311 switch (c) { 312 313 case CT_CHAR: 314 /* scan arbitrary characters (sets NOSKIP) */ 315 if (width == 0) 316 width = 1; 317 if (flags & SUPPRESS) { 318 size_t sum = 0; 319 for (;;) { 320 if ((n = fp->_r) < width) { 321 sum += n; 322 width -= n; 323 fp->_p += n; 324 if (__srefill(fp)) { 325 if (sum == 0) 326 goto input_failure; 327 break; 328 } 329 } else { 330 sum += width; 331 fp->_r -= width; 332 fp->_p += width; 333 break; 334 } 335 } 336 nread += sum; 337 } else { 338 size_t r = fread((void *)va_arg(ap, char *), 1, 339 width, fp); 340 341 if (r == 0) 342 goto input_failure; 343 nread += r; 344 nassigned++; 345 } 346 break; 347 348 case CT_CCL: 349 /* scan a (nonempty) character class (sets NOSKIP) */ 350 if (width == 0) 351 width = ~0; /* `infinity' */ 352 /* take only those things in the class */ 353 if (flags & SUPPRESS) { 354 n = 0; 355 while (ccltab[*fp->_p]) { 356 n++, fp->_r--, fp->_p++; 357 if (--width == 0) 358 break; 359 if (fp->_r <= 0 && __srefill(fp)) { 360 if (n == 0) 361 goto input_failure; 362 break; 363 } 364 } 365 if (n == 0) 366 goto match_failure; 367 } else { 368 p0 = p = va_arg(ap, char *); 369 while (ccltab[*fp->_p]) { 370 fp->_r--; 371 *p++ = *fp->_p++; 372 if (--width == 0) 373 break; 374 if (fp->_r <= 0 && __srefill(fp)) { 375 if (p == p0) 376 goto input_failure; 377 break; 378 } 379 } 380 n = p - p0; 381 if (n == 0) 382 goto match_failure; 383 *p = 0; 384 nassigned++; 385 } 386 nread += n; 387 break; 388 389 case CT_STRING: 390 /* like CCL, but zero-length string OK, & no NOSKIP */ 391 if (width == 0) 392 width = ~0; 393 if (flags & SUPPRESS) { 394 n = 0; 395 while (!isspace(*fp->_p)) { 396 n++, fp->_r--, fp->_p++; 397 if (--width == 0) 398 break; 399 if (fp->_r <= 0 && __srefill(fp)) 400 break; 401 } 402 nread += n; 403 } else { 404 p0 = p = va_arg(ap, char *); 405 while (!isspace(*fp->_p)) { 406 fp->_r--; 407 *p++ = *fp->_p++; 408 if (--width == 0) 409 break; 410 if (fp->_r <= 0 && __srefill(fp)) 411 break; 412 } 413 *p = 
0; 414 nread += p - p0; 415 nassigned++; 416 } 417 continue; 418 419 case CT_INT: 420 /* scan an integer as if by strtol/strtoul */ 421 #ifdef hardway 422 if (width == 0 || width > sizeof(buf) - 1) 423 width = sizeof(buf) - 1; 424 #else 425 /* size_t is unsigned, hence this optimisation */ 426 if (--width > sizeof(buf) - 2) 427 width = sizeof(buf) - 2; 428 width++; 429 #endif 430 flags |= SIGNOK | NDIGITS | NZDIGITS; 431 for (p = buf; width; width--) { 432 c = *fp->_p; 433 /* 434 * Switch on the character; `goto ok' 435 * if we accept it as a part of number. 436 */ 437 switch (c) { 438 439 /* 440 * The digit 0 is always legal, but is 441 * special. For %i conversions, if no 442 * digits (zero or nonzero) have been 443 * scanned (only signs), we will have 444 * base==0. In that case, we should set 445 * it to 8 and enable 0x prefixing. 446 * Also, if we have not scanned zero digits 447 * before this, do not turn off prefixing 448 * (someone else will turn it off if we 449 * have scanned any nonzero digits). 
450 */ 451 case '0': 452 if (base == 0) { 453 base = 8; 454 flags |= PFXOK; 455 } 456 if (flags & NZDIGITS) 457 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 458 else 459 flags &= ~(SIGNOK|PFXOK|NDIGITS); 460 goto ok; 461 462 /* 1 through 7 always legal */ 463 case '1': case '2': case '3': 464 case '4': case '5': case '6': case '7': 465 base = basefix[base]; 466 flags &= ~(SIGNOK | PFXOK | NDIGITS); 467 goto ok; 468 469 /* digits 8 and 9 ok iff decimal or hex */ 470 case '8': case '9': 471 base = basefix[base]; 472 if (base <= 8) 473 break; /* not legal here */ 474 flags &= ~(SIGNOK | PFXOK | NDIGITS); 475 goto ok; 476 477 /* letters ok iff hex */ 478 case 'A': case 'B': case 'C': 479 case 'D': case 'E': case 'F': 480 case 'a': case 'b': case 'c': 481 case 'd': case 'e': case 'f': 482 /* no need to fix base here */ 483 if (base <= 10) 484 break; /* not legal here */ 485 flags &= ~(SIGNOK | PFXOK | NDIGITS); 486 goto ok; 487 488 /* sign ok only as first character */ 489 case '+': case '-': 490 if (flags & SIGNOK) { 491 flags &= ~SIGNOK; 492 goto ok; 493 } 494 break; 495 496 /* x ok iff flag still set & 2nd char */ 497 case 'x': case 'X': 498 if (flags & PFXOK && p == buf + 1) { 499 base = 16; /* if %i */ 500 flags &= ~PFXOK; 501 goto ok; 502 } 503 break; 504 } 505 506 /* 507 * If we got here, c is not a legal character 508 * for a number. Stop accumulating digits. 509 */ 510 break; 511 ok: 512 /* 513 * c is legal: store it and look at the next. 514 */ 515 *p++ = c; 516 if (--fp->_r > 0) 517 fp->_p++; 518 else if (__srefill(fp)) 519 break; /* EOF */ 520 } 521 /* 522 * If we had only a sign, it is no good; push 523 * back the sign. If the number ends in `x', 524 * it was [sign] '0' 'x', so push back the x 525 * and treat it as [sign] '0'. 
526 */ 527 if (flags & NDIGITS) { 528 if (p > buf) 529 (void) ungetc(*(u_char *)--p, fp); 530 goto match_failure; 531 } 532 c = ((u_char *)p)[-1]; 533 if (c == 'x' || c == 'X') { 534 --p; 535 (void) ungetc(c, fp); 536 } 537 if ((flags & SUPPRESS) == 0) { 538 u_long res; 539 540 *p = 0; 541 res = (*ccfn)(buf, (char **)NULL, base); 542 if (flags & POINTER) 543 *va_arg(ap, void **) = (void *)res; 544 else if (flags & SHORT) 545 *va_arg(ap, short *) = res; 546 else if (flags & LONG) 547 *va_arg(ap, long *) = res; 548 else 549 *va_arg(ap, int *) = res; 550 nassigned++; 551 } 552 nread += p - buf; 553 break; 554 555 #ifdef FLOATING_POINT 556 case CT_FLOAT: 557 /* scan a floating point number as if by strtod */ 558 #ifdef hardway 559 if (width == 0 || width > sizeof(buf) - 1) 560 width = sizeof(buf) - 1; 561 #else 562 /* size_t is unsigned, hence this optimisation */ 563 if (--width > sizeof(buf) - 2) 564 width = sizeof(buf) - 2; 565 width++; 566 #endif 567 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 568 for (p = buf; width; width--) { 569 c = *fp->_p; 570 /* 571 * This code mimicks the integer conversion 572 * code, but is much simpler. 
573 */ 574 switch (c) { 575 576 case '0': case '1': case '2': case '3': 577 case '4': case '5': case '6': case '7': 578 case '8': case '9': 579 flags &= ~(SIGNOK | NDIGITS); 580 goto fok; 581 582 case '+': case '-': 583 if (flags & SIGNOK) { 584 flags &= ~SIGNOK; 585 goto fok; 586 } 587 break; 588 case '.': 589 if (flags & DPTOK) { 590 flags &= ~(SIGNOK | DPTOK); 591 goto fok; 592 } 593 break; 594 case 'e': case 'E': 595 /* no exponent without some digits */ 596 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 597 flags = 598 (flags & ~(EXPOK|DPTOK)) | 599 SIGNOK | NDIGITS; 600 goto fok; 601 } 602 break; 603 } 604 break; 605 fok: 606 *p++ = c; 607 if (--fp->_r > 0) 608 fp->_p++; 609 else if (__srefill(fp)) 610 break; /* EOF */ 611 } 612 /* 613 * If no digits, might be missing exponent digits 614 * (just give back the exponent) or might be missing 615 * regular digits, but had sign and/or decimal point. 616 */ 617 if (flags & NDIGITS) { 618 if (flags & EXPOK) { 619 /* no digits at all */ 620 while (p > buf) 621 ungetc(*(u_char *)--p, fp); 622 goto match_failure; 623 } 624 /* just a bad exponent (e and maybe sign) */ 625 c = *(u_char *)--p; 626 if (c != 'e' && c != 'E') { 627 (void) ungetc(c, fp);/* sign */ 628 c = *(u_char *)--p; 629 } 630 (void) ungetc(c, fp); 631 } 632 if ((flags & SUPPRESS) == 0) { 633 double res; 634 635 *p = 0; 636 res = strtod(buf, (char **) NULL); 637 if (flags & LONG) 638 *va_arg(ap, double *) = res; 639 else 640 *va_arg(ap, float *) = res; 641 nassigned++; 642 } 643 nread += p - buf; 644 break; 645 #endif /* FLOATING_POINT */ 646 } 647 } 648 input_failure: 649 return (nassigned ? nassigned : -1); 650 match_failure: 651 return (nassigned); 652 } 653 654 /* 655 * Fill in the given table from the scanset at the given format 656 * (just after `['). Return a pointer to the character past the 657 * closing `]'. The table has a 1 wherever characters should be 658 * considered part of the scanset. 
/*
 * __sccl -- build the membership table for a %[...] scanset.
 *
 * tab	receives 256 entries, one per unsigned char value: 1 where the
 *	character belongs to the scanset, 0 everywhere else.
 * fmt	points just past the opening `[' in the format string.
 *
 * Returns a pointer to the character past the closing `]', or to the
 * terminating NUL when the format ends before the scanset is closed.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	/* should probably use memset here */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': with an equal end
			 * point the fill loop below would still store one
			 * entry past c (marking the wrong character, or
			 * writing tab[256] when c is 255).
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range */
				tab[++c] = v;
			} while (c < n);
#if 1	/* XXX another disgusting compatibility hack */
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;
#else
			c = *fmt++;
			if (c == 0)
				return (fmt - 1);
			if (c == ']')
				return (fmt);
#endif
			break;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}