1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * %sccs.include.redist.c%
9 */
10
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 06/04/93";
13 #endif /* LIBC_SCCS and not lint */
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <ctype.h>
18 #if __STDC__
19 #include <stdarg.h>
20 #else
21 #include <varargs.h>
22 #endif
23 #include "local.h"
24
25 #define FLOATING_POINT
26
27 #include "floatio.h"
28 #define BUF 513 /* Maximum length of numeric string. */
29
30 /*
31 * Flags used during conversion.
32 */
33 #define LONG 0x01 /* l: long or double */
34 #define LONGDBL 0x02 /* L: long double; unimplemented */
35 #define SHORT 0x04 /* h: short */
36 #define SUPPRESS 0x08 /* suppress assignment */
37 #define POINTER 0x10 /* weird %p pointer (`fake hex') */
38 #define NOSKIP 0x20 /* do not skip blanks */
39
40 /*
41 * The following are used in numeric conversions only:
42 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
43 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
44 */
45 #define SIGNOK 0x40 /* +/- is (still) legal */
46 #define NDIGITS 0x80 /* no digits detected */
47
48 #define DPTOK 0x100 /* (float) decimal point is still legal */
49 #define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */
50
51 #define PFXOK 0x100 /* 0x prefix is (still) legal */
52 #define NZDIGITS 0x200 /* no zero digits detected */
53
54 /*
55 * Conversion types.
56 */
57 #define CT_CHAR 0 /* %c conversion */
58 #define CT_CCL 1 /* %[...] conversion */
59 #define CT_STRING 2 /* %s conversion */
60 #define CT_INT 3 /* integer, i.e., strtol or strtoul */
61 #define CT_FLOAT 4 /* floating, i.e., strtod */
62
63 #define u_char unsigned char
64 #define u_long unsigned long
65
66 static u_char *__sccl();
67
68 /*
69 * vfscanf
70 */
__svfscanf(fp,fmt0,ap)71 __svfscanf(fp, fmt0, ap)
72 register FILE *fp;
73 char const *fmt0;
74 va_list ap;
75 {
76 register u_char *fmt = (u_char *)fmt0;
77 register int c; /* character from format, or conversion */
78 register size_t width; /* field width, or 0 */
79 register char *p; /* points into all kinds of strings */
80 register int n; /* handy integer */
81 register int flags; /* flags as defined above */
82 register char *p0; /* saves original value of p when necessary */
83 int nassigned; /* number of fields assigned */
84 int nread; /* number of characters consumed from fp */
85 int base; /* base argument to strtol/strtoul */
86 u_long (*ccfn)(); /* conversion function (strtol/strtoul) */
87 char ccltab[256]; /* character class table for %[...] */
88 char buf[BUF]; /* buffer for numeric conversions */
89
90 /* `basefix' is used to avoid `if' tests in the integer scanner */
91 static short basefix[17] =
92 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
93
94 nassigned = 0;
95 nread = 0;
96 base = 0; /* XXX just to keep gcc happy */
97 ccfn = NULL; /* XXX just to keep gcc happy */
98 for (;;) {
99 c = *fmt++;
100 if (c == 0)
101 return (nassigned);
102 if (isspace(c)) {
103 for (;;) {
104 if (fp->_r <= 0 && __srefill(fp))
105 return (nassigned);
106 if (!isspace(*fp->_p))
107 break;
108 nread++, fp->_r--, fp->_p++;
109 }
110 continue;
111 }
112 if (c != '%')
113 goto literal;
114 width = 0;
115 flags = 0;
116 /*
117 * switch on the format. continue if done;
118 * break once format type is derived.
119 */
120 again: c = *fmt++;
121 switch (c) {
122 case '%':
123 literal:
124 if (fp->_r <= 0 && __srefill(fp))
125 goto input_failure;
126 if (*fp->_p != c)
127 goto match_failure;
128 fp->_r--, fp->_p++;
129 nread++;
130 continue;
131
132 case '*':
133 flags |= SUPPRESS;
134 goto again;
135 case 'l':
136 flags |= LONG;
137 goto again;
138 case 'L':
139 flags |= LONGDBL;
140 goto again;
141 case 'h':
142 flags |= SHORT;
143 goto again;
144
145 case '0': case '1': case '2': case '3': case '4':
146 case '5': case '6': case '7': case '8': case '9':
147 width = width * 10 + c - '0';
148 goto again;
149
150 /*
151 * Conversions.
152 * Those marked `compat' are for 4.[123]BSD compatibility.
153 *
154 * (According to ANSI, E and X formats are supposed
155 * to the same as e and x. Sorry about that.)
156 */
157 case 'D': /* compat */
158 flags |= LONG;
159 /* FALLTHROUGH */
160 case 'd':
161 c = CT_INT;
162 ccfn = (u_long (*)())strtol;
163 base = 10;
164 break;
165
166 case 'i':
167 c = CT_INT;
168 ccfn = (u_long (*)())strtol;
169 base = 0;
170 break;
171
172 case 'O': /* compat */
173 flags |= LONG;
174 /* FALLTHROUGH */
175 case 'o':
176 c = CT_INT;
177 ccfn = strtoul;
178 base = 8;
179 break;
180
181 case 'u':
182 c = CT_INT;
183 ccfn = strtoul;
184 base = 10;
185 break;
186
187 case 'X': /* compat XXX */
188 flags |= LONG;
189 /* FALLTHROUGH */
190 case 'x':
191 flags |= PFXOK; /* enable 0x prefixing */
192 c = CT_INT;
193 ccfn = strtoul;
194 base = 16;
195 break;
196
197 #ifdef FLOATING_POINT
198 case 'E': /* compat XXX */
199 case 'F': /* compat */
200 flags |= LONG;
201 /* FALLTHROUGH */
202 case 'e': case 'f': case 'g':
203 c = CT_FLOAT;
204 break;
205 #endif
206
207 case 's':
208 c = CT_STRING;
209 break;
210
211 case '[':
212 fmt = __sccl(ccltab, fmt);
213 flags |= NOSKIP;
214 c = CT_CCL;
215 break;
216
217 case 'c':
218 flags |= NOSKIP;
219 c = CT_CHAR;
220 break;
221
222 case 'p': /* pointer format is like hex */
223 flags |= POINTER | PFXOK;
224 c = CT_INT;
225 ccfn = strtoul;
226 base = 16;
227 break;
228
229 case 'n':
230 if (flags & SUPPRESS) /* ??? */
231 continue;
232 if (flags & SHORT)
233 *va_arg(ap, short *) = nread;
234 else if (flags & LONG)
235 *va_arg(ap, long *) = nread;
236 else
237 *va_arg(ap, int *) = nread;
238 continue;
239
240 /*
241 * Disgusting backwards compatibility hacks. XXX
242 */
243 case '\0': /* compat */
244 return (EOF);
245
246 default: /* compat */
247 if (isupper(c))
248 flags |= LONG;
249 c = CT_INT;
250 ccfn = (u_long (*)())strtol;
251 base = 10;
252 break;
253 }
254
255 /*
256 * We have a conversion that requires input.
257 */
258 if (fp->_r <= 0 && __srefill(fp))
259 goto input_failure;
260
261 /*
262 * Consume leading white space, except for formats
263 * that suppress this.
264 */
265 if ((flags & NOSKIP) == 0) {
266 while (isspace(*fp->_p)) {
267 nread++;
268 if (--fp->_r > 0)
269 fp->_p++;
270 else if (__srefill(fp))
271 goto input_failure;
272 }
273 /*
274 * Note that there is at least one character in
275 * the buffer, so conversions that do not set NOSKIP
276 * ca no longer result in an input failure.
277 */
278 }
279
280 /*
281 * Do the conversion.
282 */
283 switch (c) {
284
285 case CT_CHAR:
286 /* scan arbitrary characters (sets NOSKIP) */
287 if (width == 0)
288 width = 1;
289 if (flags & SUPPRESS) {
290 size_t sum = 0;
291 for (;;) {
292 if ((n = fp->_r) < width) {
293 sum += n;
294 width -= n;
295 fp->_p += n;
296 if (__srefill(fp)) {
297 if (sum == 0)
298 goto input_failure;
299 break;
300 }
301 } else {
302 sum += width;
303 fp->_r -= width;
304 fp->_p += width;
305 break;
306 }
307 }
308 nread += sum;
309 } else {
310 size_t r = fread((void *)va_arg(ap, char *), 1,
311 width, fp);
312
313 if (r == 0)
314 goto input_failure;
315 nread += r;
316 nassigned++;
317 }
318 break;
319
320 case CT_CCL:
321 /* scan a (nonempty) character class (sets NOSKIP) */
322 if (width == 0)
323 width = ~0; /* `infinity' */
324 /* take only those things in the class */
325 if (flags & SUPPRESS) {
326 n = 0;
327 while (ccltab[*fp->_p]) {
328 n++, fp->_r--, fp->_p++;
329 if (--width == 0)
330 break;
331 if (fp->_r <= 0 && __srefill(fp)) {
332 if (n == 0)
333 goto input_failure;
334 break;
335 }
336 }
337 if (n == 0)
338 goto match_failure;
339 } else {
340 p0 = p = va_arg(ap, char *);
341 while (ccltab[*fp->_p]) {
342 fp->_r--;
343 *p++ = *fp->_p++;
344 if (--width == 0)
345 break;
346 if (fp->_r <= 0 && __srefill(fp)) {
347 if (p == p0)
348 goto input_failure;
349 break;
350 }
351 }
352 n = p - p0;
353 if (n == 0)
354 goto match_failure;
355 *p = 0;
356 nassigned++;
357 }
358 nread += n;
359 break;
360
361 case CT_STRING:
362 /* like CCL, but zero-length string OK, & no NOSKIP */
363 if (width == 0)
364 width = ~0;
365 if (flags & SUPPRESS) {
366 n = 0;
367 while (!isspace(*fp->_p)) {
368 n++, fp->_r--, fp->_p++;
369 if (--width == 0)
370 break;
371 if (fp->_r <= 0 && __srefill(fp))
372 break;
373 }
374 nread += n;
375 } else {
376 p0 = p = va_arg(ap, char *);
377 while (!isspace(*fp->_p)) {
378 fp->_r--;
379 *p++ = *fp->_p++;
380 if (--width == 0)
381 break;
382 if (fp->_r <= 0 && __srefill(fp))
383 break;
384 }
385 *p = 0;
386 nread += p - p0;
387 nassigned++;
388 }
389 continue;
390
391 case CT_INT:
392 /* scan an integer as if by strtol/strtoul */
393 #ifdef hardway
394 if (width == 0 || width > sizeof(buf) - 1)
395 width = sizeof(buf) - 1;
396 #else
397 /* size_t is unsigned, hence this optimisation */
398 if (--width > sizeof(buf) - 2)
399 width = sizeof(buf) - 2;
400 width++;
401 #endif
402 flags |= SIGNOK | NDIGITS | NZDIGITS;
403 for (p = buf; width; width--) {
404 c = *fp->_p;
405 /*
406 * Switch on the character; `goto ok'
407 * if we accept it as a part of number.
408 */
409 switch (c) {
410
411 /*
412 * The digit 0 is always legal, but is
413 * special. For %i conversions, if no
414 * digits (zero or nonzero) have been
415 * scanned (only signs), we will have
416 * base==0. In that case, we should set
417 * it to 8 and enable 0x prefixing.
418 * Also, if we have not scanned zero digits
419 * before this, do not turn off prefixing
420 * (someone else will turn it off if we
421 * have scanned any nonzero digits).
422 */
423 case '0':
424 if (base == 0) {
425 base = 8;
426 flags |= PFXOK;
427 }
428 if (flags & NZDIGITS)
429 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
430 else
431 flags &= ~(SIGNOK|PFXOK|NDIGITS);
432 goto ok;
433
434 /* 1 through 7 always legal */
435 case '1': case '2': case '3':
436 case '4': case '5': case '6': case '7':
437 base = basefix[base];
438 flags &= ~(SIGNOK | PFXOK | NDIGITS);
439 goto ok;
440
441 /* digits 8 and 9 ok iff decimal or hex */
442 case '8': case '9':
443 base = basefix[base];
444 if (base <= 8)
445 break; /* not legal here */
446 flags &= ~(SIGNOK | PFXOK | NDIGITS);
447 goto ok;
448
449 /* letters ok iff hex */
450 case 'A': case 'B': case 'C':
451 case 'D': case 'E': case 'F':
452 case 'a': case 'b': case 'c':
453 case 'd': case 'e': case 'f':
454 /* no need to fix base here */
455 if (base <= 10)
456 break; /* not legal here */
457 flags &= ~(SIGNOK | PFXOK | NDIGITS);
458 goto ok;
459
460 /* sign ok only as first character */
461 case '+': case '-':
462 if (flags & SIGNOK) {
463 flags &= ~SIGNOK;
464 goto ok;
465 }
466 break;
467
468 /* x ok iff flag still set & 2nd char */
469 case 'x': case 'X':
470 if (flags & PFXOK && p == buf + 1) {
471 base = 16; /* if %i */
472 flags &= ~PFXOK;
473 goto ok;
474 }
475 break;
476 }
477
478 /*
479 * If we got here, c is not a legal character
480 * for a number. Stop accumulating digits.
481 */
482 break;
483 ok:
484 /*
485 * c is legal: store it and look at the next.
486 */
487 *p++ = c;
488 if (--fp->_r > 0)
489 fp->_p++;
490 else if (__srefill(fp))
491 break; /* EOF */
492 }
493 /*
494 * If we had only a sign, it is no good; push
495 * back the sign. If the number ends in `x',
496 * it was [sign] '0' 'x', so push back the x
497 * and treat it as [sign] '0'.
498 */
499 if (flags & NDIGITS) {
500 if (p > buf)
501 (void) ungetc(*(u_char *)--p, fp);
502 goto match_failure;
503 }
504 c = ((u_char *)p)[-1];
505 if (c == 'x' || c == 'X') {
506 --p;
507 (void) ungetc(c, fp);
508 }
509 if ((flags & SUPPRESS) == 0) {
510 u_long res;
511
512 *p = 0;
513 res = (*ccfn)(buf, (char **)NULL, base);
514 if (flags & POINTER)
515 *va_arg(ap, void **) = (void *)res;
516 else if (flags & SHORT)
517 *va_arg(ap, short *) = res;
518 else if (flags & LONG)
519 *va_arg(ap, long *) = res;
520 else
521 *va_arg(ap, int *) = res;
522 nassigned++;
523 }
524 nread += p - buf;
525 break;
526
527 #ifdef FLOATING_POINT
528 case CT_FLOAT:
529 /* scan a floating point number as if by strtod */
530 #ifdef hardway
531 if (width == 0 || width > sizeof(buf) - 1)
532 width = sizeof(buf) - 1;
533 #else
534 /* size_t is unsigned, hence this optimisation */
535 if (--width > sizeof(buf) - 2)
536 width = sizeof(buf) - 2;
537 width++;
538 #endif
539 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
540 for (p = buf; width; width--) {
541 c = *fp->_p;
542 /*
543 * This code mimicks the integer conversion
544 * code, but is much simpler.
545 */
546 switch (c) {
547
548 case '0': case '1': case '2': case '3':
549 case '4': case '5': case '6': case '7':
550 case '8': case '9':
551 flags &= ~(SIGNOK | NDIGITS);
552 goto fok;
553
554 case '+': case '-':
555 if (flags & SIGNOK) {
556 flags &= ~SIGNOK;
557 goto fok;
558 }
559 break;
560 case '.':
561 if (flags & DPTOK) {
562 flags &= ~(SIGNOK | DPTOK);
563 goto fok;
564 }
565 break;
566 case 'e': case 'E':
567 /* no exponent without some digits */
568 if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
569 flags =
570 (flags & ~(EXPOK|DPTOK)) |
571 SIGNOK | NDIGITS;
572 goto fok;
573 }
574 break;
575 }
576 break;
577 fok:
578 *p++ = c;
579 if (--fp->_r > 0)
580 fp->_p++;
581 else if (__srefill(fp))
582 break; /* EOF */
583 }
584 /*
585 * If no digits, might be missing exponent digits
586 * (just give back the exponent) or might be missing
587 * regular digits, but had sign and/or decimal point.
588 */
589 if (flags & NDIGITS) {
590 if (flags & EXPOK) {
591 /* no digits at all */
592 while (p > buf)
593 ungetc(*(u_char *)--p, fp);
594 goto match_failure;
595 }
596 /* just a bad exponent (e and maybe sign) */
597 c = *(u_char *)--p;
598 if (c != 'e' && c != 'E') {
599 (void) ungetc(c, fp);/* sign */
600 c = *(u_char *)--p;
601 }
602 (void) ungetc(c, fp);
603 }
604 if ((flags & SUPPRESS) == 0) {
605 double res;
606
607 *p = 0;
608 res = strtod(buf,(char **) NULL);
609 if (flags & LONG)
610 *va_arg(ap, double *) = res;
611 else
612 *va_arg(ap, float *) = res;
613 nassigned++;
614 }
615 nread += p - buf;
616 break;
617 #endif /* FLOATING_POINT */
618 }
619 }
620 input_failure:
621 return (nassigned ? nassigned : -1);
622 match_failure:
623 return (nassigned);
624 }
625
/*
 * __sccl --
 *	Fill in the given table from the scanset at the given format
 *	(just after `[').  Return a pointer to the character past the
 *	closing `]', or to the terminating NUL if the format ends before
 *	the scanset is closed.  The table has a 1 wherever characters
 *	should be considered part of the scanset and a 0 elsewhere.
 *
 *	tab must have room for 256 entries, one per unsigned char value.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	/* should probably use memset here */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': with n == c the
			 * fill loop below would pre-increment past n,
			 * marking c+1 instead of `-' and, for c == 255,
			 * writing one entry beyond the 256-entry table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;		/* resume the for(;;) */
			}
			fmt++;
			do {			/* fill in the range */
				tab[++c] = v;
			} while (c < n);
#if 1	/* XXX another disgusting compatibility hack */
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;
#else
			c = *fmt++;
			if (c == 0)
				return (fmt - 1);
			if (c == ']')
				return (fmt);
#endif
			break;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
723