xref: /csrg-svn/lib/libc/stdio/vfscanf.c (revision 46220)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Chris Torek.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)vfscanf.c	5.4 (Berkeley) 02/01/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 #include <sys/cdefs.h>
16 #include <stdio.h>
17 #include <ctype.h>
18 #include <stdlib.h>
19 #if __STDC__
20 #include <stdarg.h>
21 #else
22 #include <varargs.h>
23 #endif
24 #include "local.h"
25 
26 #define FLOATING_POINT
27 
28 #ifdef FLOATING_POINT
29 #include "floatio.h"
30 #define	BUF	(MAXEXP+MAXFRACT+3)	/* 3 = sign + decimal point + NUL */
31 #else
32 #define	BUF	40
33 #endif
34 
/*
 * Flags used during conversion.
 * They are OR'ed together into the `flags' word of __svfscanf.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double; unimplemented */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/PFXOK and EXPOK/NZDIGITS deliberately share the same bit
 * values: a single conversion is either floating or integral, so
 * the two pairs are never in use at the same time.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 * Once the format character has been decoded, one of these replaces
 * it and selects the code that actually consumes the input.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtol or strtoul */
#define	CT_FLOAT	4	/* floating, i.e., strtod */

/* Local shorthands; plain macros, not typedefs. */
#define u_char unsigned char
#define u_long unsigned long

/* Scanset table builder for %[...], defined at the end of this file. */
static u_char *__sccl();
72 
73 /*
74  * vfscanf
75  */
76 __svfscanf(fp, fmt0, ap)
77 	register FILE *fp;
78 	char const *fmt0;
79 	va_list ap;
80 {
81 	register u_char *fmt = (u_char *)fmt0;
82 	register int c;		/* character from format, or conversion */
83 	register size_t width;	/* field width, or 0 */
84 	register char *p;	/* points into all kinds of strings */
85 	register int n;		/* handy integer */
86 	register int flags;	/* flags as defined above */
87 	register char *p0;	/* saves original value of p when necessary */
88 	int nassigned;		/* number of fields assigned */
89 	int nread;		/* number of characters consumed from fp */
90 	int base;		/* base argument to strtol/strtoul */
91 	u_long (*ccfn)();	/* conversion function (strtol/strtoul) */
92 	char ccltab[256];	/* character class table for %[...] */
93 	char buf[BUF];		/* buffer for numeric conversions */
94 
95 	/* `basefix' is used to avoid `if' tests in the integer scanner */
96 	static short basefix[17] =
97 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
98 
99 	nassigned = 0;
100 	nread = 0;
101 	for (;;) {
102 		c = *fmt++;
103 		if (c == 0)
104 			return (nassigned);
105 		if (isspace(c)) {
106 			for (;;) {
107 				if (fp->_r <= 0 && __srefill(fp))
108 					return (nassigned);
109 				if (!isspace(*fp->_p))
110 					break;
111 				nread++, fp->_r--, fp->_p++;
112 			}
113 			continue;
114 		}
115 		if (c != '%')
116 			goto literal;
117 		width = 0;
118 		flags = 0;
119 		/*
120 		 * switch on the format.  continue if done;
121 		 * break once format type is derived.
122 		 */
123 again:		c = *fmt++;
124 		switch (c) {
125 		case '%':
126 literal:
127 			if (fp->_r <= 0 && __srefill(fp))
128 				goto input_failure;
129 			if (*fp->_p != c)
130 				goto match_failure;
131 			fp->_r--, fp->_p++;
132 			nread++;
133 			continue;
134 
135 		case '*':
136 			flags |= SUPPRESS;
137 			goto again;
138 		case 'l':
139 			flags |= LONG;
140 			goto again;
141 		case 'L':
142 			flags |= LONGDBL;
143 			goto again;
144 		case 'h':
145 			flags |= SHORT;
146 			goto again;
147 
148 		case '0': case '1': case '2': case '3': case '4':
149 		case '5': case '6': case '7': case '8': case '9':
150 			width = width * 10 + c - '0';
151 			goto again;
152 
153 		/*
154 		 * Conversions.
155 		 * Those marked `compat' are for 4.[123]BSD compatibility.
156 		 *
157 		 * (According to ANSI, E and X formats are supposed
158 		 * to the same as e and x.  Sorry about that.)
159 		 */
160 		case 'D':	/* compat */
161 			flags |= LONG;
162 			/* FALLTHROUGH */
163 		case 'd':
164 			c = CT_INT;
165 			ccfn = (u_long (*)())strtol;
166 			base = 10;
167 			break;
168 
169 		case 'i':
170 			c = CT_INT;
171 			ccfn = (u_long (*)())strtol;
172 			base = 0;
173 			break;
174 
175 		case 'O':	/* compat */
176 			flags |= LONG;
177 			/* FALLTHROUGH */
178 		case 'o':
179 			c = CT_INT;
180 			ccfn = strtoul;
181 			base = 8;
182 			break;
183 
184 		case 'u':
185 			c = CT_INT;
186 			ccfn = strtoul;
187 			base = 10;
188 			break;
189 
190 		case 'X':	/* compat   XXX */
191 			flags |= LONG;
192 			/* FALLTHROUGH */
193 		case 'x':
194 			flags |= PFXOK;	/* enable 0x prefixing */
195 			c = CT_INT;
196 			ccfn = strtoul;
197 			base = 16;
198 			break;
199 
200 #ifdef FLOATING_POINT
201 		case 'E':	/* compat   XXX */
202 		case 'F':	/* compat */
203 			flags |= LONG;
204 			/* FALLTHROUGH */
205 		case 'e': case 'f': case 'g':
206 			c = CT_FLOAT;
207 			break;
208 #endif
209 
210 		case 's':
211 			c = CT_STRING;
212 			break;
213 
214 		case '[':
215 			fmt = __sccl(ccltab, fmt);
216 			flags |= NOSKIP;
217 			c = CT_CCL;
218 			break;
219 
220 		case 'c':
221 			flags |= NOSKIP;
222 			c = CT_CHAR;
223 			break;
224 
225 		case 'p':	/* pointer format is like hex */
226 			flags |= POINTER | PFXOK;
227 			c = CT_INT;
228 			ccfn = strtoul;
229 			base = 16;
230 			break;
231 
232 		case 'n':
233 			if (flags & SUPPRESS)	/* ??? */
234 				continue;
235 			if (flags & SHORT)
236 				*va_arg(ap, short *) = nread;
237 			else if (flags & LONG)
238 				*va_arg(ap, long *) = nread;
239 			else
240 				*va_arg(ap, int *) = nread;
241 			continue;
242 
243 		/*
244 		 * Disgusting backwards compatibility hacks.	XXX
245 		 */
246 		case '\0':	/* compat */
247 			return (EOF);
248 
249 		default:	/* compat */
250 			if (isupper(c))
251 				flags |= LONG;
252 			c = CT_INT;
253 			ccfn = (u_long (*)())strtol;
254 			base = 10;
255 			break;
256 		}
257 
258 		/*
259 		 * We have a conversion that requires input.
260 		 */
261 		if (fp->_r <= 0 && __srefill(fp))
262 			goto input_failure;
263 
264 		/*
265 		 * Consume leading white space, except for formats
266 		 * that suppress this.
267 		 */
268 		if ((flags & NOSKIP) == 0) {
269 			while (isspace(*fp->_p)) {
270 				nread++;
271 				if (--fp->_r > 0)
272 					fp->_p++;
273 				else if (__srefill(fp))
274 					goto input_failure;
275 			}
276 			/*
277 			 * Note that there is at least one character in
278 			 * the buffer, so conversions that do not set NOSKIP
279 			 * ca no longer result in an input failure.
280 			 */
281 		}
282 
283 		/*
284 		 * Do the conversion.
285 		 */
286 		switch (c) {
287 
288 		case CT_CHAR:
289 			/* scan arbitrary characters (sets NOSKIP) */
290 			if (width == 0)
291 				width = 1;
292 			if (flags & SUPPRESS) {
293 				size_t sum = 0;
294 				for (;;) {
295 					if ((n = fp->_r) < width) {
296 						sum += n;
297 						width -= n;
298 						fp->_p += n;
299 						if (__srefill(fp)) {
300 							if (sum == 0)
301 							    goto input_failure;
302 							break;
303 						}
304 					} else {
305 						sum += width;
306 						fp->_r -= width;
307 						fp->_p += width;
308 						break;
309 					}
310 				}
311 				nread += sum;
312 			} else {
313 				size_t r = fread((void *)va_arg(ap, char *), 1,
314 				    width, fp);
315 
316 				if (r == 0)
317 					goto input_failure;
318 				nread += r;
319 				nassigned++;
320 			}
321 			break;
322 
323 		case CT_CCL:
324 			/* scan a (nonempty) character class (sets NOSKIP) */
325 			if (width == 0)
326 				width = ~0;	/* `infinity' */
327 			/* take only those things in the class */
328 			if (flags & SUPPRESS) {
329 				n = 0;
330 				while (ccltab[*fp->_p]) {
331 					n++, fp->_r--, fp->_p++;
332 					if (--width == 0)
333 						break;
334 					if (fp->_r <= 0 && __srefill(fp)) {
335 						if (n == 0)
336 							goto input_failure;
337 						break;
338 					}
339 				}
340 				if (n == 0)
341 					goto match_failure;
342 			} else {
343 				p0 = p = va_arg(ap, char *);
344 				while (ccltab[*fp->_p]) {
345 					fp->_r--;
346 					*p++ = *fp->_p++;
347 					if (--width == 0)
348 						break;
349 					if (fp->_r <= 0 && __srefill(fp)) {
350 						if (p == p0)
351 							goto input_failure;
352 						break;
353 					}
354 				}
355 				n = p - p0;
356 				if (n == 0)
357 					goto match_failure;
358 				*p = 0;
359 				nassigned++;
360 			}
361 			nread += n;
362 			break;
363 
364 		case CT_STRING:
365 			/* like CCL, but zero-length string OK, & no NOSKIP */
366 			if (width == 0)
367 				width = ~0;
368 			if (flags & SUPPRESS) {
369 				n = 0;
370 				while (!isspace(*fp->_p)) {
371 					n++, fp->_r--, fp->_p++;
372 					if (--width == 0)
373 						break;
374 					if (fp->_r <= 0 && __srefill(fp))
375 						break;
376 				}
377 				nread += n;
378 			} else {
379 				p0 = p = va_arg(ap, char *);
380 				while (!isspace(*fp->_p)) {
381 					fp->_r--;
382 					*p++ = *fp->_p++;
383 					if (--width == 0)
384 						break;
385 					if (fp->_r <= 0 && __srefill(fp))
386 						break;
387 				}
388 				*p = 0;
389 				nread += p - p0;
390 				nassigned++;
391 			}
392 			continue;
393 
394 		case CT_INT:
395 			/* scan an integer as if by strtol/strtoul */
396 #ifdef hardway
397 			if (width == 0 || width > sizeof(buf) - 1)
398 				width = sizeof(buf) - 1;
399 #else
400 			/* size_t is unsigned, hence this optimisation */
401 			if (--width > sizeof(buf) - 2)
402 				width = sizeof(buf) - 2;
403 			width++;
404 #endif
405 			flags |= SIGNOK | NDIGITS | NZDIGITS;
406 			for (p = buf; width; width--) {
407 				c = *fp->_p;
408 				/*
409 				 * Switch on the character; `goto ok'
410 				 * if we accept it as a part of number.
411 				 */
412 				switch (c) {
413 
414 				/*
415 				 * The digit 0 is always legal, but is
416 				 * special.  For %i conversions, if no
417 				 * digits (zero or nonzero) have been
418 				 * scanned (only signs), we will have
419 				 * base==0.  In that case, we should set
420 				 * it to 8 and enable 0x prefixing.
421 				 * Also, if we have not scanned zero digits
422 				 * before this, do not turn off prefixing
423 				 * (someone else will turn it off if we
424 				 * have scanned any nonzero digits).
425 				 */
426 				case '0':
427 					if (base == 0) {
428 						base = 8;
429 						flags |= PFXOK;
430 					}
431 					if (flags & NZDIGITS)
432 					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
433 					else
434 					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
435 					goto ok;
436 
437 				/* 1 through 7 always legal */
438 				case '1': case '2': case '3':
439 				case '4': case '5': case '6': case '7':
440 					base = basefix[base];
441 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
442 					goto ok;
443 
444 				/* digits 8 and 9 ok iff decimal or hex */
445 				case '8': case '9':
446 					base = basefix[base];
447 					if (base <= 8)
448 						break;	/* not legal here */
449 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
450 					goto ok;
451 
452 				/* letters ok iff hex */
453 				case 'A': case 'B': case 'C':
454 				case 'D': case 'E': case 'F':
455 				case 'a': case 'b': case 'c':
456 				case 'd': case 'e': case 'f':
457 					/* no need to fix base here */
458 					if (base <= 10)
459 						break;	/* not legal here */
460 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
461 					goto ok;
462 
463 				/* sign ok only as first character */
464 				case '+': case '-':
465 					if (flags & SIGNOK) {
466 						flags &= ~SIGNOK;
467 						goto ok;
468 					}
469 					break;
470 
471 				/* x ok iff flag still set & 2nd char */
472 				case 'x': case 'X':
473 					if (flags & PFXOK && p == buf + 1) {
474 						base = 16;	/* if %i */
475 						flags &= ~PFXOK;
476 						goto ok;
477 					}
478 					break;
479 				}
480 
481 				/*
482 				 * If we got here, c is not a legal character
483 				 * for a number.  Stop accumulating digits.
484 				 */
485 				break;
486 		ok:
487 				/*
488 				 * c is legal: store it and look at the next.
489 				 */
490 				*p++ = c;
491 				if (--fp->_r > 0)
492 					fp->_p++;
493 				else if (__srefill(fp))
494 					break;		/* EOF */
495 			}
496 			/*
497 			 * If we had only a sign, it is no good; push
498 			 * back the sign.  If the number ends in `x',
499 			 * it was [sign] '0' 'x', so push back the x
500 			 * and treat it as [sign] '0'.
501 			 */
502 			if (flags & NDIGITS) {
503 				if (p > buf)
504 					(void) ungetc(*(u_char *)--p, fp);
505 				goto match_failure;
506 			}
507 			c = ((u_char *)p)[-1];
508 			if (c == 'x' || c == 'X') {
509 				--p;
510 				(void) ungetc(c, fp);
511 			}
512 			if ((flags & SUPPRESS) == 0) {
513 				u_long res;
514 
515 				*p = 0;
516 				res = (*ccfn)(buf, (char **)NULL, base);
517 				if (flags & POINTER)
518 					*va_arg(ap, void **) = (void *)res;
519 				else if (flags & SHORT)
520 					*va_arg(ap, short *) = res;
521 				else if (flags & LONG)
522 					*va_arg(ap, long *) = res;
523 				else
524 					*va_arg(ap, int *) = res;
525 				nassigned++;
526 			}
527 			nread += p - buf;
528 			break;
529 
530 #ifdef FLOATING_POINT
531 		case CT_FLOAT:
532 			/* scan a floating point number as if by strtod */
533 #ifdef hardway
534 			if (width == 0 || width > sizeof(buf) - 1)
535 				width = sizeof(buf) - 1;
536 #else
537 			/* size_t is unsigned, hence this optimisation */
538 			if (--width > sizeof(buf) - 2)
539 				width = sizeof(buf) - 2;
540 			width++;
541 #endif
542 			flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
543 			for (p = buf; width; width--) {
544 				c = *fp->_p;
545 				/*
546 				 * This code mimicks the integer conversion
547 				 * code, but is much simpler.
548 				 */
549 				switch (c) {
550 
551 				case '0': case '1': case '2': case '3':
552 				case '4': case '5': case '6': case '7':
553 				case '8': case '9':
554 					flags &= ~(SIGNOK | NDIGITS);
555 					goto fok;
556 
557 				case '+': case '-':
558 					if (flags & SIGNOK) {
559 						flags &= ~SIGNOK;
560 						goto fok;
561 					}
562 					break;
563 				case '.':
564 					if (flags & DPTOK) {
565 						flags &= ~(SIGNOK | DPTOK);
566 						goto fok;
567 					}
568 					break;
569 				case 'e': case 'E':
570 					/* no exponent without some digits */
571 					if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
572 						flags =
573 						    (flags & ~(EXPOK|DPTOK)) |
574 						    SIGNOK | NDIGITS;
575 						goto fok;
576 					}
577 					break;
578 				}
579 				break;
580 		fok:
581 				*p++ = c;
582 				if (--fp->_r > 0)
583 					fp->_p++;
584 				else if (__srefill(fp))
585 					break;	/* EOF */
586 			}
587 			/*
588 			 * If no digits, might be missing exponent digits
589 			 * (just give back the exponent) or might be missing
590 			 * regular digits, but had sign and/or decimal point.
591 			 */
592 			if (flags & NDIGITS) {
593 				if (flags & EXPOK) {
594 					/* no digits at all */
595 					while (p > buf)
596 						ungetc(*(u_char *)--p, fp);
597 					goto match_failure;
598 				}
599 				/* just a bad exponent (e and maybe sign) */
600 				c = *(u_char *)--p;
601 				if (c != 'e' && c != 'E') {
602 					(void) ungetc(c, fp);/* sign */
603 					c = *(u_char *)--p;
604 				}
605 				(void) ungetc(c, fp);
606 			}
607 			if ((flags & SUPPRESS) == 0) {
608 				double res;
609 
610 				*p = 0;
611 				res = atof(buf);
612 				if (flags & LONG)
613 					*va_arg(ap, double *) = res;
614 				else
615 					*va_arg(ap, float *) = res;
616 				nassigned++;
617 			}
618 			nread += p - buf;
619 			break;
620 #endif /* FLOATING_POINT */
621 		}
622 	}
623 input_failure:
624 	return (nassigned ? nassigned : -1);
625 match_failure:
626 	return (nassigned);
627 }
628 
/*
 * __sccl --
 *	Fill in the given table from the scanset at the given format
 *	(just after `[').  Return a pointer to the character past the
 *	closing `]', or to the terminating NUL if the format ends before
 *	a closing `]' is seen.  The table (256 entries, indexed by
 *	character value) gets a 1 wherever characters should be
 *	considered part of the scanset and a 0 everywhere else.
 */
static unsigned char *
__sccl(tab, fmt)
	register char *tab;
	register unsigned char *fmt;
{
	register int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	/* should probably use memset here */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=', not `<': the fill loop
			 * below stores at ++c before comparing, so letting
			 * n == c through would mark c + 1 as well, and
			 * would write past the table when c is 255.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range */
				tab[++c] = v;
			} while (c < n);
#if 1	/* XXX another disgusting compatibility hack */
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;
#else
			c = *fmt++;
			if (c == 0)
				return (fmt - 1);
			if (c == ']')
				return (fmt);
#endif
			break;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
726