xref: /netbsd-src/lib/libc/stdio/vfscanf.c (revision ce099b40997c43048fb78bd578195f81d2456523)
1 /*	$NetBSD: vfscanf.c,v 1.38 2007/04/01 19:23:55 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Torek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #if defined(LIBC_SCCS) && !defined(lint)
37 #if 0
38 static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
39 __FBSDID("$FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.41 2007/01/09 00:28:07 imp Exp $");
40 #else
41 __RCSID("$NetBSD: vfscanf.c,v 1.38 2007/04/01 19:23:55 christos Exp $");
42 #endif
43 #endif /* LIBC_SCCS and not lint */
44 
45 #include "namespace.h"
46 #include <assert.h>
47 #include <ctype.h>
48 #include <inttypes.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <stddef.h>
52 #include <stdarg.h>
53 #include <string.h>
54 #include <wchar.h>
55 #include <wctype.h>
56 
57 #include "reentrant.h"
58 #include "local.h"
59 
60 #ifndef NO_FLOATING_POINT
61 #include <locale.h>
62 #endif
63 
64 /*
65  * Provide an external name for vfscanf.  Note, we don't use the normal
66  * namespace.h method; stdio routines explicitly use the internal name
67  * __svfscanf.
68  */
69 #ifdef __weak_alias
70 __weak_alias(vfscanf,__svfscanf)
71 #endif
72 
73 #define	BUF		513	/* Maximum length of numeric string. */
74 
75 /*
76  * Flags used during conversion.
77  */
78 #define	LONG		0x0001	/* l: long or double */
79 #define	LONGDBL		0x0002	/* L: long double */
80 #define	SHORT		0x0004	/* h: short */
81 #define	SUPPRESS	0x0008	/* *: suppress assignment */
82 #define	POINTER		0x0010	/* p: void * (as hex) */
83 #define	NOSKIP		0x0020	/* [ or c: do not skip blanks */
84 #define	LONGLONG	0x0400	/* ll: long long (+ deprecated q: quad) */
85 #define	INTMAXT		0x0800	/* j: intmax_t */
86 #define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
87 #define	SIZET		0x2000	/* z: size_t */
88 #define	SHORTSHORT	0x4000	/* hh: char */
89 #define	UNSIGNED	0x8000	/* %[oupxX] conversions */
90 
91 /*
92  * The following are used in integral conversions only:
93  * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
94  */
95 #define	SIGNOK		0x00040	/* +/- is (still) legal */
96 #define	NDIGITS		0x00080	/* no digits detected */
97 #define	PFXOK		0x00100	/* 0x prefix is (still) legal */
98 #define	NZDIGITS	0x00200	/* no zero digits detected */
99 #define	HAVESIGN	0x10000	/* sign detected */
100 
101 /*
102  * Conversion types.
103  */
104 #define	CT_CHAR		0	/* %c conversion */
105 #define	CT_CCL		1	/* %[...] conversion */
106 #define	CT_STRING	2	/* %s conversion */
107 #define	CT_INT		3	/* %[dioupxX] conversion */
108 #define	CT_FLOAT	4	/* %[efgEFG] conversion */
109 
110 static const u_char *__sccl(char *, const u_char *);
111 #ifndef NO_FLOATING_POINT
112 static int parsefloat(FILE *, char *, char *);
113 #endif
114 
115 int __scanfdebug = 0;
116 
117 #define __collate_load_error /*CONSTCOND*/0
/*
 * Compare the single characters c1 and c2 with strcoll(), i.e. in the
 * collation order of the current locale.  The result is negative, zero
 * or positive exactly as strcoll() reports it for the two one-character
 * strings.
 *
 * The scratch strings live in automatic storage: the scanner only holds
 * a per-stream lock, so static buffers would be shared (and raced on) by
 * threads scanning different streams at the same time.
 */
static int
__collate_range_cmp(int c1, int c2)
{
	char s1[2], s2[2];

	s1[0] = (char)c1;
	s1[1] = '\0';
	s2[0] = (char)c2;
	s2[1] = '\0';
	return strcoll(s1, s2);
}
127 
128 
/*
 * __svfscanf - MT-safe entry point.
 *
 * Brackets a call to __svfscanf_unlocked() with FLOCKFILE()/FUNLOCKFILE()
 * on fp and hands back whatever the unlocked scanner returned.
 */
int
__svfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int result;

	FLOCKFILE(fp);
	result = __svfscanf_unlocked(fp, fmt0, ap);
	FUNLOCKFILE(fp);

	return result;
}
142 
143 /*
144  * __svfscanf_unlocked - non-MT-safe version of __svfscanf
145  */
146 int
147 __svfscanf_unlocked(FILE *fp, const char *fmt0, va_list ap)
148 {
149 	const u_char *fmt = (const u_char *)fmt0;
150 	int c;			/* character from format, or conversion */
151 	size_t width;		/* field width, or 0 */
152 	char *p;		/* points into all kinds of strings */
153 	size_t n;		/* handy size_t */
154 	int flags;		/* flags as defined above */
155 	char *p0;		/* saves original value of p when necessary */
156 	int nassigned;		/* number of fields assigned */
157 	int nconversions;	/* number of conversions */
158 	int nread;		/* number of characters consumed from fp */
159 	int base;		/* base argument to conversion function */
160 	char ccltab[256];	/* character class table for %[...] */
161 	char buf[BUF];		/* buffer for numeric and mb conversions */
162 	wchar_t *wcp;		/* handy wide character pointer */
163 	size_t nconv;		/* length of multibyte sequence converted */
164 	static const mbstate_t initial;
165 	mbstate_t mbs;
166 
167 	/* `basefix' is used to avoid `if' tests in the integer scanner */
168 	static const short basefix[17] =
169 		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
170 
171 	_DIAGASSERT(fp != NULL);
172 	_DIAGASSERT(fmt0 != NULL);
173 
174 	_SET_ORIENTATION(fp, -1);
175 
176 	nassigned = 0;
177 	nconversions = 0;
178 	nread = 0;
179 	base = 0;
180 	for (;;) {
181 		c = (unsigned char)*fmt++;
182 		if (c == 0)
183 			return (nassigned);
184 		if (isspace(c)) {
185 			while ((fp->_r > 0 || __srefill(fp) == 0) &&
186 			    isspace(*fp->_p))
187 				nread++, fp->_r--, fp->_p++;
188 			continue;
189 		}
190 		if (c != '%')
191 			goto literal;
192 		width = 0;
193 		flags = 0;
194 		/*
195 		 * switch on the format.  continue if done;
196 		 * break once format type is derived.
197 		 */
198 again:		c = *fmt++;
199 		switch (c) {
200 		case '%':
201 literal:
202 			if (fp->_r <= 0 && __srefill(fp))
203 				goto input_failure;
204 			if (*fp->_p != c)
205 				goto match_failure;
206 			fp->_r--, fp->_p++;
207 			nread++;
208 			continue;
209 
210 		case '*':
211 			flags |= SUPPRESS;
212 			goto again;
213 		case 'j':
214 			flags |= INTMAXT;
215 			goto again;
216 		case 'l':
217 			if (flags & LONG) {
218 				flags &= ~LONG;
219 				flags |= LONGLONG;
220 			} else
221 				flags |= LONG;
222 			goto again;
223 		case 'q':
224 			flags |= LONGLONG;	/* not quite */
225 			goto again;
226 		case 't':
227 			flags |= PTRDIFFT;
228 			goto again;
229 		case 'z':
230 			flags |= SIZET;
231 			goto again;
232 		case 'L':
233 			flags |= LONGDBL;
234 			goto again;
235 		case 'h':
236 			if (flags & SHORT) {
237 				flags &= ~SHORT;
238 				flags |= SHORTSHORT;
239 			} else
240 				flags |= SHORT;
241 			goto again;
242 
243 		case '0': case '1': case '2': case '3': case '4':
244 		case '5': case '6': case '7': case '8': case '9':
245 			width = width * 10 + c - '0';
246 			goto again;
247 
248 		/*
249 		 * Conversions.
250 		 */
251 		case 'd':
252 			c = CT_INT;
253 			base = 10;
254 			break;
255 
256 		case 'i':
257 			c = CT_INT;
258 			base = 0;
259 			break;
260 
261 		case 'o':
262 			c = CT_INT;
263 			flags |= UNSIGNED;
264 			base = 8;
265 			break;
266 
267 		case 'u':
268 			c = CT_INT;
269 			flags |= UNSIGNED;
270 			base = 10;
271 			break;
272 
273 		case 'X':
274 		case 'x':
275 			flags |= PFXOK;	/* enable 0x prefixing */
276 			c = CT_INT;
277 			flags |= UNSIGNED;
278 			base = 16;
279 			break;
280 
281 #ifndef NO_FLOATING_POINT
282 		case 'A': case 'E': case 'F': case 'G':
283 		case 'a': case 'e': case 'f': case 'g':
284 			c = CT_FLOAT;
285 			break;
286 #endif
287 
288 		case 'S':
289 			flags |= LONG;
290 			/* FALLTHROUGH */
291 		case 's':
292 			c = CT_STRING;
293 			break;
294 
295 		case '[':
296 			fmt = __sccl(ccltab, fmt);
297 			flags |= NOSKIP;
298 			c = CT_CCL;
299 			break;
300 
301 		case 'C':
302 			flags |= LONG;
303 			/* FALLTHROUGH */
304 		case 'c':
305 			flags |= NOSKIP;
306 			c = CT_CHAR;
307 			break;
308 
309 		case 'p':	/* pointer format is like hex */
310 			flags |= POINTER | PFXOK;
311 			c = CT_INT;		/* assumes sizeof(uintmax_t) */
312 			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
313 			base = 16;
314 			break;
315 
316 		case 'n':
317 			nconversions++;
318 			if (flags & SUPPRESS)	/* ??? */
319 				continue;
320 			if (flags & SHORTSHORT)
321 				*va_arg(ap, char *) = nread;
322 			else if (flags & SHORT)
323 				*va_arg(ap, short *) = nread;
324 			else if (flags & LONG)
325 				*va_arg(ap, long *) = nread;
326 			else if (flags & LONGLONG)
327 				*va_arg(ap, long long *) = nread;
328 			else if (flags & INTMAXT)
329 				*va_arg(ap, intmax_t *) = nread;
330 			else if (flags & SIZET)
331 				*va_arg(ap, size_t *) = nread;
332 			else if (flags & PTRDIFFT)
333 				*va_arg(ap, ptrdiff_t *) = nread;
334 			else
335 				*va_arg(ap, int *) = nread;
336 			continue;
337 
338 		default:
339 			goto match_failure;
340 
341 		/*
342 		 * Disgusting backwards compatibility hack.	XXX
343 		 */
344 		case '\0':	/* compat */
345 			return (EOF);
346 		}
347 
348 		/*
349 		 * We have a conversion that requires input.
350 		 */
351 		if (fp->_r <= 0 && __srefill(fp))
352 			goto input_failure;
353 
354 		/*
355 		 * Consume leading white space, except for formats
356 		 * that suppress this.
357 		 */
358 		if ((flags & NOSKIP) == 0) {
359 			while (isspace(*fp->_p)) {
360 				nread++;
361 				if (--fp->_r > 0)
362 					fp->_p++;
363 				else if (__srefill(fp))
364 					goto input_failure;
365 			}
366 			/*
367 			 * Note that there is at least one character in
368 			 * the buffer, so conversions that do not set NOSKIP
369 			 * ca no longer result in an input failure.
370 			 */
371 		}
372 
373 		/*
374 		 * Do the conversion.
375 		 */
376 		switch (c) {
377 
378 		case CT_CHAR:
379 			/* scan arbitrary characters (sets NOSKIP) */
380 			if (width == 0)
381 				width = 1;
382 			if (flags & LONG) {
383 				if ((flags & SUPPRESS) == 0)
384 					wcp = va_arg(ap, wchar_t *);
385 				else
386 					wcp = NULL;
387 				n = 0;
388 				while (width != 0) {
389 					if (n == MB_CUR_MAX) {
390 						fp->_flags |= __SERR;
391 						goto input_failure;
392 					}
393 					buf[n++] = *fp->_p;
394 					fp->_p++;
395 					fp->_r--;
396 					mbs = initial;
397 					nconv = mbrtowc(wcp, buf, n, &mbs);
398 					if (nconv == (size_t)-1) {
399 						fp->_flags |= __SERR;
400 						goto input_failure;
401 					}
402 					if (nconv == 0 && !(flags & SUPPRESS))
403 						*wcp = L'\0';
404 					if (nconv != (size_t)-2) {
405 						nread += n;
406 						width--;
407 						if (!(flags & SUPPRESS))
408 							wcp++;
409 						n = 0;
410 					}
411 					if (fp->_r <= 0 && __srefill(fp)) {
412 						if (n != 0) {
413 							fp->_flags |= __SERR;
414 							goto input_failure;
415 						}
416 						break;
417 					}
418 				}
419 				if (!(flags & SUPPRESS))
420 					nassigned++;
421 			} else if (flags & SUPPRESS) {
422 				size_t sum = 0;
423 				for (;;) {
424 					if ((n = fp->_r) < width) {
425 						sum += n;
426 						width -= n;
427 						fp->_p += n;
428 						if (__srefill(fp)) {
429 							if (sum == 0)
430 							    goto input_failure;
431 							break;
432 						}
433 					} else {
434 						sum += width;
435 						fp->_r -= width;
436 						fp->_p += width;
437 						break;
438 					}
439 				}
440 				nread += sum;
441 			} else {
442 				size_t r = fread(va_arg(ap, char *), 1,
443 				    width, fp);
444 
445 				if (r == 0)
446 					goto input_failure;
447 				nread += r;
448 				nassigned++;
449 			}
450 			nconversions++;
451 			break;
452 
453 		case CT_CCL:
454 			/* scan a (nonempty) character class (sets NOSKIP) */
455 			if (width == 0)
456 				width = (size_t)~0;	/* `infinity' */
457 			/* take only those things in the class */
458 			if (flags & LONG) {
459 				wchar_t twc;
460 				int nchars;
461 
462 				if ((flags & SUPPRESS) == 0)
463 					wcp = va_arg(ap, wchar_t *);
464 				else
465 					wcp = &twc;
466 				n = 0;
467 				nchars = 0;
468 				while (width != 0) {
469 					if (n == MB_CUR_MAX) {
470 						fp->_flags |= __SERR;
471 						goto input_failure;
472 					}
473 					buf[n++] = *fp->_p;
474 					fp->_p++;
475 					fp->_r--;
476 					mbs = initial;
477 					nconv = mbrtowc(wcp, buf, n, &mbs);
478 					if (nconv == (size_t)-1) {
479 						fp->_flags |= __SERR;
480 						goto input_failure;
481 					}
482 					if (nconv == 0)
483 						*wcp = L'\0';
484 					if (nconv != (size_t)-2) {
485 						if (wctob(*wcp) != EOF &&
486 						    !ccltab[wctob(*wcp)]) {
487 							while (n != 0) {
488 								n--;
489 								(void)ungetc(buf[n],
490 								    fp);
491 							}
492 							break;
493 						}
494 						nread += n;
495 						width--;
496 						if (!(flags & SUPPRESS))
497 							wcp++;
498 						nchars++;
499 						n = 0;
500 					}
501 					if (fp->_r <= 0 && __srefill(fp)) {
502 						if (n != 0) {
503 							fp->_flags |= __SERR;
504 							goto input_failure;
505 						}
506 						break;
507 					}
508 				}
509 				if (n != 0) {
510 					fp->_flags |= __SERR;
511 					goto input_failure;
512 				}
513 				n = nchars;
514 				if (n == 0)
515 					goto match_failure;
516 				if (!(flags & SUPPRESS)) {
517 					*wcp = L'\0';
518 					nassigned++;
519 				}
520 			} else if (flags & SUPPRESS) {
521 				n = 0;
522 				while (ccltab[*fp->_p]) {
523 					n++, fp->_r--, fp->_p++;
524 					if (--width == 0)
525 						break;
526 					if (fp->_r <= 0 && __srefill(fp)) {
527 						if (n == 0)
528 							goto input_failure;
529 						break;
530 					}
531 				}
532 				if (n == 0)
533 					goto match_failure;
534 			} else {
535 				p0 = p = va_arg(ap, char *);
536 				while (ccltab[*fp->_p]) {
537 					fp->_r--;
538 					*p++ = *fp->_p++;
539 					if (--width == 0)
540 						break;
541 					if (fp->_r <= 0 && __srefill(fp)) {
542 						if (p == p0)
543 							goto input_failure;
544 						break;
545 					}
546 				}
547 				n = p - p0;
548 				if (n == 0)
549 					goto match_failure;
550 				*p = 0;
551 				nassigned++;
552 			}
553 			nread += n;
554 			nconversions++;
555 			break;
556 
557 		case CT_STRING:
558 			/* like CCL, but zero-length string OK, & no NOSKIP */
559 			if (width == 0)
560 				width = (size_t)~0;
561 			if (flags & LONG) {
562 				wchar_t twc;
563 
564 				if ((flags & SUPPRESS) == 0)
565 					wcp = va_arg(ap, wchar_t *);
566 				else
567 					wcp = &twc;
568 				n = 0;
569 				while (!isspace(*fp->_p) && width != 0) {
570 					if (n == MB_CUR_MAX) {
571 						fp->_flags |= __SERR;
572 						goto input_failure;
573 					}
574 					buf[n++] = *fp->_p;
575 					fp->_p++;
576 					fp->_r--;
577 					mbs = initial;
578 					nconv = mbrtowc(wcp, buf, n, &mbs);
579 					if (nconv == (size_t)-1) {
580 						fp->_flags |= __SERR;
581 						goto input_failure;
582 					}
583 					if (nconv == 0)
584 						*wcp = L'\0';
585 					if (nconv != (size_t)-2) {
586 						if (iswspace(*wcp)) {
587 							while (n != 0) {
588 								n--;
589 								(void)ungetc(buf[n],
590 								    fp);
591 							}
592 							break;
593 						}
594 						nread += n;
595 						width--;
596 						if (!(flags & SUPPRESS))
597 							wcp++;
598 						n = 0;
599 					}
600 					if (fp->_r <= 0 && __srefill(fp)) {
601 						if (n != 0) {
602 							fp->_flags |= __SERR;
603 							goto input_failure;
604 						}
605 						break;
606 					}
607 				}
608 				if (!(flags & SUPPRESS)) {
609 					*wcp = L'\0';
610 					nassigned++;
611 				}
612 			} else if (flags & SUPPRESS) {
613 				n = 0;
614 				while (!isspace(*fp->_p)) {
615 					n++, fp->_r--, fp->_p++;
616 					if (--width == 0)
617 						break;
618 					if (fp->_r <= 0 && __srefill(fp))
619 						break;
620 				}
621 				nread += n;
622 			} else {
623 				p0 = p = va_arg(ap, char *);
624 				while (!isspace(*fp->_p)) {
625 					fp->_r--;
626 					*p++ = *fp->_p++;
627 					if (--width == 0)
628 						break;
629 					if (fp->_r <= 0 && __srefill(fp))
630 						break;
631 				}
632 				*p = 0;
633 				nread += p - p0;
634 				nassigned++;
635 			}
636 			nconversions++;
637 			continue;
638 
639 		case CT_INT:
640 			/* scan an integer as if by the conversion function */
641 #ifdef hardway
642 			if (width == 0 || width > sizeof(buf) - 1)
643 				width = sizeof(buf) - 1;
644 #else
645 			/* size_t is unsigned, hence this optimisation */
646 			if (--width > sizeof(buf) - 2)
647 				width = sizeof(buf) - 2;
648 			width++;
649 #endif
650 			flags |= SIGNOK | NDIGITS | NZDIGITS;
651 			for (p = buf; width; width--) {
652 				c = *fp->_p;
653 				/*
654 				 * Switch on the character; `goto ok'
655 				 * if we accept it as a part of number.
656 				 */
657 				switch (c) {
658 
659 				/*
660 				 * The digit 0 is always legal, but is
661 				 * special.  For %i conversions, if no
662 				 * digits (zero or nonzero) have been
663 				 * scanned (only signs), we will have
664 				 * base==0.  In that case, we should set
665 				 * it to 8 and enable 0x prefixing.
666 				 * Also, if we have not scanned zero digits
667 				 * before this, do not turn off prefixing
668 				 * (someone else will turn it off if we
669 				 * have scanned any nonzero digits).
670 				 */
671 				case '0':
672 					if (base == 0) {
673 						base = 8;
674 						flags |= PFXOK;
675 					}
676 					if (flags & NZDIGITS)
677 					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
678 					else
679 					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
680 					goto ok;
681 
682 				/* 1 through 7 always legal */
683 				case '1': case '2': case '3':
684 				case '4': case '5': case '6': case '7':
685 					base = basefix[base];
686 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
687 					goto ok;
688 
689 				/* digits 8 and 9 ok iff decimal or hex */
690 				case '8': case '9':
691 					base = basefix[base];
692 					if (base <= 8)
693 						break;	/* not legal here */
694 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
695 					goto ok;
696 
697 				/* letters ok iff hex */
698 				case 'A': case 'B': case 'C':
699 				case 'D': case 'E': case 'F':
700 				case 'a': case 'b': case 'c':
701 				case 'd': case 'e': case 'f':
702 					/* no need to fix base here */
703 					if (base <= 10)
704 						break;	/* not legal here */
705 					flags &= ~(SIGNOK | PFXOK | NDIGITS);
706 					goto ok;
707 
708 				/* sign ok only as first character */
709 				case '+': case '-':
710 					if (flags & SIGNOK) {
711 						flags &= ~SIGNOK;
712 						flags |= HAVESIGN;
713 						goto ok;
714 					}
715 					break;
716 
717 				/*
718 				 * x ok iff flag still set & 2nd char (or
719 				 * 3rd char if we have a sign).
720 				 */
721 				case 'x': case 'X':
722 					if (flags & PFXOK && p ==
723 					    buf + 1 + !!(flags & HAVESIGN)) {
724 						base = 16;	/* if %i */
725 						flags &= ~PFXOK;
726 						goto ok;
727 					}
728 					break;
729 				}
730 
731 				/*
732 				 * If we got here, c is not a legal character
733 				 * for a number.  Stop accumulating digits.
734 				 */
735 				break;
736 		ok:
737 				/*
738 				 * c is legal: store it and look at the next.
739 				 */
740 				*p++ = c;
741 				if (--fp->_r > 0)
742 					fp->_p++;
743 				else if (__srefill(fp))
744 					break;		/* EOF */
745 			}
746 			/*
747 			 * If we had only a sign, it is no good; push
748 			 * back the sign.  If the number ends in `x',
749 			 * it was [sign] '0' 'x', so push back the x
750 			 * and treat it as [sign] '0'.
751 			 */
752 			if (flags & NDIGITS) {
753 				if (p > buf)
754 					(void)ungetc(*(u_char *)--p, fp);
755 				goto match_failure;
756 			}
757 			c = ((u_char *)p)[-1];
758 			if (c == 'x' || c == 'X') {
759 				--p;
760 				(void)ungetc(c, fp);
761 			}
762 			if ((flags & SUPPRESS) == 0) {
763 				uintmax_t res;
764 
765 				*p = 0;
766 				if ((flags & UNSIGNED) == 0)
767 				    res = strtoimax(buf, (char **)NULL, base);
768 				else
769 				    res = strtoumax(buf, (char **)NULL, base);
770 				if (flags & POINTER)
771 					*va_arg(ap, void **) =
772 							(void *)(uintptr_t)res;
773 				else if (flags & SHORTSHORT)
774 					*va_arg(ap, char *) = (char)res;
775 				else if (flags & SHORT)
776 					*va_arg(ap, short *) = (short)res;
777 				else if (flags & LONG)
778 					*va_arg(ap, long *) = (long)res;
779 				else if (flags & LONGLONG)
780 					*va_arg(ap, long long *) = res;
781 				else if (flags & INTMAXT)
782 					*va_arg(ap, intmax_t *) = res;
783 				else if (flags & PTRDIFFT)
784 					*va_arg(ap, ptrdiff_t *) =
785 					    (ptrdiff_t)res;
786 				else if (flags & SIZET)
787 					*va_arg(ap, size_t *) = (size_t)res;
788 				else
789 					*va_arg(ap, int *) = (int)res;
790 				nassigned++;
791 			}
792 			nread += p - buf;
793 			nconversions++;
794 			break;
795 
796 #ifndef NO_FLOATING_POINT
797 		case CT_FLOAT:
798 			/* scan a floating point number as if by strtod */
799 			if (width == 0 || width > sizeof(buf) - 1)
800 				width = sizeof(buf) - 1;
801 			if ((width = parsefloat(fp, buf, buf + width)) == 0)
802 				goto match_failure;
803 			if ((flags & SUPPRESS) == 0) {
804 				if (flags & LONGDBL) {
805 					long double res = strtold(buf, &p);
806 					*va_arg(ap, long double *) = res;
807 				} else if (flags & LONG) {
808 					double res = strtod(buf, &p);
809 					*va_arg(ap, double *) = res;
810 				} else {
811 					float res = strtof(buf, &p);
812 					*va_arg(ap, float *) = res;
813 				}
814 				if (__scanfdebug && p - buf != width)
815 					abort();
816 				nassigned++;
817 			}
818 			nread += width;
819 			nconversions++;
820 			break;
821 #endif /* !NO_FLOATING_POINT */
822 		}
823 	}
824 input_failure:
825 	return (nconversions != 0 ? nassigned : EOF);
826 match_failure:
827 	return (nassigned);
828 }
829 
830 /*
831  * Fill in the given table from the scanset at the given format
832  * (just after `[').  Return a pointer to the character past the
833  * closing `]'.  The table has a 1 wherever characters should be
834  * considered part of the scanset.
835  */
836 static const u_char *
837 __sccl(tab, fmt)
838 	char *tab;
839 	const u_char *fmt;
840 {
841 	int c, n, v, i;
842 
843 	_DIAGASSERT(tab != NULL);
844 	_DIAGASSERT(fmt != NULL);
845 	/* first `clear' the whole table */
846 	c = *fmt++;		/* first char hat => negated scanset */
847 	if (c == '^') {
848 		v = 1;		/* default => accept */
849 		c = *fmt++;	/* get new first char */
850 	} else
851 		v = 0;		/* default => reject */
852 
853 	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
854 	(void)memset(tab, v, 256);
855 
856 	if (c == 0)
857 		return (fmt - 1);/* format ended before closing ] */
858 
859 	/*
860 	 * Now set the entries corresponding to the actual scanset
861 	 * to the opposite of the above.
862 	 *
863 	 * The first character may be ']' (or '-') without being special;
864 	 * the last character may be '-'.
865 	 */
866 	v = 1 - v;
867 	for (;;) {
868 		tab[c] = v;		/* take character c */
869 doswitch:
870 		n = *fmt++;		/* and examine the next */
871 		switch (n) {
872 
873 		case 0:			/* format ended too soon */
874 			return (fmt - 1);
875 
876 		case '-':
877 			/*
878 			 * A scanset of the form
879 			 *	[01+-]
880 			 * is defined as `the digit 0, the digit 1,
881 			 * the character +, the character -', but
882 			 * the effect of a scanset such as
883 			 *	[a-zA-Z0-9]
884 			 * is implementation defined.  The V7 Unix
885 			 * scanf treats `a-z' as `the letters a through
886 			 * z', but treats `a-a' as `the letter a, the
887 			 * character -, and the letter a'.
888 			 *
889 			 * For compatibility, the `-' is not considerd
890 			 * to define a range if the character following
891 			 * it is either a close bracket (required by ANSI)
892 			 * or is not numerically greater than the character
893 			 * we just stored in the table (c).
894 			 */
895 			n = *fmt;
896 			if (n == ']' || (__collate_load_error ? n < c :
897 			    __collate_range_cmp(n, c) < 0)) {
898 				c = '-';
899 				break;	/* resume the for(;;) */
900 			}
901 			fmt++;
902 			/* fill in the range */
903 			if (__collate_load_error) {
904 				do
905 					tab[++c] = v;
906 				while (c < n);
907 			} else {
908 				for (i = 0; i < 256; i ++)
909 					if (__collate_range_cmp(c, i) < 0 &&
910 					    __collate_range_cmp(i, n) <= 0)
911 						tab[i] = v;
912 			}
913 #if 1	/* XXX another disgusting compatibility hack */
914 			c = n;
915 			/*
916 			 * Alas, the V7 Unix scanf also treats formats
917 			 * such as [a-c-e] as `the letters a through e'.
918 			 * This too is permitted by the standard....
919 			 */
920 			goto doswitch;
921 #else
922 			c = *fmt++;
923 			if (c == 0)
924 				return (fmt - 1);
925 			if (c == ']')
926 				return (fmt);
927 #endif
928 
929 		case ']':		/* end of scanset */
930 			return (fmt);
931 
932 		default:		/* just another character */
933 			c = n;
934 			break;
935 		}
936 	}
937 	/* NOTREACHED */
938 }
939 
940 #ifndef NO_FLOATING_POINT
941 static int
942 parsefloat(FILE *fp, char *buf, char *end)
943 {
944 	char *commit, *p;
945 	int infnanpos = 0;
946 	enum {
947 		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
948 		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
949 	} state = S_START;
950 	unsigned char c;
951 	char decpt = *localeconv()->decimal_point;
952 	_Bool gotmantdig = 0, ishex = 0;
953 
954 	/*
955 	 * We set commit = p whenever the string we have read so far
956 	 * constitutes a valid representation of a floating point
957 	 * number by itself.  At some point, the parse will complete
958 	 * or fail, and we will ungetc() back to the last commit point.
959 	 * To ensure that the file offset gets updated properly, it is
960 	 * always necessary to read at least one character that doesn't
961 	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
962 	 */
963 	commit = buf - 1;
964 	for (p = buf; p < end; ) {
965 		c = *fp->_p;
966 reswitch:
967 		switch (state) {
968 		case S_START:
969 			state = S_GOTSIGN;
970 			if (c == '-' || c == '+')
971 				break;
972 			else
973 				goto reswitch;
974 		case S_GOTSIGN:
975 			switch (c) {
976 			case '0':
977 				state = S_MAYBEHEX;
978 				commit = p;
979 				break;
980 			case 'I':
981 			case 'i':
982 				state = S_INF;
983 				break;
984 			case 'N':
985 			case 'n':
986 				state = S_NAN;
987 				break;
988 			default:
989 				state = S_DIGITS;
990 				goto reswitch;
991 			}
992 			break;
993 		case S_INF:
994 			if (infnanpos > 6 ||
995 			    (c != "nfinity"[infnanpos] &&
996 			     c != "NFINITY"[infnanpos]))
997 				goto parsedone;
998 			if (infnanpos == 1 || infnanpos == 6)
999 				commit = p;	/* inf or infinity */
1000 			infnanpos++;
1001 			break;
1002 		case S_NAN:
1003 			switch (infnanpos) {
1004 			case -1:	/* XXX kludge to deal with nan(...) */
1005 				goto parsedone;
1006 			case 0:
1007 				if (c != 'A' && c != 'a')
1008 					goto parsedone;
1009 				break;
1010 			case 1:
1011 				if (c != 'N' && c != 'n')
1012 					goto parsedone;
1013 				else
1014 					commit = p;
1015 				break;
1016 			case 2:
1017 				if (c != '(')
1018 					goto parsedone;
1019 				break;
1020 			default:
1021 				if (c == ')') {
1022 					commit = p;
1023 					infnanpos = -2;
1024 				} else if (!isalnum(c) && c != '_')
1025 					goto parsedone;
1026 				break;
1027 			}
1028 			infnanpos++;
1029 			break;
1030 		case S_MAYBEHEX:
1031 			state = S_DIGITS;
1032 			if (c == 'X' || c == 'x') {
1033 				ishex = 1;
1034 				break;
1035 			} else {	/* we saw a '0', but no 'x' */
1036 				gotmantdig = 1;
1037 				goto reswitch;
1038 			}
1039 		case S_DIGITS:
1040 			if ((ishex && isxdigit(c)) || isdigit(c))
1041 				gotmantdig = 1;
1042 			else {
1043 				state = S_FRAC;
1044 				if (c != decpt)
1045 					goto reswitch;
1046 			}
1047 			if (gotmantdig)
1048 				commit = p;
1049 			break;
1050 		case S_FRAC:
1051 			if (((c == 'E' || c == 'e') && !ishex) ||
1052 			    ((c == 'P' || c == 'p') && ishex)) {
1053 				if (!gotmantdig)
1054 					goto parsedone;
1055 				else
1056 					state = S_EXP;
1057 			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
1058 				commit = p;
1059 				gotmantdig = 1;
1060 			} else
1061 				goto parsedone;
1062 			break;
1063 		case S_EXP:
1064 			state = S_EXPDIGITS;
1065 			if (c == '-' || c == '+')
1066 				break;
1067 			else
1068 				goto reswitch;
1069 		case S_EXPDIGITS:
1070 			if (isdigit(c))
1071 				commit = p;
1072 			else
1073 				goto parsedone;
1074 			break;
1075 		default:
1076 			abort();
1077 		}
1078 		*p++ = c;
1079 		if (--fp->_r > 0)
1080 			fp->_p++;
1081 		else if (__srefill(fp))
1082 			break;	/* EOF */
1083 	}
1084 
1085 parsedone:
1086 	while (commit < --p)
1087 		(void)ungetc(*(u_char *)p, fp);
1088 	*++commit = '\0';
1089 	return (commit - buf);
1090 }
1091 #endif
1092