186d7f5d3SJohn Marino /* __gmp_doscan -- formatted input internals.
286d7f5d3SJohn Marino
386d7f5d3SJohn Marino THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
486d7f5d3SJohn Marino CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
586d7f5d3SJohn Marino FUTURE GNU MP RELEASES.
686d7f5d3SJohn Marino
786d7f5d3SJohn Marino Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
886d7f5d3SJohn Marino
986d7f5d3SJohn Marino This file is part of the GNU MP Library.
1086d7f5d3SJohn Marino
1186d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1286d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1386d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1486d7f5d3SJohn Marino option) any later version.
1586d7f5d3SJohn Marino
1686d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1786d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1886d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1986d7f5d3SJohn Marino License for more details.
2086d7f5d3SJohn Marino
2186d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2286d7f5d3SJohn Marino along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
2386d7f5d3SJohn Marino
2486d7f5d3SJohn Marino #define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */
2586d7f5d3SJohn Marino
2686d7f5d3SJohn Marino #include "config.h"
2786d7f5d3SJohn Marino
2886d7f5d3SJohn Marino #if HAVE_STDARG
2986d7f5d3SJohn Marino #include <stdarg.h>
3086d7f5d3SJohn Marino #else
3186d7f5d3SJohn Marino #include <varargs.h>
3286d7f5d3SJohn Marino #endif
3386d7f5d3SJohn Marino
3486d7f5d3SJohn Marino #include <ctype.h>
3586d7f5d3SJohn Marino #include <stddef.h> /* for ptrdiff_t */
3686d7f5d3SJohn Marino #include <stdio.h>
3786d7f5d3SJohn Marino #include <stdlib.h> /* for strtol */
3886d7f5d3SJohn Marino #include <string.h>
3986d7f5d3SJohn Marino
4086d7f5d3SJohn Marino #if HAVE_LANGINFO_H
4186d7f5d3SJohn Marino #include <langinfo.h> /* for nl_langinfo */
4286d7f5d3SJohn Marino #endif
4386d7f5d3SJohn Marino
4486d7f5d3SJohn Marino #if HAVE_LOCALE_H
4586d7f5d3SJohn Marino #include <locale.h> /* for localeconv */
4686d7f5d3SJohn Marino #endif
4786d7f5d3SJohn Marino
4886d7f5d3SJohn Marino #if HAVE_INTTYPES_H
4986d7f5d3SJohn Marino # include <inttypes.h> /* for intmax_t */
5086d7f5d3SJohn Marino #else
5186d7f5d3SJohn Marino # if HAVE_STDINT_H
5286d7f5d3SJohn Marino # include <stdint.h>
5386d7f5d3SJohn Marino # endif
5486d7f5d3SJohn Marino #endif
5586d7f5d3SJohn Marino
5686d7f5d3SJohn Marino #if HAVE_SYS_TYPES_H
5786d7f5d3SJohn Marino #include <sys/types.h> /* for quad_t */
5886d7f5d3SJohn Marino #endif
5986d7f5d3SJohn Marino
6086d7f5d3SJohn Marino #include "gmp.h"
6186d7f5d3SJohn Marino #include "gmp-impl.h"
6286d7f5d3SJohn Marino
6386d7f5d3SJohn Marino
/* Debug tracing hook.  As defined here TRACE(x) expands to nothing, so the
   argument is neither compiled nor evaluated.  Change this to
   "#define TRACE(x) x" for some traces. */
#define TRACE(x)
6686d7f5d3SJohn Marino
6786d7f5d3SJohn Marino
6886d7f5d3SJohn Marino /* General:
6986d7f5d3SJohn Marino
7086d7f5d3SJohn Marino It's necessary to parse up the format string to recognise the GMP
7186d7f5d3SJohn Marino extra types F, Q and Z. Other types and conversions are passed
7286d7f5d3SJohn Marino across to the standard sscanf or fscanf via funs->scan, for ease of
7386d7f5d3SJohn Marino implementation. This is essential in the case of something like glibc
7486d7f5d3SJohn Marino %p where the pointer format isn't actually documented.
7586d7f5d3SJohn Marino
7686d7f5d3SJohn Marino Because funs->scan doesn't get the whole input it can't put the right
7786d7f5d3SJohn Marino values in for %n, so that's handled in __gmp_doscan. Neither sscanf
7886d7f5d3SJohn Marino nor fscanf directly indicate how many characters were read, so an
7986d7f5d3SJohn Marino extra %n is appended to each run for that. For fscanf this merely
8086d7f5d3SJohn Marino supports our %n output, but for sscanf it lets funs->step move us
8186d7f5d3SJohn Marino along the input string.
8286d7f5d3SJohn Marino
8386d7f5d3SJohn Marino Whitespace and literal matches in the format string, including %%,
8486d7f5d3SJohn Marino are handled directly within __gmp_doscan. This is reasonably
8586d7f5d3SJohn Marino efficient, and avoids some suspicious behaviour observed in various
8686d7f5d3SJohn Marino system libc's. GLIBC 2.2.4 for instance returns 0 on
8786d7f5d3SJohn Marino
8886d7f5d3SJohn Marino sscanf(" ", " x")
8986d7f5d3SJohn Marino or
9086d7f5d3SJohn Marino sscanf(" ", " x%d",&n)
9186d7f5d3SJohn Marino
9286d7f5d3SJohn Marino whereas we think they should return EOF, since end-of-string is
9386d7f5d3SJohn Marino reached when a match of "x" is required.
9486d7f5d3SJohn Marino
9586d7f5d3SJohn Marino For standard % conversions, funs->scan is called once for each
9686d7f5d3SJohn Marino conversion. If we had vfscanf and vsscanf and could rely on their
9786d7f5d3SJohn Marino fixed text matching behaviour then we could call them with multiple
9886d7f5d3SJohn Marino consecutive standard conversions. But plain fscanf and sscanf work
9986d7f5d3SJohn Marino fine, and parsing one field at a time shouldn't be too much of a
10086d7f5d3SJohn Marino slowdown.
10186d7f5d3SJohn Marino
10286d7f5d3SJohn Marino gmpscan:
10386d7f5d3SJohn Marino
10486d7f5d3SJohn Marino gmpscan reads a gmp type. It's only used from one place, but is a
10586d7f5d3SJohn Marino separate subroutine to avoid a big chunk of complicated code in the
10686d7f5d3SJohn Marino middle of __gmp_doscan. Within gmpscan a couple of loopbacks make it
10786d7f5d3SJohn Marino possible to share code for parsing integers, rationals and floats.
10886d7f5d3SJohn Marino
10986d7f5d3SJohn Marino In gmpscan normally one char of lookahead is maintained, but when width
11086d7f5d3SJohn Marino is reached that stops, on the principle that an fgetc/ungetc of a char
11186d7f5d3SJohn Marino past where we're told to stop would be undesirable. "chars" is how many
11286d7f5d3SJohn Marino characters have been read so far, including the current c. When
11386d7f5d3SJohn Marino chars==width and another character is desired then a jump is done to the
11486d7f5d3SJohn Marino "convert" stage. c is invalid and mustn't be unget'ed in this case;
11586d7f5d3SJohn Marino chars is set to width+1 to indicate that.
11686d7f5d3SJohn Marino
11786d7f5d3SJohn Marino gmpscan normally returns the number of characters read. -1 means an
11886d7f5d3SJohn Marino invalid field, -2 means EOF reached before any matching characters
11986d7f5d3SJohn Marino were read.
12086d7f5d3SJohn Marino
   For hex floats, the mantissa part is passed to mpf_set_str, then the
   exponent is applied with mpf_mul_2exp or mpf_div_2exp.  This is easier
   than teaching mpf_set_str about an exponent factor (ie. 2) differing
   from the mantissa radix point factor (ie. 16).  mpf_mul_2exp and
   mpf_div_2exp will preserve the application requested precision, so
   nothing in that respect is lost by making this a two-step process.
12786d7f5d3SJohn Marino
12886d7f5d3SJohn Marino Matching and errors:
12986d7f5d3SJohn Marino
13086d7f5d3SJohn Marino C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
13186d7f5d3SJohn Marino string which is a match for the appropriate type, or a prefix of a
13286d7f5d3SJohn Marino match. With that done, if it's only a prefix then the result is a
13386d7f5d3SJohn Marino matching failure, ie. invalid input.
13486d7f5d3SJohn Marino
13586d7f5d3SJohn Marino This rule seems fairly clear, but doesn't seem to be universally
13686d7f5d3SJohn Marino applied in system C libraries. Even GLIBC doesn't seem to get it
13786d7f5d3SJohn Marino right, insofar as it seems to accept some apparently invalid forms.
13886d7f5d3SJohn Marino Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
13986d7f5d3SJohn Marino standard would suggest a non-empty sequence of digits should be
14086d7f5d3SJohn Marino required after an "0x".
14186d7f5d3SJohn Marino
14286d7f5d3SJohn Marino A footnote to 7.19.6.2 para 17 notes how this input item reading can
14386d7f5d3SJohn Marino mean inputs acceptable to strtol are not acceptable to fscanf. We
14486d7f5d3SJohn Marino think this confirms our reading of "0x" as invalid.
14586d7f5d3SJohn Marino
14686d7f5d3SJohn Marino Clearly gmp_sscanf could backtrack to a longest input which was a
14786d7f5d3SJohn Marino valid match for a given item, but this is not done, since C99 says
14886d7f5d3SJohn Marino sscanf is identical to fscanf, so we make gmp_sscanf identical to
14986d7f5d3SJohn Marino gmp_fscanf.
15086d7f5d3SJohn Marino
15186d7f5d3SJohn Marino Types:
15286d7f5d3SJohn Marino
15386d7f5d3SJohn Marino C99 says "ll" is for long long, and "L" is for long double floats.
15486d7f5d3SJohn Marino Unfortunately in GMP 4.1.1 we documented the two as equivalent. This
15586d7f5d3SJohn Marino doesn't affect us directly, since both are passed through to plain
15686d7f5d3SJohn Marino scanf. It seems wisest not to try to enforce the C99 rule. This is
15786d7f5d3SJohn Marino consistent with what we said before, though whether it actually
15886d7f5d3SJohn Marino worked was always up to the C library.
15986d7f5d3SJohn Marino
16086d7f5d3SJohn Marino Alternatives:
16186d7f5d3SJohn Marino
   Consideration was given to using separate code for gmp_fscanf and
   gmp_sscanf.  The sscanf case could zip across a string doing literal
   matches or recognising digits in gmpscan, rather than making a
   function call funs->get per character.  The fscanf could use getc
   rather than fgetc too, which might help those systems where getc is a
   macro or otherwise inlined.  But none of this scanning and converting
   will be particularly fast, so the two are done together to keep it a
   little simpler for now.
17086d7f5d3SJohn Marino
17186d7f5d3SJohn Marino Various multibyte string issues are not addressed, for a start C99
17286d7f5d3SJohn Marino scanf says the format string is multibyte. Since we pass %c, %s and
17386d7f5d3SJohn Marino %[ to the system scanf, they might do multibyte reads already, but
17486d7f5d3SJohn Marino it's another matter whether or not that can be used, since our digit
17586d7f5d3SJohn Marino and whitespace parsing is only unibyte. The plan is to quietly
17686d7f5d3SJohn Marino ignore multibyte locales for now. This is not as bad as it sounds,
17786d7f5d3SJohn Marino since GMP is presumably used mostly on numbers, which can be
17886d7f5d3SJohn Marino perfectly adequately treated in plain ASCII.
17986d7f5d3SJohn Marino
18086d7f5d3SJohn Marino */
18186d7f5d3SJohn Marino
18286d7f5d3SJohn Marino
/* Parameters of one "%" conversion, filled in by __gmp_doscan while it
   parses the format string and read by gmpscan. */
struct gmp_doscan_params_t {
  int   base;    /* radix of the number; 0 means gmpscan determines it from
                    the input (decimal, leading "0" octal for non-floats,
                    "0x" hex) */
  int   ignore;  /* non-zero to parse the field without storing a result */
  char  type;    /* type modifier seen so far: 'F', 'Q' or 'Z' for the GMP
                    types, a C length modifier character otherwise, or '\0'
                    when there is none */
  int   width;   /* maximum field width; 0 is treated by gmpscan as
                    INT_MAX-1, ie. effectively unlimited */
};
18986d7f5d3SJohn Marino
19086d7f5d3SJohn Marino
/* Growth increment, in bytes, of the "s" buffer used by STORE; also the
   initial allocation made in gmpscan. */
#define S_ALLOC_STEP  512

/* Fetch the next input character into "c", for use inside gmpscan only.
   Relies on the locals "chars", "width", "funs" and "data" and on the
   "convert" label being in scope.  "chars" is incremented first; if that
   takes it past "width" then nothing is read and control jumps to
   "convert", leaving chars==width+1 to show that c is not valid. */
#define GET(c)                                  \
  do {                                          \
    ASSERT (chars <= width);                    \
    chars++;                                    \
    if (chars > width)                          \
      goto convert;                             \
    (c) = (*funs->get) (data);                  \
  } while (0)

/* Store into "s", extending if necessary.  Relies on the locals "s",
   "s_upto" (next free index) and "s_alloc" (current allocation) being in
   scope.  The buffer grows by S_ALLOC_STEP bytes whenever it's full. */
#define STORE(c)                                                        \
  do {                                                                  \
    ASSERT (s_upto <= s_alloc);                                         \
    if (s_upto >= s_alloc)                                              \
      {                                                                 \
        size_t  s_alloc_new = s_alloc + S_ALLOC_STEP;                   \
        s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \
        s_alloc = s_alloc_new;                                          \
      }                                                                 \
    s[s_upto++] = (c);                                                  \
  } while (0)
21486d7f5d3SJohn Marino
21586d7f5d3SJohn Marino static int
gmpscan(const struct gmp_doscan_funs_t * funs,void * data,const struct gmp_doscan_params_t * p,void * dst)21686d7f5d3SJohn Marino gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
21786d7f5d3SJohn Marino const struct gmp_doscan_params_t *p, void *dst)
21886d7f5d3SJohn Marino {
21986d7f5d3SJohn Marino int chars, c, base, first, width, seen_point, seen_digit, hexfloat;
22086d7f5d3SJohn Marino size_t s_upto, s_alloc, hexexp;
22186d7f5d3SJohn Marino char *s;
22286d7f5d3SJohn Marino int invalid = 0;
22386d7f5d3SJohn Marino
22486d7f5d3SJohn Marino TRACE (printf ("gmpscan\n"));
22586d7f5d3SJohn Marino
22686d7f5d3SJohn Marino ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
22786d7f5d3SJohn Marino
22886d7f5d3SJohn Marino c = (*funs->get) (data);
22986d7f5d3SJohn Marino if (c == EOF)
23086d7f5d3SJohn Marino return -2;
23186d7f5d3SJohn Marino
23286d7f5d3SJohn Marino chars = 1;
23386d7f5d3SJohn Marino first = 1;
23486d7f5d3SJohn Marino seen_point = 0;
23586d7f5d3SJohn Marino width = (p->width == 0 ? INT_MAX-1 : p->width);
23686d7f5d3SJohn Marino base = p->base;
23786d7f5d3SJohn Marino s_alloc = S_ALLOC_STEP;
23886d7f5d3SJohn Marino s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
23986d7f5d3SJohn Marino s_upto = 0;
24086d7f5d3SJohn Marino hexfloat = 0;
24186d7f5d3SJohn Marino hexexp = 0;
24286d7f5d3SJohn Marino
24386d7f5d3SJohn Marino another:
24486d7f5d3SJohn Marino seen_digit = 0;
24586d7f5d3SJohn Marino if (c == '-')
24686d7f5d3SJohn Marino {
24786d7f5d3SJohn Marino STORE (c);
24886d7f5d3SJohn Marino goto get_for_sign;
24986d7f5d3SJohn Marino }
25086d7f5d3SJohn Marino else if (c == '+')
25186d7f5d3SJohn Marino {
25286d7f5d3SJohn Marino /* don't store '+', it's not accepted by mpz_set_str etc */
25386d7f5d3SJohn Marino get_for_sign:
25486d7f5d3SJohn Marino GET (c);
25586d7f5d3SJohn Marino }
25686d7f5d3SJohn Marino
25786d7f5d3SJohn Marino if (base == 0)
25886d7f5d3SJohn Marino {
25986d7f5d3SJohn Marino base = 10; /* decimal if no base indicator */
26086d7f5d3SJohn Marino if (c == '0')
26186d7f5d3SJohn Marino {
26286d7f5d3SJohn Marino seen_digit = 1; /* 0 alone is a valid number */
26386d7f5d3SJohn Marino if (p->type != 'F')
26486d7f5d3SJohn Marino base = 8; /* leading 0 is octal, for non-floats */
26586d7f5d3SJohn Marino STORE (c);
26686d7f5d3SJohn Marino GET (c);
26786d7f5d3SJohn Marino if (c == 'x' || c == 'X')
26886d7f5d3SJohn Marino {
26986d7f5d3SJohn Marino base = 16;
27086d7f5d3SJohn Marino seen_digit = 0; /* must have digits after an 0x */
27186d7f5d3SJohn Marino if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */
27286d7f5d3SJohn Marino hexfloat = 1;
27386d7f5d3SJohn Marino else
27486d7f5d3SJohn Marino STORE (c);
27586d7f5d3SJohn Marino GET (c);
27686d7f5d3SJohn Marino }
27786d7f5d3SJohn Marino }
27886d7f5d3SJohn Marino }
27986d7f5d3SJohn Marino
28086d7f5d3SJohn Marino digits:
28186d7f5d3SJohn Marino for (;;)
28286d7f5d3SJohn Marino {
28386d7f5d3SJohn Marino if (base == 16)
28486d7f5d3SJohn Marino {
28586d7f5d3SJohn Marino if (! isxdigit (c))
28686d7f5d3SJohn Marino break;
28786d7f5d3SJohn Marino }
28886d7f5d3SJohn Marino else
28986d7f5d3SJohn Marino {
29086d7f5d3SJohn Marino if (! isdigit (c))
29186d7f5d3SJohn Marino break;
29286d7f5d3SJohn Marino if (base == 8 && (c == '8' || c == '9'))
29386d7f5d3SJohn Marino break;
29486d7f5d3SJohn Marino }
29586d7f5d3SJohn Marino
29686d7f5d3SJohn Marino seen_digit = 1;
29786d7f5d3SJohn Marino STORE (c);
29886d7f5d3SJohn Marino GET (c);
29986d7f5d3SJohn Marino }
30086d7f5d3SJohn Marino
30186d7f5d3SJohn Marino if (first)
30286d7f5d3SJohn Marino {
30386d7f5d3SJohn Marino /* decimal point */
30486d7f5d3SJohn Marino if (p->type == 'F' && ! seen_point)
30586d7f5d3SJohn Marino {
30686d7f5d3SJohn Marino /* For a multi-character decimal point, if the first character is
30786d7f5d3SJohn Marino present then all of it must be, otherwise the input is
30886d7f5d3SJohn Marino considered invalid. */
30986d7f5d3SJohn Marino const char *point = GMP_DECIMAL_POINT;
31086d7f5d3SJohn Marino int pc = (unsigned char) *point++;
31186d7f5d3SJohn Marino if (c == pc)
31286d7f5d3SJohn Marino {
31386d7f5d3SJohn Marino for (;;)
31486d7f5d3SJohn Marino {
31586d7f5d3SJohn Marino STORE (c);
31686d7f5d3SJohn Marino GET (c);
31786d7f5d3SJohn Marino pc = (unsigned char) *point++;
31886d7f5d3SJohn Marino if (pc == '\0')
31986d7f5d3SJohn Marino break;
32086d7f5d3SJohn Marino if (c != pc)
32186d7f5d3SJohn Marino goto set_invalid;
32286d7f5d3SJohn Marino }
32386d7f5d3SJohn Marino seen_point = 1;
32486d7f5d3SJohn Marino goto digits;
32586d7f5d3SJohn Marino }
32686d7f5d3SJohn Marino }
32786d7f5d3SJohn Marino
32886d7f5d3SJohn Marino /* exponent */
32986d7f5d3SJohn Marino if (p->type == 'F')
33086d7f5d3SJohn Marino {
33186d7f5d3SJohn Marino if (hexfloat && (c == 'p' || c == 'P'))
33286d7f5d3SJohn Marino {
33386d7f5d3SJohn Marino hexexp = s_upto; /* exponent location */
33486d7f5d3SJohn Marino base = 10; /* exponent in decimal */
33586d7f5d3SJohn Marino goto exponent;
33686d7f5d3SJohn Marino }
33786d7f5d3SJohn Marino else if (! hexfloat && (c == 'e' || c == 'E'))
33886d7f5d3SJohn Marino {
33986d7f5d3SJohn Marino exponent:
34086d7f5d3SJohn Marino /* must have at least one digit in the mantissa, just an exponent
34186d7f5d3SJohn Marino is not good enough */
34286d7f5d3SJohn Marino if (! seen_digit)
34386d7f5d3SJohn Marino goto set_invalid;
34486d7f5d3SJohn Marino
34586d7f5d3SJohn Marino do_second:
34686d7f5d3SJohn Marino first = 0;
34786d7f5d3SJohn Marino STORE (c);
34886d7f5d3SJohn Marino GET (c);
34986d7f5d3SJohn Marino goto another;
35086d7f5d3SJohn Marino }
35186d7f5d3SJohn Marino }
35286d7f5d3SJohn Marino
35386d7f5d3SJohn Marino /* denominator */
35486d7f5d3SJohn Marino if (p->type == 'Q' && c == '/')
35586d7f5d3SJohn Marino {
35686d7f5d3SJohn Marino /* must have at least one digit in the numerator */
35786d7f5d3SJohn Marino if (! seen_digit)
35886d7f5d3SJohn Marino goto set_invalid;
35986d7f5d3SJohn Marino
36086d7f5d3SJohn Marino /* now look for at least one digit in the denominator */
36186d7f5d3SJohn Marino seen_digit = 0;
36286d7f5d3SJohn Marino
36386d7f5d3SJohn Marino /* allow the base to be redetermined for "%i" */
36486d7f5d3SJohn Marino base = p->base;
36586d7f5d3SJohn Marino goto do_second;
36686d7f5d3SJohn Marino }
36786d7f5d3SJohn Marino }
36886d7f5d3SJohn Marino
36986d7f5d3SJohn Marino convert:
37086d7f5d3SJohn Marino if (! seen_digit)
37186d7f5d3SJohn Marino {
37286d7f5d3SJohn Marino set_invalid:
37386d7f5d3SJohn Marino invalid = 1;
37486d7f5d3SJohn Marino goto done;
37586d7f5d3SJohn Marino }
37686d7f5d3SJohn Marino
37786d7f5d3SJohn Marino if (! p->ignore)
37886d7f5d3SJohn Marino {
37986d7f5d3SJohn Marino STORE ('\0');
38086d7f5d3SJohn Marino TRACE (printf (" convert \"%s\"\n", s));
38186d7f5d3SJohn Marino
38286d7f5d3SJohn Marino /* We ought to have parsed out a valid string above, so just test
38386d7f5d3SJohn Marino mpz_set_str etc with an ASSERT. */
38486d7f5d3SJohn Marino switch (p->type) {
38586d7f5d3SJohn Marino case 'F':
38686d7f5d3SJohn Marino {
38786d7f5d3SJohn Marino mpf_ptr f = (mpf_ptr) dst;
38886d7f5d3SJohn Marino if (hexexp != 0)
38986d7f5d3SJohn Marino s[hexexp] = '\0';
39086d7f5d3SJohn Marino ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10));
39186d7f5d3SJohn Marino if (hexexp != 0)
39286d7f5d3SJohn Marino {
39386d7f5d3SJohn Marino char *dummy;
39486d7f5d3SJohn Marino long exp;
39586d7f5d3SJohn Marino exp = strtol (s + hexexp + 1, &dummy, 10);
39686d7f5d3SJohn Marino if (exp >= 0)
39786d7f5d3SJohn Marino mpf_mul_2exp (f, f, (unsigned long) exp);
39886d7f5d3SJohn Marino else
39986d7f5d3SJohn Marino mpf_div_2exp (f, f, - (unsigned long) exp);
40086d7f5d3SJohn Marino }
40186d7f5d3SJohn Marino }
40286d7f5d3SJohn Marino break;
40386d7f5d3SJohn Marino case 'Q':
40486d7f5d3SJohn Marino ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
40586d7f5d3SJohn Marino break;
40686d7f5d3SJohn Marino case 'Z':
40786d7f5d3SJohn Marino ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
40886d7f5d3SJohn Marino break;
40986d7f5d3SJohn Marino default:
41086d7f5d3SJohn Marino ASSERT (0);
41186d7f5d3SJohn Marino /*FALLTHRU*/
41286d7f5d3SJohn Marino break;
41386d7f5d3SJohn Marino }
41486d7f5d3SJohn Marino }
41586d7f5d3SJohn Marino
41686d7f5d3SJohn Marino done:
41786d7f5d3SJohn Marino ASSERT (chars <= width+1);
41886d7f5d3SJohn Marino if (chars != width+1)
41986d7f5d3SJohn Marino {
42086d7f5d3SJohn Marino (*funs->unget) (c, data);
42186d7f5d3SJohn Marino TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1));
42286d7f5d3SJohn Marino }
42386d7f5d3SJohn Marino chars--;
42486d7f5d3SJohn Marino
42586d7f5d3SJohn Marino (*__gmp_free_func) (s, s_alloc);
42686d7f5d3SJohn Marino
42786d7f5d3SJohn Marino if (invalid)
42886d7f5d3SJohn Marino {
42986d7f5d3SJohn Marino TRACE (printf (" invalid\n"));
43086d7f5d3SJohn Marino return -1;
43186d7f5d3SJohn Marino }
43286d7f5d3SJohn Marino
43386d7f5d3SJohn Marino TRACE (printf (" return %d chars (cf width %d)\n", chars, width));
43486d7f5d3SJohn Marino return chars;
43586d7f5d3SJohn Marino }
43686d7f5d3SJohn Marino
43786d7f5d3SJohn Marino
43886d7f5d3SJohn Marino /* Read and discard whitespace, if any. Return number of chars skipped.
43986d7f5d3SJohn Marino Whitespace skipping never provokes the EOF return from __gmp_doscan, so
44086d7f5d3SJohn Marino it's not necessary to watch for EOF from funs->get, */
44186d7f5d3SJohn Marino static int
skip_white(const struct gmp_doscan_funs_t * funs,void * data)44286d7f5d3SJohn Marino skip_white (const struct gmp_doscan_funs_t *funs, void *data)
44386d7f5d3SJohn Marino {
44486d7f5d3SJohn Marino int c;
44586d7f5d3SJohn Marino int ret = 0;
44686d7f5d3SJohn Marino
44786d7f5d3SJohn Marino do
44886d7f5d3SJohn Marino {
44986d7f5d3SJohn Marino c = (funs->get) (data);
45086d7f5d3SJohn Marino ret++;
45186d7f5d3SJohn Marino }
45286d7f5d3SJohn Marino while (isspace (c));
45386d7f5d3SJohn Marino
45486d7f5d3SJohn Marino (funs->unget) (c, data);
45586d7f5d3SJohn Marino ret--;
45686d7f5d3SJohn Marino
45786d7f5d3SJohn Marino TRACE (printf (" skip white %d\n", ret));
45886d7f5d3SJohn Marino return ret;
45986d7f5d3SJohn Marino }
46086d7f5d3SJohn Marino
46186d7f5d3SJohn Marino
46286d7f5d3SJohn Marino int
__gmp_doscan(const struct gmp_doscan_funs_t * funs,void * data,const char * orig_fmt,va_list orig_ap)46386d7f5d3SJohn Marino __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
46486d7f5d3SJohn Marino const char *orig_fmt, va_list orig_ap)
46586d7f5d3SJohn Marino {
46686d7f5d3SJohn Marino struct gmp_doscan_params_t param;
46786d7f5d3SJohn Marino va_list ap;
46886d7f5d3SJohn Marino char *alloc_fmt;
46986d7f5d3SJohn Marino const char *fmt, *this_fmt, *end_fmt;
47086d7f5d3SJohn Marino size_t orig_fmt_len, alloc_fmt_size, len;
47186d7f5d3SJohn Marino int new_fields, new_chars;
47286d7f5d3SJohn Marino char fchar;
47386d7f5d3SJohn Marino int fields = 0;
47486d7f5d3SJohn Marino int chars = 0;
47586d7f5d3SJohn Marino
47686d7f5d3SJohn Marino TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
47786d7f5d3SJohn Marino if (funs->scan == (gmp_doscan_scan_t) sscanf)
47886d7f5d3SJohn Marino printf (" s=\"%s\"\n", * (const char **) data));
47986d7f5d3SJohn Marino
48086d7f5d3SJohn Marino /* Don't modify orig_ap, if va_list is actually an array and hence call by
48186d7f5d3SJohn Marino reference. It could be argued that it'd be more efficient to leave
48286d7f5d3SJohn Marino callers to make a copy if they care, but doing so here is going to be a
48386d7f5d3SJohn Marino very small part of the total work, and we may as well keep applications
48486d7f5d3SJohn Marino out of trouble. */
48586d7f5d3SJohn Marino va_copy (ap, orig_ap);
48686d7f5d3SJohn Marino
48786d7f5d3SJohn Marino /* Parts of the format string are going to be copied so that a " %n" can
48886d7f5d3SJohn Marino be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be
48986d7f5d3SJohn Marino needed if fmt consists of a single "%" specifier, but otherwise is an
49086d7f5d3SJohn Marino overestimate. We're not going to be very fast here, so use
49186d7f5d3SJohn Marino __gmp_allocate_func rather than TMP_ALLOC. */
49286d7f5d3SJohn Marino orig_fmt_len = strlen (orig_fmt);
49386d7f5d3SJohn Marino alloc_fmt_size = orig_fmt_len + 4;
49486d7f5d3SJohn Marino alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
49586d7f5d3SJohn Marino
49686d7f5d3SJohn Marino fmt = orig_fmt;
49786d7f5d3SJohn Marino end_fmt = orig_fmt + orig_fmt_len;
49886d7f5d3SJohn Marino
49986d7f5d3SJohn Marino for (;;)
50086d7f5d3SJohn Marino {
50186d7f5d3SJohn Marino next:
50286d7f5d3SJohn Marino fchar = *fmt++;
50386d7f5d3SJohn Marino
50486d7f5d3SJohn Marino if (fchar == '\0')
50586d7f5d3SJohn Marino break;
50686d7f5d3SJohn Marino
50786d7f5d3SJohn Marino if (isspace (fchar))
50886d7f5d3SJohn Marino {
50986d7f5d3SJohn Marino chars += skip_white (funs, data);
51086d7f5d3SJohn Marino continue;
51186d7f5d3SJohn Marino }
51286d7f5d3SJohn Marino
51386d7f5d3SJohn Marino if (fchar != '%')
51486d7f5d3SJohn Marino {
51586d7f5d3SJohn Marino int c;
51686d7f5d3SJohn Marino literal:
51786d7f5d3SJohn Marino c = (funs->get) (data);
51886d7f5d3SJohn Marino if (c != fchar)
51986d7f5d3SJohn Marino {
52086d7f5d3SJohn Marino (funs->unget) (c, data);
52186d7f5d3SJohn Marino if (c == EOF)
52286d7f5d3SJohn Marino {
52386d7f5d3SJohn Marino eof_no_match:
52486d7f5d3SJohn Marino if (fields == 0)
52586d7f5d3SJohn Marino fields = EOF;
52686d7f5d3SJohn Marino }
52786d7f5d3SJohn Marino goto done;
52886d7f5d3SJohn Marino }
52986d7f5d3SJohn Marino chars++;
53086d7f5d3SJohn Marino continue;
53186d7f5d3SJohn Marino }
53286d7f5d3SJohn Marino
53386d7f5d3SJohn Marino param.type = '\0';
53486d7f5d3SJohn Marino param.base = 0; /* for e,f,g,i */
53586d7f5d3SJohn Marino param.ignore = 0;
53686d7f5d3SJohn Marino param.width = 0;
53786d7f5d3SJohn Marino
53886d7f5d3SJohn Marino this_fmt = fmt-1;
53986d7f5d3SJohn Marino TRACE (printf (" this_fmt \"%s\"\n", this_fmt));
54086d7f5d3SJohn Marino
54186d7f5d3SJohn Marino for (;;)
54286d7f5d3SJohn Marino {
54386d7f5d3SJohn Marino ASSERT (fmt <= end_fmt);
54486d7f5d3SJohn Marino
54586d7f5d3SJohn Marino fchar = *fmt++;
54686d7f5d3SJohn Marino switch (fchar) {
54786d7f5d3SJohn Marino
54886d7f5d3SJohn Marino case '\0': /* unterminated % sequence */
54986d7f5d3SJohn Marino ASSERT (0);
55086d7f5d3SJohn Marino goto done;
55186d7f5d3SJohn Marino
55286d7f5d3SJohn Marino case '%': /* literal % */
55386d7f5d3SJohn Marino goto literal;
55486d7f5d3SJohn Marino
55586d7f5d3SJohn Marino case '[': /* character range */
55686d7f5d3SJohn Marino fchar = *fmt++;
55786d7f5d3SJohn Marino if (fchar == '^')
55886d7f5d3SJohn Marino fchar = *fmt++;
55986d7f5d3SJohn Marino /* ']' allowed as the first char (possibly after '^') */
56086d7f5d3SJohn Marino if (fchar == ']')
56186d7f5d3SJohn Marino fchar = *fmt++;
56286d7f5d3SJohn Marino for (;;)
56386d7f5d3SJohn Marino {
56486d7f5d3SJohn Marino ASSERT (fmt <= end_fmt);
56586d7f5d3SJohn Marino if (fchar == '\0')
56686d7f5d3SJohn Marino {
56786d7f5d3SJohn Marino /* unterminated % sequence */
56886d7f5d3SJohn Marino ASSERT (0);
56986d7f5d3SJohn Marino goto done;
57086d7f5d3SJohn Marino }
57186d7f5d3SJohn Marino if (fchar == ']')
57286d7f5d3SJohn Marino break;
57386d7f5d3SJohn Marino fchar = *fmt++;
57486d7f5d3SJohn Marino }
57586d7f5d3SJohn Marino /*FALLTHRU*/
57686d7f5d3SJohn Marino case 'c': /* characters */
57786d7f5d3SJohn Marino case 's': /* string of non-whitespace */
57886d7f5d3SJohn Marino case 'p': /* pointer */
57986d7f5d3SJohn Marino libc_type:
58086d7f5d3SJohn Marino len = fmt - this_fmt;
58186d7f5d3SJohn Marino memcpy (alloc_fmt, this_fmt, len);
58286d7f5d3SJohn Marino alloc_fmt[len++] = '%';
58386d7f5d3SJohn Marino alloc_fmt[len++] = 'n';
58486d7f5d3SJohn Marino alloc_fmt[len] = '\0';
58586d7f5d3SJohn Marino
58686d7f5d3SJohn Marino TRACE (printf (" scan \"%s\"\n", alloc_fmt);
58786d7f5d3SJohn Marino if (funs->scan == (gmp_doscan_scan_t) sscanf)
58886d7f5d3SJohn Marino printf (" s=\"%s\"\n", * (const char **) data));
58986d7f5d3SJohn Marino
59086d7f5d3SJohn Marino new_chars = -1;
59186d7f5d3SJohn Marino if (param.ignore)
59286d7f5d3SJohn Marino {
59386d7f5d3SJohn Marino new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL);
59486d7f5d3SJohn Marino ASSERT (new_fields == 0 || new_fields == EOF);
59586d7f5d3SJohn Marino }
59686d7f5d3SJohn Marino else
59786d7f5d3SJohn Marino {
59886d7f5d3SJohn Marino void *arg = va_arg (ap, void *);
59986d7f5d3SJohn Marino new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars);
60086d7f5d3SJohn Marino ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
60186d7f5d3SJohn Marino
60286d7f5d3SJohn Marino if (new_fields == 0)
60386d7f5d3SJohn Marino goto done; /* invalid input */
60486d7f5d3SJohn Marino
60586d7f5d3SJohn Marino if (new_fields == 1)
60686d7f5d3SJohn Marino ASSERT (new_chars != -1);
60786d7f5d3SJohn Marino }
60886d7f5d3SJohn Marino TRACE (printf (" new_fields %d new_chars %d\n",
60986d7f5d3SJohn Marino new_fields, new_chars));
61086d7f5d3SJohn Marino
61186d7f5d3SJohn Marino if (new_fields == -1)
61286d7f5d3SJohn Marino goto eof_no_match; /* EOF before anything matched */
61386d7f5d3SJohn Marino
61486d7f5d3SJohn Marino /* Under param.ignore, when new_fields==0 we don't know if
61586d7f5d3SJohn Marino it's a successful match or an invalid field. new_chars
61686d7f5d3SJohn Marino won't have been assigned if it was an invalid field. */
61786d7f5d3SJohn Marino if (new_chars == -1)
61886d7f5d3SJohn Marino goto done; /* invalid input */
61986d7f5d3SJohn Marino
62086d7f5d3SJohn Marino chars += new_chars;
62186d7f5d3SJohn Marino (*funs->step) (data, new_chars);
62286d7f5d3SJohn Marino
62386d7f5d3SJohn Marino increment_fields:
62486d7f5d3SJohn Marino if (! param.ignore)
62586d7f5d3SJohn Marino fields++;
62686d7f5d3SJohn Marino goto next;
62786d7f5d3SJohn Marino
62886d7f5d3SJohn Marino case 'd': /* decimal */
62986d7f5d3SJohn Marino case 'u': /* decimal */
63086d7f5d3SJohn Marino param.base = 10;
63186d7f5d3SJohn Marino goto numeric;
63286d7f5d3SJohn Marino
63386d7f5d3SJohn Marino case 'e': /* float */
63486d7f5d3SJohn Marino case 'E': /* float */
63586d7f5d3SJohn Marino case 'f': /* float */
63686d7f5d3SJohn Marino case 'g': /* float */
63786d7f5d3SJohn Marino case 'G': /* float */
63886d7f5d3SJohn Marino case 'i': /* integer with base marker */
63986d7f5d3SJohn Marino numeric:
64086d7f5d3SJohn Marino if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
64186d7f5d3SJohn Marino goto libc_type;
64286d7f5d3SJohn Marino
64386d7f5d3SJohn Marino chars += skip_white (funs, data);
64486d7f5d3SJohn Marino
64586d7f5d3SJohn Marino new_chars = gmpscan (funs, data, ¶m,
64686d7f5d3SJohn Marino param.ignore ? NULL : va_arg (ap, void*));
64786d7f5d3SJohn Marino if (new_chars == -2)
64886d7f5d3SJohn Marino goto eof_no_match;
64986d7f5d3SJohn Marino if (new_chars == -1)
65086d7f5d3SJohn Marino goto done;
65186d7f5d3SJohn Marino
65286d7f5d3SJohn Marino ASSERT (new_chars >= 0);
65386d7f5d3SJohn Marino chars += new_chars;
65486d7f5d3SJohn Marino goto increment_fields;
65586d7f5d3SJohn Marino
65686d7f5d3SJohn Marino case 'a': /* glibc allocate string */
65786d7f5d3SJohn Marino case '\'': /* glibc digit groupings */
65886d7f5d3SJohn Marino break;
65986d7f5d3SJohn Marino
66086d7f5d3SJohn Marino case 'F': /* mpf_t */
66186d7f5d3SJohn Marino case 'j': /* intmax_t */
66286d7f5d3SJohn Marino case 'L': /* long long */
66386d7f5d3SJohn Marino case 'q': /* quad_t */
66486d7f5d3SJohn Marino case 'Q': /* mpq_t */
66586d7f5d3SJohn Marino case 't': /* ptrdiff_t */
66686d7f5d3SJohn Marino case 'z': /* size_t */
66786d7f5d3SJohn Marino case 'Z': /* mpz_t */
66886d7f5d3SJohn Marino set_type:
66986d7f5d3SJohn Marino param.type = fchar;
67086d7f5d3SJohn Marino break;
67186d7f5d3SJohn Marino
67286d7f5d3SJohn Marino case 'h': /* short or char */
67386d7f5d3SJohn Marino if (param.type != 'h')
67486d7f5d3SJohn Marino goto set_type;
67586d7f5d3SJohn Marino param.type = 'H'; /* internal code for "hh" */
67686d7f5d3SJohn Marino break;
67786d7f5d3SJohn Marino
67886d7f5d3SJohn Marino goto numeric;
67986d7f5d3SJohn Marino
68086d7f5d3SJohn Marino case 'l': /* long, long long, double or long double */
68186d7f5d3SJohn Marino if (param.type != 'l')
68286d7f5d3SJohn Marino goto set_type;
68386d7f5d3SJohn Marino param.type = 'L'; /* "ll" means "L" */
68486d7f5d3SJohn Marino break;
68586d7f5d3SJohn Marino
68686d7f5d3SJohn Marino case 'n':
68786d7f5d3SJohn Marino if (! param.ignore)
68886d7f5d3SJohn Marino {
68986d7f5d3SJohn Marino void *p;
69086d7f5d3SJohn Marino p = va_arg (ap, void *);
69186d7f5d3SJohn Marino TRACE (printf (" store %%n to %p\n", p));
69286d7f5d3SJohn Marino switch (param.type) {
69386d7f5d3SJohn Marino case '\0': * (int *) p = chars; break;
69486d7f5d3SJohn Marino case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break;
69586d7f5d3SJohn Marino case 'H': * (char *) p = chars; break;
69686d7f5d3SJohn Marino case 'h': * (short *) p = chars; break;
69786d7f5d3SJohn Marino #if HAVE_INTMAX_T
69886d7f5d3SJohn Marino case 'j': * (intmax_t *) p = chars; break;
69986d7f5d3SJohn Marino #else
70086d7f5d3SJohn Marino case 'j': ASSERT_FAIL (intmax_t not available); break;
70186d7f5d3SJohn Marino #endif
70286d7f5d3SJohn Marino case 'l': * (long *) p = chars; break;
70386d7f5d3SJohn Marino #if HAVE_QUAD_T && HAVE_LONG_LONG
70486d7f5d3SJohn Marino case 'q':
70586d7f5d3SJohn Marino ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
70686d7f5d3SJohn Marino /*FALLTHRU*/
70786d7f5d3SJohn Marino #else
70886d7f5d3SJohn Marino case 'q': ASSERT_FAIL (quad_t not available); break;
70986d7f5d3SJohn Marino #endif
71086d7f5d3SJohn Marino #if HAVE_LONG_LONG
71186d7f5d3SJohn Marino case 'L': * (long long *) p = chars; break;
71286d7f5d3SJohn Marino #else
71386d7f5d3SJohn Marino case 'L': ASSERT_FAIL (long long not available); break;
71486d7f5d3SJohn Marino #endif
71586d7f5d3SJohn Marino case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
71686d7f5d3SJohn Marino #if HAVE_PTRDIFF_T
71786d7f5d3SJohn Marino case 't': * (ptrdiff_t *) p = chars; break;
71886d7f5d3SJohn Marino #else
71986d7f5d3SJohn Marino case 't': ASSERT_FAIL (ptrdiff_t not available); break;
72086d7f5d3SJohn Marino #endif
72186d7f5d3SJohn Marino case 'z': * (size_t *) p = chars; break;
72286d7f5d3SJohn Marino case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break;
72386d7f5d3SJohn Marino default: ASSERT (0); break;
72486d7f5d3SJohn Marino }
72586d7f5d3SJohn Marino }
72686d7f5d3SJohn Marino goto next;
72786d7f5d3SJohn Marino
72886d7f5d3SJohn Marino case 'o':
72986d7f5d3SJohn Marino param.base = 8;
73086d7f5d3SJohn Marino goto numeric;
73186d7f5d3SJohn Marino
73286d7f5d3SJohn Marino case 'x':
73386d7f5d3SJohn Marino case 'X':
73486d7f5d3SJohn Marino param.base = 16;
73586d7f5d3SJohn Marino goto numeric;
73686d7f5d3SJohn Marino
73786d7f5d3SJohn Marino case '0': case '1': case '2': case '3': case '4':
73886d7f5d3SJohn Marino case '5': case '6': case '7': case '8': case '9':
73986d7f5d3SJohn Marino param.width = 0;
74086d7f5d3SJohn Marino do {
74186d7f5d3SJohn Marino param.width = param.width * 10 + (fchar-'0');
74286d7f5d3SJohn Marino fchar = *fmt++;
74386d7f5d3SJohn Marino } while (isdigit (fchar));
74486d7f5d3SJohn Marino fmt--; /* unget the non-digit */
74586d7f5d3SJohn Marino break;
74686d7f5d3SJohn Marino
74786d7f5d3SJohn Marino case '*':
74886d7f5d3SJohn Marino param.ignore = 1;
74986d7f5d3SJohn Marino break;
75086d7f5d3SJohn Marino
75186d7f5d3SJohn Marino default:
75286d7f5d3SJohn Marino /* something invalid in a % sequence */
75386d7f5d3SJohn Marino ASSERT (0);
75486d7f5d3SJohn Marino goto next;
75586d7f5d3SJohn Marino }
75686d7f5d3SJohn Marino }
75786d7f5d3SJohn Marino }
75886d7f5d3SJohn Marino
75986d7f5d3SJohn Marino done:
76086d7f5d3SJohn Marino (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
76186d7f5d3SJohn Marino return fields;
76286d7f5d3SJohn Marino }
763