xref: /netbsd-src/external/lgpl3/mpfr/dist/src/strtofr.c (revision ec6772edaf0cdcb5f52a48f4aca5e33a8fb8ecfd)
1 /* mpfr_strtofr -- set a floating-point number from a string
2 
3 Copyright 2004-2023 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include <ctype.h>  /* For isspace */
24 
25 #define MPFR_NEED_LONGLONG_H
26 #include "mpfr-impl.h"
27 
28 #define MPFR_MAX_BASE 62
29 
30 struct parsed_string {
31   int            negative; /* non-zero iff the number is negative */
32   int            base;     /* base of the string */
33   unsigned char *mantissa; /* raw significand (without any point) */
34   unsigned char *mant;     /* stripped significand (without starting and
35                               ending zeroes). This points inside the area
36                               allocated for the mantissa field. */
37   size_t         prec;     /* length of mant (zero for +/-0) */
38   size_t         alloc;    /* allocation size of mantissa */
39   mpfr_exp_t     exp_base; /* number of digits before the point, + exponent
40                               except in case of binary exponent (exp_bin) */
41   mpfr_exp_t     exp_bin;  /* binary exponent of the pxxx format for
42                               base = 2 or 16 */
43 };
44 
45 /* This table has been generated by the following program.
46    For 2 <= b <= MPFR_MAX_BASE,
47    RedInvLog2Table[b-2][0] / RedInvLog2Table[b-2][1]
48    is an upper approximation to log(2)/log(b), no larger than 1.
49    Note: these numbers must fit on 16 bits, thus unsigned int is OK.
50 */
51 static const unsigned int RedInvLog2Table[MPFR_MAX_BASE-1][2] = {
52   {1, 1},
53   {53, 84},
54   {1, 2},
55   {4004, 9297},
56   {53, 137},
57   {2393, 6718},
58   {1, 3},
59   {665, 2108},
60   {4004, 13301},
61   {949, 3283},
62   {53, 190},
63   {5231, 19357},
64   {2393, 9111},
65   {247, 965},
66   {1, 4},
67   {4036, 16497},
68   {665, 2773},
69   {5187, 22034},
70   {4004, 17305},
71   {51, 224},
72   {949, 4232},
73   {3077, 13919},
74   {53, 243},
75   {73, 339},
76   {5231, 24588},
77   {665, 3162},
78   {2393, 11504},
79   {4943, 24013},
80   {247, 1212},
81   {3515, 17414},
82   {1, 5},
83   {4415, 22271},
84   {4036, 20533},
85   {263, 1349},
86   {665, 3438},
87   {1079, 5621},
88   {5187, 27221},
89   {2288, 12093},
90   {4004, 21309},
91   {179, 959},
92   {51, 275},
93   {495, 2686},
94   {949, 5181},
95   {3621, 19886},
96   {3077, 16996},
97   {229, 1272},
98   {53, 296},
99   {109, 612},
100   {73, 412},
101   {1505, 8537},
102   {5231, 29819},
103   {283, 1621},
104   {665, 3827},
105   {32, 185},
106   {2393, 13897},
107   {1879, 10960},
108   {4943, 28956},
109   {409, 2406},
110   {247, 1459},
111   {231, 1370},
112   {3515, 20929} };
113 #if 0
114 #define N 8
115 int main ()
116 {
117   unsigned long tab[N];
118   int i, n, base;
119   mpfr_t x, y;
120   mpq_t q1, q2;
121   int overflow = 0, base_overflow;
122 
123   mpfr_init2 (x, 200);
124   mpfr_init2 (y, 200);
125   mpq_init (q1);
126   mpq_init (q2);
127 
128   for (base = 2 ; base < 63 ; base ++)
129     {
130       mpfr_set_ui (x, base, MPFR_RNDN);
131       mpfr_log2 (x, x, MPFR_RNDN);
132       mpfr_ui_div (x, 1, x, MPFR_RNDN);
133       printf ("Base: %d x=%e ", base, mpfr_get_d1 (x));
134       for (i = 0 ; i < N ; i++)
135         {
136           mpfr_floor (y, x);
137           tab[i] = mpfr_get_ui (y, MPFR_RNDN);
138           mpfr_sub (x, x, y, MPFR_RNDN);
139           mpfr_ui_div (x, 1, x, MPFR_RNDN);
140         }
141       for (i = N-1 ; i >= 0 ; i--)
142         if (tab[i] != 0)
143           break;
144       mpq_set_ui (q1, tab[i], 1);
145       for (i = i-1 ; i >= 0 ; i--)
146           {
147             mpq_inv (q1, q1);
148             mpq_set_ui (q2, tab[i], 1);
149             mpq_add (q1, q1, q2);
150           }
151       printf("Approx: ", base);
152       mpq_out_str (stdout, 10, q1);
153       printf (" = %e\n", mpq_get_d (q1) );
154       fprintf (stderr, "{");
155       mpz_out_str (stderr, 10, mpq_numref (q1));
156       fprintf (stderr, "UL, ");
157       mpz_out_str (stderr, 10, mpq_denref (q1));
158       fprintf (stderr, "UL},\n");
159       if (mpz_cmp_ui (mpq_numref (q1), 1<<16-1) >= 0
160           || mpz_cmp_ui (mpq_denref (q1), 1<<16-1) >= 0)
161         overflow = 1, base_overflow = base;
162     }
163 
164   mpq_clear (q2);
165   mpq_clear (q1);
166   mpfr_clear (y);
167   mpfr_clear (x);
168   if (overflow )
169     printf ("OVERFLOW for base =%d!\n", base_overflow);
170 }
171 #endif
172 
173 
174 /* Compatible with any locale, but one still assumes that 'a', 'b', 'c',
175    ..., 'z', and 'A', 'B', 'C', ..., 'Z' are consecutive values (like
176    in any ASCII-based character set). */
177 static int
digit_value_in_base(int c,int base)178 digit_value_in_base (int c, int base)
179 {
180   int digit;
181 
182   MPFR_ASSERTD (base > 0 && base <= MPFR_MAX_BASE);
183 
184   if (c >= '0' && c <= '9')
185     digit = c - '0';
186   else if (c >= 'a' && c <= 'z')
187     digit = (base >= 37) ? c - 'a' + 36 : c - 'a' + 10;
188   else if (c >= 'A' && c <= 'Z')
189     digit = c - 'A' + 10;
190   else
191     return -1;
192 
193   return MPFR_LIKELY (digit < base) ? digit : -1;
194 }
195 
196 /* Compatible with any locale, but one still assumes that 'a', 'b', 'c',
197    ..., 'z', and 'A', 'B', 'C', ..., 'Z' are consecutive values (like
198    in any ASCII-based character set). */
199 /* TODO: support EBCDIC. */
200 static int
fast_casecmp(const char * s1,const char * s2)201 fast_casecmp (const char *s1, const char *s2)
202 {
203   unsigned char c1, c2;
204 
205   do
206     {
207       c2 = *(const unsigned char *) s2++;
208       if (c2 == '\0')
209         return 0;
210       c1 = *(const unsigned char *) s1++;
211       if (c1 >= 'A' && c1 <= 'Z')
212         c1 = c1 - 'A' + 'a';
213     }
214   while (c1 == c2);
215   return 1;
216 }
217 
218 /* Parse a string and fill pstr.
219    Return the advanced ptr too.
220    It returns:
221       -1 if invalid string,
222       0 if special string (like nan),
223       1 if the string is OK.
224       2 if overflows
225    So it doesn't return the ternary value
226    BUT if it returns 0 (NAN or INF), the ternary value is also '0'
227    (ie NAN and INF are exact) */
228 static int
parse_string(mpfr_ptr x,struct parsed_string * pstr,const char ** string,int base)229 parse_string (mpfr_ptr x, struct parsed_string *pstr,
230               const char **string, int base)
231 {
232   const char *str = *string;
233   unsigned char *mant;
234   int point;
235   int res = -1;  /* Invalid input return value */
236   const char *prefix_str;
237   int decimal_point;
238 
239   decimal_point = (unsigned char) MPFR_DECIMAL_POINT;
240 
241   /* Init variable */
242   pstr->mantissa = NULL;
243 
244   /* Optional leading whitespace */
245   /* For non-"C" locales, the ISO C standard allows isspace(0) to
246      return true. So we need to stop explicitly on '\0'. */
247   while (*str != '\0' && isspace ((unsigned char) *str))
248     str++;
249 
250   /* An optional sign `+' or `-' */
251   pstr->negative = (*str == '-');
252   if (*str == '-' || *str == '+')
253     str++;
254 
255   /* Can be case-insensitive NAN */
256   if (fast_casecmp (str, "@nan@") == 0)
257     {
258       str += 5;
259       goto set_nan;
260     }
261   if (base <= 16 && fast_casecmp (str, "nan") == 0)
262     {
263       str += 3;
264     set_nan:
265       /* Check for "(dummychars)" */
266       if (*str == '(')
267         {
268           const char *s;
269           for (s = str+1 ; *s != ')' ; s++)
270             if (!(*s >= 'A' && *s <= 'Z')
271                 && !(*s >= 'a' && *s <= 'z')
272                 && !(*s >= '0' && *s <= '9')
273                 && *s != '_')
274               break;
275           if (*s == ')')
276             str = s+1;
277         }
278       *string = str;
279       MPFR_SET_NAN(x);
280       /* MPFR_RET_NAN not used as the return value isn't a ternary value */
281       __gmpfr_flags |= MPFR_FLAGS_NAN;
282       return 0;
283     }
284 
285   /* Can be case-insensitive INF */
286   if (fast_casecmp (str, "@inf@") == 0)
287     {
288       str += 5;
289       goto set_inf;
290     }
291   if (base <= 16 && fast_casecmp (str, "infinity") == 0)
292     {
293       str += 8;
294       goto set_inf;
295     }
296   if (base <= 16 && fast_casecmp (str, "inf") == 0)
297     {
298       str += 3;
299     set_inf:
300       *string = str;
301       MPFR_SET_INF (x);
302       (pstr->negative) ? MPFR_SET_NEG (x) : MPFR_SET_POS (x);
303       return 0;
304     }
305 
306   /* If base=0 or 16, it may include '0x' prefix */
307   prefix_str = NULL;
308   if ((base == 0 || base == 16) && str[0]=='0'
309       && (str[1]=='x' || str[1] == 'X'))
310     {
311       prefix_str = str;
312       base = 16;
313       str += 2;
314     }
315   /* If base=0 or 2, it may include '0b' prefix */
316   if ((base == 0 || base == 2) && str[0]=='0'
317       && (str[1]=='b' || str[1] == 'B'))
318     {
319       prefix_str = str;
320       base = 2;
321       str += 2;
322     }
323   /* Else if base=0, we assume decimal base */
324   if (base == 0)
325     base = 10;
326   pstr->base = base;
327 
328   /* Alloc mantissa */
329   pstr->alloc = (size_t) strlen (str) + 1;
330   pstr->mantissa = (unsigned char*) mpfr_allocate_func (pstr->alloc);
331 
332   /* Read mantissa digits */
333  parse_begin:
334   mant = pstr->mantissa;
335   point = 0;
336   pstr->exp_base = 0;
337   pstr->exp_bin  = 0;
338 
339   for (;;) /* Loop until an invalid character is read */
340     {
341       int c = (unsigned char) *str++;
342       /* The cast to unsigned char is needed because of digit_value_in_base;
343          decimal_point uses this convention too. */
344       if (c == '.' || c == decimal_point)
345         {
346           if (MPFR_UNLIKELY(point)) /* Second '.': stop parsing */
347             break;
348           point = 1;
349           continue;
350         }
351       c = digit_value_in_base (c, base);
352       if (c == -1)
353         break;
354       MPFR_ASSERTN (c >= 0); /* c is representable in an unsigned char */
355       *mant++ = (unsigned char) c;
356       if (!point)
357         pstr->exp_base ++;
358     }
359   str--; /* The last read character was invalid */
360 
361   /* Update the # of char in the mantissa */
362   pstr->prec = mant - pstr->mantissa;
363   /* Check if there are no characters in the mantissa (Invalid argument) */
364   if (pstr->prec == 0)
365     {
366       /* Check if there was a prefix (in such a case, we have to read
367          again the mantissa without skipping the prefix)
368          The allocated mantissa is still big enough since we will
369          read only 0, and we alloc one more char than needed.
370          FIXME: Not really friendly. Maybe cleaner code? */
371       if (prefix_str != NULL)
372         {
373           str = prefix_str;
374           prefix_str = NULL;
375           goto parse_begin;
376         }
377       goto end;
378     }
379 
380   /* Valid entry */
381   res = 1;
382   MPFR_ASSERTD (pstr->exp_base >= 0);
383 
384   /* FIXME: In the code below (both cases), if the exponent from the
385      string is large, it will be replaced by MPFR_EXP_MIN or MPFR_EXP_MAX,
386      i.e. it will have a different value. This may not change the result
387      in most cases, but there is no guarantee on very long strings when
388      mpfr_exp_t is a 32-bit type, as the exponent could be brought back
389      to the current exponent range. */
390 
391   /* an optional exponent (e or E, p or P, @) */
392   if ( (*str == '@' || (base <= 10 && (*str == 'e' || *str == 'E')))
393        && (!isspace((unsigned char) str[1])) )
394     {
395       char *endptr;
396       /* the exponent digits are kept in ASCII */
397       mpfr_exp_t sum;
398       long read_exp = strtol (str + 1, &endptr, 10);
399       if (endptr != str+1)
400         str = endptr;
401       sum =
402         read_exp < MPFR_EXP_MIN ? (str = endptr, MPFR_EXP_MIN) :
403         read_exp > MPFR_EXP_MAX ? (str = endptr, MPFR_EXP_MAX) :
404         (mpfr_exp_t) read_exp;
405       MPFR_SADD_OVERFLOW (sum, sum, pstr->exp_base,
406                           mpfr_exp_t, mpfr_uexp_t,
407                           MPFR_EXP_MIN, MPFR_EXP_MAX,
408                           res = 2, res = 3);
409       /* Since exp_base was positive, read_exp + exp_base can't
410          do a negative overflow. */
411       MPFR_ASSERTD (res != 3);
412       pstr->exp_base = sum;
413     }
414   else if ((base == 2 || base == 16)
415            && (*str == 'p' || *str == 'P')
416            && (!isspace((unsigned char) str[1])))
417     {
418       char *endptr;
419       long read_exp = strtol (str + 1, &endptr, 10);
420       if (endptr != str+1)
421         str = endptr;
422       pstr->exp_bin =
423         read_exp < MPFR_EXP_MIN ? (str = endptr, MPFR_EXP_MIN) :
424         read_exp > MPFR_EXP_MAX ? (str = endptr, MPFR_EXP_MAX) :
425         (mpfr_exp_t) read_exp;
426     }
427 
428   /* Remove 0's at the beginning and end of mantissa[0..prec-1] */
429   mant = pstr->mantissa;
430   for ( ; (pstr->prec > 0) && (*mant == 0) ; mant++, pstr->prec--)
431     if (MPFR_LIKELY (pstr->exp_base != MPFR_EXP_MIN))
432       pstr->exp_base--;
433   for ( ; (pstr->prec > 0) && (mant[pstr->prec - 1] == 0); pstr->prec--);
434   pstr->mant = mant;
435 
436   /* Check if x = 0 */
437   if (pstr->prec == 0)
438     {
439       MPFR_SET_ZERO (x);
440       if (pstr->negative)
441         MPFR_SET_NEG(x);
442       else
443         MPFR_SET_POS(x);
444       res = 0;
445     }
446 
447   *string = str;
448  end:
449   if (pstr->mantissa != NULL && res != 1)
450     mpfr_free_func (pstr->mantissa, pstr->alloc);
451   return res;
452 }
453 
454 /* Transform a parsed string to a mpfr_t according to the rounding mode
455    and the precision of x.
456    Returns the ternary value. */
457 static int
parsed_string_to_mpfr(mpfr_ptr x,struct parsed_string * pstr,mpfr_rnd_t rnd)458 parsed_string_to_mpfr (mpfr_ptr x, struct parsed_string *pstr, mpfr_rnd_t rnd)
459 {
460   mpfr_prec_t precx, prec, ysize_bits, pstr_size;
461   mpfr_exp_t exp;
462   mp_limb_t *result;
463   int count, exact;
464   mp_size_t ysize, real_ysize, diff_ysize;
465   int res, err;
466   const int extra_limbs = GMP_NUMB_BITS >= 12 ? 1 : 2; /* see below */
467   MPFR_ZIV_DECL (loop);
468   MPFR_TMP_DECL (marker);
469 
470   /* initialize the working precision */
471   precx = MPFR_GET_PREC (x);
472   prec = precx + MPFR_INT_CEIL_LOG2 (precx);
473 
474   /* Compute the value y of the leading characters as long as rounding is not
475      possible.
476      Note: We have some integer overflow checking using MPFR_EXP_MIN and
477      MPFR_EXP_MAX in this loop. Thanks to the large margin between these
478      extremal values of the mpfr_exp_t type and the valid minimum/maximum
479      exponents, such integer overflows would correspond to real underflow
480      or overflow on the result (possibly except in huge precisions, which
481      are disregarded here; anyway, in practice, such issues could occur
482      only with 32-bit precision and exponent types). Such checks could be
483      extended to real early underflow/overflow checking, in order to avoid
484      useless computations in such cases; in such a case, be careful that
485      the approximation errors need to be taken into account. */
486   MPFR_TMP_MARK(marker);
487   MPFR_ZIV_INIT (loop, prec);
488   for (;;)
489     {
490       mp_limb_t *y0, *y;
491 
492       /* y will be regarded as a number with precision prec. */
493       ysize = MPFR_PREC2LIMBS (prec);
494       /* prec bits corresponds to ysize limbs */
495       ysize_bits = (mpfr_prec_t) ysize * GMP_NUMB_BITS;
496       MPFR_ASSERTD (ysize_bits >= prec);
497       /* and to ysize_bits >= prec > precx bits. */
498       /* We need to allocate one more limb as specified by mpn_set_str
499          (a limb may be written in rp[rn]). Note that the manual of GMP
500          up to 5.1.3 was incorrect on this point.
501          See the following discussion:
502          https://gmplib.org/list-archives/gmp-bugs/2013-December/003267.html */
503       y0 = MPFR_TMP_LIMBS_ALLOC (2 * ysize + extra_limbs + 1);
504       y = y0 + ysize; /* y has (ysize + extra_limbs + 1) allocated limbs */
505 
506       /* pstr_size is the number of bytes we want to read from pstr->mant
507          to fill at least ysize full limbs with mpn_set_str.
508          We must have base^(pstr_size-1) >= (2^(GMP_NUMB_BITS))^ysize
509          (in the worst case, the first digit is one and all others are zero).
510          i.e., pstr_size >= 1 + ysize*GMP_NUMB_BITS/log2(base)
511           Since ysize ~ prec/GMP_NUMB_BITS and prec < Umax/2 =>
512           ysize*GMP_NUMB_BITS can not overflow.
513          We compute pstr_size = 1 + ceil(ysize_bits * Num / Den)
514           where 1/log2(base) <= Num/Den <= 1
515          It is not exactly ceil(1/log2(base)) but could be one more (base 2).
516          Quite ugly since it tries to avoid overflow:
517          let Num = RedInvLog2Table[pstr->base-2][0]
518          and Den = RedInvLog2Table[pstr->base-2][1],
519          and ysize_bits = a*Den+b,
520          then ysize_bits * Num/Den = a*Num + (b * Num)/Den,
521          thus ceil(ysize_bits * Num/Den) = a*Num + floor(b * Num + Den - 1)/Den
522 
523          Note: denoting m = pstr_size and n = ysize_bits, assuming we have
524          m = 1 + ceil(n/log2(b)), i.e., b^(m-1) >= 2^n > b^(m-2), then
525          b^(m-1)/2^n < b, and since we consider m characters of the input,
526          the corresponding part is less than b^m < b^2*2^n.
527          This implies that if b^2 < 2^GMP_NUMB_BITS, which for b <= 62 holds
528          for GMP_NUMB_BITS >= 12, we have real_ysize <= ysize+1 below
529          (this also implies that for GMP_NUMB_BITS >= 13, the number of bits
530          of y[real_ysize-1] below is less than GMP_NUMB_BITS, thus
531          count < GMP_NUMB_BITS).
532          Warning: for GMP_NUMB_BITS=8, we can have real_ysize = ysize + 2!
533          Hence the allocation above for ysize + extra_limbs limbs.
534       */
535       {
536         unsigned int Num = RedInvLog2Table[pstr->base-2][0];
537         unsigned int Den = RedInvLog2Table[pstr->base-2][1];
538         MPFR_ASSERTD (Num <= Den && Den <= 65535); /* thus no overflow */
539         pstr_size = (ysize_bits / Den) * Num
540           + ((unsigned long) (ysize_bits % Den) * Num + Den - 1) / Den
541           + 1;
542       }
543 
544       /* Since pstr_size corresponds to at least ysize_bits bits,
545          and ysize_bits >= prec, the weight of the neglected part of
546          pstr->mant (if any) is < ulp(y) < ulp(x). */
547 
548       /* If the number of wanted bytes is more than what is available
549          in pstr->mant, i.e. pstr->prec, reduce it to pstr->prec. */
550       if (pstr_size > pstr->prec)
551         pstr_size = pstr->prec;
552 
553       /* Convert str (potentially truncated to pstr_size) into binary.
554          Note that pstr->mant is big endian, thus no offset is needed. */
555       real_ysize = mpn_set_str (y, pstr->mant, pstr_size, pstr->base);
556 
557       /* See above for the explanation of the following assertion. */
558       MPFR_ASSERTD (real_ysize <= ysize + extra_limbs);
559 
560       /* The Boolean "exact" will attempt to track exactness of the result:
561          If it is true, then this means that the result is exact, allowing
562          termination, even though the rounding test may not succeed.
563          Conversely, if the result is exact, then "exact" will not
564          necessarily be true at the end of the Ziv loop, but we will need
565          to make sure that at some point, "exact" will be true in order to
566          guarantee termination. FIXME: check that. */
567       /* First, consider the part of the input string that has been ignored.
568          Note that the trailing zeros have been removed in parse_string, so
569          that if something has been ignored, it must be non-zero. */
570       exact = pstr_size == pstr->prec;
571 
572       /* Normalize y and set the initial value of its exponent exp, which
573          is 0 when y is not shifted.
574          Since pstr->mant was normalized, mpn_set_str guarantees that
575          the most significant limb is non-zero. */
576       MPFR_ASSERTD (y[real_ysize - 1] != 0); /* mpn_set_str guarantees this */
577       count_leading_zeros (count, y[real_ysize - 1]);
578       diff_ysize = ysize - real_ysize;
579       MPFR_LOG_MSG (("diff_ysize = %ld\n", (long) diff_ysize));
580       if (diff_ysize >= 0)
581         {
582           /* We have enough limbs to store {y, real_ysize} exactly
583              in {y, ysize}, so that we can do a left shift, without
584              losing any information ("exact" will not change). */
585           if (count != 0)
586             mpn_lshift (y + diff_ysize, y, real_ysize, count);
587           if (diff_ysize > 0)
588             {
589               if (count == 0)
590                 mpn_copyd (y + diff_ysize, y, real_ysize);
591               MPN_ZERO (y, diff_ysize);
592             }
593           /* exp = negation of the total shift count, avoiding overflows. */
594           exp = - ((mpfr_exp_t) diff_ysize * GMP_NUMB_BITS + count);
595         }
596       else
597         {
598           /* Shift {y, real_ysize} for (GMP_NUMB_BITS - count) bits to the
599              right, and put the ysize most significant limbs into {y, ysize}.
600              We have either real_ysize = ysize + 1 or real_ysize = ysize + 2
601              (only possible with extra_limbs == 2). */
602           MPFR_ASSERTD (diff_ysize == -1 ||
603                         (extra_limbs == 2 && diff_ysize == -2));
604           if (count != 0)
605             {
606               /* Before doing the shift, consider the limb that will entirely
607                  be lost if real_ysize = ysize + 2. */
608               exact = exact && (diff_ysize == -1 || y[0] == MPFR_LIMB_ZERO);
609               /* mpn_rshift allows overlap, provided destination <= source */
610               /* FIXME: The bits lost due to mpn_rshift are not taken
611                  into account in the error analysis below! */
612               if (mpn_rshift (y, y - (diff_ysize + 1), real_ysize,
613                               GMP_NUMB_BITS - count) != MPFR_LIMB_ZERO)
614                 exact = 0; /* some non-zero bits have been shifted out */
615             }
616           else
617             {
618               /* the case real_ysize = ysize + 2 with count = 0 cannot happen
619                  even with GMP_NUMB_BITS = 8 since 62^2 < 256^2/2 */
620               MPFR_ASSERTD (diff_ysize == -1);
621               exact = exact && y[0] == MPFR_LIMB_ZERO;
622               /* copy {y+real_ysize-ysize, ysize} to {y, ysize} */
623               mpn_copyi (y, y + 1, real_ysize - 1);
624             }
625           /* exp = shift count */
626           /* TODO: add some explanations about what exp means exactly. */
627           exp = GMP_NUMB_BITS * (- diff_ysize) - count;
628         }
629 
630       /* compute base^(exp_base - pstr_size) on n limbs */
631       if (IS_POW2 (pstr->base))
632         {
633           /* Base: 2, 4, 8, 16, 32 */
634           int pow2;
635           mpfr_exp_t tmp;
636 
637           MPFR_LOG_MSG (("case 1 (base = power of 2)\n", 0));
638 
639           count_leading_zeros (pow2, (mp_limb_t) pstr->base);
640           pow2 = GMP_NUMB_BITS - pow2 - 1; /* base = 2^pow2 */
641           MPFR_ASSERTD (0 < pow2 && pow2 <= 5);
642           /* exp += pow2 * (pstr->exp_base - pstr_size) + pstr->exp_bin
643              with overflow checking
644              and check that we can add/subtract 2 to exp without overflow */
645           MPFR_SADD_OVERFLOW (tmp, pstr->exp_base, -(mpfr_exp_t) pstr_size,
646                               mpfr_exp_t, mpfr_uexp_t,
647                               MPFR_EXP_MIN, MPFR_EXP_MAX,
648                               goto overflow, goto underflow);
649           /* On some FreeBsd/Alpha, LONG_MIN/1 produced an exception
650              so we used to check for this before doing the division.
651              Since this bug is closed now (Nov 26, 2009), we remove
652              that check
653              <https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=72024> */
654           if (tmp > 0 && MPFR_EXP_MAX / pow2 <= tmp)
655             goto overflow;
656           else if (tmp < 0 && MPFR_EXP_MIN / pow2 >= tmp)
657             goto underflow;
658           tmp *= pow2;
659           MPFR_SADD_OVERFLOW (tmp, tmp, pstr->exp_bin,
660                               mpfr_exp_t, mpfr_uexp_t,
661                               MPFR_EXP_MIN, MPFR_EXP_MAX,
662                               goto overflow, goto underflow);
663           MPFR_SADD_OVERFLOW (exp, exp, tmp,
664                               mpfr_exp_t, mpfr_uexp_t,
665                               MPFR_EXP_MIN+2, MPFR_EXP_MAX-2,
666                               goto overflow, goto underflow);
667           result = y;
668           err = 0;
669         }
670       /* case non-power-of-two-base, and pstr->exp_base > pstr_size */
671       else if (pstr->exp_base > (mpfr_exp_t) pstr_size)
672         {
673           mp_limb_t *z;
674           mpfr_exp_t exp_z;
675 
676           MPFR_LOG_MSG (("case 2 (exp_base > pstr_size)\n", 0));
677 
678           result = MPFR_TMP_LIMBS_ALLOC (2 * ysize + 1);
679 
680           /* z = base^(exp_base-sptr_size) using space allocated at y-ysize */
681           z = y0;
682           /* NOTE: exp_base-pstr_size can't overflow since pstr_size > 0 */
683           err = mpfr_mpn_exp (z, &exp_z, pstr->base,
684                               pstr->exp_base - pstr_size, ysize);
685           if (err == -2)
686             goto overflow;
687           exact = exact && (err == -1);
688 
689           /* If exact is non zero, then z equals exactly the value of the
690              pstr_size most significant digits from pstr->mant, i.e., the
691              only difference can come from the neglected pstr->prec-pstr_size
692              least significant digits of pstr->mant.
693              If exact is zero, then z is rounded toward zero with respect
694              to that value. */
695 
696           /* multiply(y = 0.mant[0]...mant[pr-1])_base by base^(exp-g):
697              since both y and z are rounded toward zero, so is "result" */
698           mpn_mul_n (result, y, z, ysize);
699 
700           /* compute the error on the product */
701           if (err == -1)
702             err = 0;
703           err ++;
704 
705           /* compute the exponent of y */
706           /* exp += exp_z + ysize_bits with overflow checking
707              and check that we can add/subtract 2 to exp without overflow */
708           MPFR_SADD_OVERFLOW (exp_z, exp_z, ysize_bits,
709                               mpfr_exp_t, mpfr_uexp_t,
710                               MPFR_EXP_MIN, MPFR_EXP_MAX,
711                               goto overflow, goto underflow);
712           MPFR_SADD_OVERFLOW (exp, exp, exp_z,
713                               mpfr_exp_t, mpfr_uexp_t,
714                               MPFR_EXP_MIN+2, MPFR_EXP_MAX-2,
715                               goto overflow, goto underflow);
716 
717           /* normalize result */
718           if (MPFR_LIMB_MSB (result[2 * ysize - 1]) == 0)
719             {
720               mp_limb_t *r = result + ysize - 1;
721               mpn_lshift (r, r, ysize + 1, 1);
722               /* Overflow checking not needed */
723               exp --;
724             }
725 
726           /* if the low ysize limbs of {result, 2*ysize} are all zero,
727              then the result is still "exact" (if it was before) */
728           exact = exact && (mpn_scan1 (result, 0) >= ysize_bits);
729           result += ysize;
730         }
731       /* case exp_base < pstr_size */
732       else if (pstr->exp_base < (mpfr_exp_t) pstr_size)
733         {
734           mp_limb_t *z;
735           mpfr_exp_t exp_z;
736 
737           MPFR_LOG_MSG (("case 3 (exp_base < pstr_size)\n", 0));
738 
739           result = MPFR_TMP_LIMBS_ALLOC (3 * ysize + 1);
740 
741           /* y0 = y * K^ysize */
742           MPN_ZERO (y0, ysize);
743 
744           /* pstr_size - pstr->exp_base can overflow */
745           exp_z = pstr->exp_base == MPFR_EXP_MIN ?
746             MPFR_EXP_MAX : -pstr->exp_base;  /* avoid integer overflow */
747           MPFR_SADD_OVERFLOW (exp_z, (mpfr_exp_t) pstr_size, exp_z,
748                               mpfr_exp_t, mpfr_uexp_t,
749                               MPFR_EXP_MIN, MPFR_EXP_MAX,
750                               goto underflow, goto overflow);
751 
752           /* (z, exp_z) = base^(pstr_size - exp_base) */
753           z = result + 2*ysize + 1;
754           err = mpfr_mpn_exp (z, &exp_z, pstr->base, exp_z, ysize);
755 
756           /* Now {z, ysize} * 2^(exp_z_out - ysize_bits) is an approximation
757              to base^exp_z_in (denoted b^e below), rounded toward zero, with:
758              * if err = -1, the result is exact;
759              * if err = -2, an overflow occurred in the computation of exp_z;
760              * otherwise the error is bounded by 2^err ulps.
761              Thus the exact value of b^e is between z and z + 2^err, where
762              z is {z, ysize} properly scaled by a power of 2. Then the error
763              will be:
764                y/b^e - trunc(y/z) = eps1 + eps2
765              with
766                eps1 = y/b^e - y/z <= 0
767                eps2 = y/z - trunc(y/z) >= 0
768              thus the errors will (partly) compensate, giving a bound
769              max(|eps1|,|eps2|).
770              In addition, there is a 3rd error eps3 since y might be the
771              conversion of only a part of the character string, and/or y
772              might be truncated by the mpn_rshift call above:
773                eps3 = exact_y/b^e - y/b^e >= 0.
774           */
775           if (err == -2)
776             goto underflow; /* FIXME: Sure? */
777           else if (err == -1)
778             err = 0; /* see the note below */
779           else
780             exact = 0;
781 
782           /* exp -= exp_z + ysize_bits with overflow checking
783              and check that we can add/subtract 2 to exp without overflow */
784           MPFR_SADD_OVERFLOW (exp_z, exp_z, ysize_bits,
785                               mpfr_exp_t, mpfr_uexp_t,
786                               MPFR_EXP_MIN, MPFR_EXP_MAX,
787                               goto underflow, goto overflow);
788           MPFR_SADD_OVERFLOW (exp, exp, -exp_z,
789                               mpfr_exp_t, mpfr_uexp_t,
790                               MPFR_EXP_MIN+2, MPFR_EXP_MAX-2,
791                               goto overflow, goto underflow);
792 
793           /* Compute the integer division y/z rounded toward zero.
794              The quotient will be put at result + ysize (size: ysize + 1),
795              and the remainder at result (size: ysize).
796              Both the dividend {y, 2*ysize} and the divisor {z, ysize} are
797              normalized, i.e., the most significant bit of their most
798              significant limb is 1. */
799           MPFR_ASSERTD (MPFR_LIMB_MSB (y0[2 * ysize - 1]) != 0);
800           MPFR_ASSERTD (MPFR_LIMB_MSB (z[ysize - 1]) != 0);
801           mpn_tdiv_qr (result + ysize, result, (mp_size_t) 0, y0,
802                        2 * ysize, z, ysize);
803 
804           /* The truncation error of the mpn_tdiv_qr call (eps2 above) is at
805              most 1 ulp. Idem for the error eps3, which has the same sign,
806              thus eps2 + eps3 <= 2 ulps.
807              FIXME: For eps3, this is not obvious and should be explained.
808              For the error eps1 coming from the approximation to b^e,
809              we have (still up to a power-of-2 normalization):
810              y/z - y/b^e = y * (b^e-z) / (z * b^e) <= y * 2^err / (z * b^e).
811              We have to convert that error in terms of ulp(trunc(y/z)).
812              We first have ulp(trunc(y/z)) = ulp(y/z).
813 
814              FIXME: There must be some discussion about the exponents,
815                     because up to a power of 2, 1/2 <= |y/z| < 1 and
816                     1 <= |y/z| < 2 are equivalent and give no information.
817                     Moreover 1/2 <= b^e < 1 has not been explained and may
818                     hide mistakes since one may have 1/2 <= z < 1 < b^e.
819 
820              Since both y and z are normalized, the quotient
821              {result+ysize, ysize+1} has exactly ysize limbs, plus maybe one
822              bit (this corresponds to the MPFR_ASSERTD below):
823              * if the quotient has exactly ysize limbs, then 1/2 <= |y/z| < 1
824                (up to a power of 2) and since 1/2 <= b^e < 1, the error is at
825                most 2^(err+1) ulps;
826              * if the quotient has one extra bit, then 1 <= |y/z| < 2
827                (up to a power of 2) and since 1/2 <= b^e < 1, the error is at
828                most 2^(err+2) ulps; but since we will shift the result right
829                below by one bit, the final error will be at most 2^(err+1) ulps
830                too.
831 
832              Thus the error is:
833              * at most 2^(err+1) ulps for eps1
834              * at most 2 ulps for eps2 + eps3, which is of opposite sign
835              and we can bound the error by 2^(err+1) ulps in all cases.
836 
837              Note: If eps1 was 0, the error would be bounded by 2 ulps,
838              thus replacing err = -1 by err = 0 above was the right thing
839              to do, since 2^(0+1) = 2.
840           */
841           MPFR_ASSERTD (result[2 * ysize] <= 1);
842 
843           err += 1; /* see above for the explanation of the +1 term */
844 
845           /* if the remainder of the division is zero, then the result is
846              still "exact" if it was before */
847           exact = exact && (mpn_popcount (result, ysize) == 0);
848 
849           /* normalize result */
850           if (result[2 * ysize] == MPFR_LIMB_ONE)
851             {
852               mp_limb_t *r = result + ysize;
853 
854               exact = exact && ((*r & MPFR_LIMB_ONE) == 0);
855               mpn_rshift (r, r, ysize + 1, 1);
856               /* Overflow Checking not needed */
857               exp ++;
858             }
859           result += ysize;
860         }
861       /* case exp_base = pstr_size: no multiplication or division needed */
862       else
863         {
864           MPFR_LOG_MSG (("case 4 (exp_base = pstr_size)\n", 0));
865 
866           /* base^(exp-pr) = 1             nothing to compute */
867           result = y;
868           err = 0;
869         }
870 
871       MPFR_LOG_MSG (("exact = %d, err = %d, precx = %Pd\n",
872                      exact, err, precx));
873 
874       /* at this point, result is an approximation rounded toward zero
875          of the pstr_size most significant digits of pstr->mant, with
876          equality in case exact is non-zero. */
877 
878       /* test if rounding is possible, and if so exit the loop.
879          Note: we also need to be able to determine the correct ternary value,
880          thus we use the precx + (rnd == MPFR_RNDN) trick.
881          For example if result = xxx...xxx111...111 and rnd = RNDN,
882          then we know the correct rounding is xxx...xx(x+1), but we cannot know
883          the correct ternary value. */
884       if (exact || mpfr_round_p (result, ysize, ysize_bits - err - 1,
885                                  precx + (rnd == MPFR_RNDN)))
886         break;
887 
888       /* update the prec for next loop */
889       MPFR_ZIV_NEXT (loop, prec);
890     } /* loop */
891   MPFR_ZIV_FREE (loop);
892 
893   /* round y */
894   if (mpfr_round_raw (MPFR_MANT (x), result, ysize_bits,
895                       pstr->negative, precx, rnd, &res))
896     {
897       /* overflow when rounding y */
898       MPFR_MANT (x)[MPFR_LIMB_SIZE (x) - 1] = MPFR_LIMB_HIGHBIT;
899       /* Overflow Checking not needed */
900       exp ++;
901     }
902 
903   /* Note: if exact <> 0, then the approximation {result, ysize} is exact,
904      thus no double-rounding can occur:
905      (a) either the ternary value res is non-zero, and it is the correct
906          ternary value that we should return
907      (b) or the ternary value res is zero, and we should return 0. */
908 
909   /* Set sign of x before exp since check_range needs a valid sign */
910   (pstr->negative) ? MPFR_SET_NEG (x) : MPFR_SET_POS (x);
911 
912   /* DO NOT USE MPFR_SET_EXP. The exp may be out of range! */
913   MPFR_SADD_OVERFLOW (exp, exp, ysize_bits,
914                       mpfr_exp_t, mpfr_uexp_t,
915                       MPFR_EXP_MIN, MPFR_EXP_MAX,
916                       goto overflow, goto underflow);
917   MPFR_EXP (x) = exp;
918   res = mpfr_check_range (x, res, rnd);
919   goto end;
920 
921  underflow:
922   /* This is called when there is a huge overflow
923      (Real expo < MPFR_EXP_MIN << __gmpfr_emin */
924   if (rnd == MPFR_RNDN)
925     rnd = MPFR_RNDZ;
926   res = mpfr_underflow (x, rnd, (pstr->negative) ? -1 : 1);
927   goto end;
928 
929  overflow:
930   res = mpfr_overflow (x, rnd, (pstr->negative) ? -1 : 1);
931 
932  end:
933   MPFR_TMP_FREE (marker);
934   return res;
935 }
936 
937 static void
free_parsed_string(struct parsed_string * pstr)938 free_parsed_string (struct parsed_string *pstr)
939 {
940   mpfr_free_func (pstr->mantissa, pstr->alloc);
941 }
942 
943 int
mpfr_strtofr(mpfr_ptr x,const char * string,char ** end,int base,mpfr_rnd_t rnd)944 mpfr_strtofr (mpfr_ptr x, const char *string, char **end, int base,
945               mpfr_rnd_t rnd)
946 {
947   int res;
948   struct parsed_string pstr;
949 
950   /* For base <= 36, parsing is case-insensitive. */
951   MPFR_ASSERTN (base == 0 || (base >= 2 && base <= 62));
952 
953   /* If an error occurred, it must return 0. */
954   MPFR_SET_ZERO (x);
955   MPFR_SET_POS (x);
956 
957   MPFR_STAT_STATIC_ASSERT (MPFR_MAX_BASE >= 62);
958   res = parse_string (x, &pstr, &string, base);
959   /* If res == 0, then it was exact (NAN or INF),
960      so it is also the ternary value */
961   if (MPFR_UNLIKELY (res == -1))  /* invalid data */
962     res = 0;  /* x is set to 0, which is exact, thus ternary value is 0 */
963   else if (res == 1)
964     {
965       res = parsed_string_to_mpfr (x, &pstr, rnd);
966       free_parsed_string (&pstr);
967     }
968   else if (res == 2)
969     res = mpfr_overflow (x, rnd, (pstr.negative) ? -1 : 1);
970   MPFR_ASSERTD (res != 3);
971 #if 0
972   else if (res == 3)
973     {
974       /* This is called when there is a huge overflow
975          (Real expo < MPFR_EXP_MIN << __gmpfr_emin */
976       if (rnd == MPFR_RNDN)
977         rnd = MPFR_RNDZ;
978       res = mpfr_underflow (x, rnd, (pstr.negative) ? -1 : 1);
979     }
980 #endif
981 
982   if (end != NULL)
983     *end = (char *) string;
984   return res;
985 }
986