1 /* mpfr_strtofr -- set a floating-point number from a string 2 3 Copyright 2004-2020 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramba projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #include <ctype.h> /* For isspace */ 24 25 #define MPFR_NEED_LONGLONG_H 26 #include "mpfr-impl.h" 27 28 #define MPFR_MAX_BASE 62 29 30 struct parsed_string { 31 int negative; /* non-zero iff the number is negative */ 32 int base; /* base of the string */ 33 unsigned char *mantissa; /* raw significand (without any point) */ 34 unsigned char *mant; /* stripped significand (without starting and 35 ending zeroes). This points inside the area 36 allocated for the mantissa field. */ 37 size_t prec; /* length of mant (zero for +/-0) */ 38 size_t alloc; /* allocation size of mantissa */ 39 mpfr_exp_t exp_base; /* number of digits before the point, + exponent 40 except in case of binary exponent (exp_bin) */ 41 mpfr_exp_t exp_bin; /* binary exponent of the pxxx format for 42 base = 2 or 16 */ 43 }; 44 45 /* This table has been generated by the following program. 46 For 2 <= b <= MPFR_MAX_BASE, 47 RedInvLog2Table[b-2][0] / RedInvLog2Table[b-2][1] 48 is an upper approximation to log(2)/log(b), no larger than 1. 49 Note: these numbers must fit on 16 bits, thus unsigned int is OK. 50 */ 51 static const unsigned int RedInvLog2Table[MPFR_MAX_BASE-1][2] = { 52 {1, 1}, 53 {53, 84}, 54 {1, 2}, 55 {4004, 9297}, 56 {53, 137}, 57 {2393, 6718}, 58 {1, 3}, 59 {665, 2108}, 60 {4004, 13301}, 61 {949, 3283}, 62 {53, 190}, 63 {5231, 19357}, 64 {2393, 9111}, 65 {247, 965}, 66 {1, 4}, 67 {4036, 16497}, 68 {665, 2773}, 69 {5187, 22034}, 70 {4004, 17305}, 71 {51, 224}, 72 {949, 4232}, 73 {3077, 13919}, 74 {53, 243}, 75 {73, 339}, 76 {5231, 24588}, 77 {665, 3162}, 78 {2393, 11504}, 79 {4943, 24013}, 80 {247, 1212}, 81 {3515, 17414}, 82 {1, 5}, 83 {4415, 22271}, 84 {4036, 20533}, 85 {263, 1349}, 86 {665, 3438}, 87 {1079, 5621}, 88 {5187, 27221}, 89 {2288, 12093}, 90 {4004, 21309}, 91 {179, 959}, 92 {51, 275}, 93 {495, 2686}, 94 {949, 5181}, 95 {3621, 19886}, 96 {3077, 16996}, 97 {229, 1272}, 98 {53, 296}, 99 {109, 612}, 100 {73, 412}, 101 {1505, 8537}, 102 {5231, 29819}, 103 {283, 1621}, 104 {665, 3827}, 105 {32, 185}, 106 {2393, 13897}, 107 {1879, 10960}, 108 {4943, 28956}, 109 {409, 2406}, 110 {247, 1459}, 111 {231, 1370}, 112 {3515, 20929} }; 113 #if 0 114 #define N 8 115 int main () 116 { 117 unsigned long tab[N]; 118 int i, n, base; 119 mpfr_t x, y; 120 mpq_t q1, q2; 121 int overflow = 0, base_overflow; 122 123 mpfr_init2 (x, 200); 124 mpfr_init2 (y, 200); 125 mpq_init (q1); 126 mpq_init (q2); 127 128 for (base = 2 ; base < 63 ; base ++) 129 { 130 mpfr_set_ui (x, base, MPFR_RNDN); 131 mpfr_log2 (x, x, MPFR_RNDN); 132 mpfr_ui_div (x, 1, x, MPFR_RNDN); 133 printf ("Base: %d x=%e ", base, mpfr_get_d1 (x)); 134 for (i = 0 ; i < N ; i++) 135 { 136 mpfr_floor (y, x); 137 tab[i] = mpfr_get_ui (y, MPFR_RNDN); 138 mpfr_sub (x, x, y, MPFR_RNDN); 139 mpfr_ui_div (x, 1, x, MPFR_RNDN); 140 } 141 for (i = N-1 ; i >= 0 ; i--) 142 if (tab[i] != 0) 143 break; 144 mpq_set_ui (q1, tab[i], 1); 145 for (i = i-1 ; i >= 0 ; i--) 146 { 147 mpq_inv (q1, q1); 148 mpq_set_ui (q2, tab[i], 1); 149 mpq_add (q1, q1, q2); 150 } 151 printf("Approx: ", base); 152 mpq_out_str (stdout, 10, q1); 153 printf (" = %e\n", mpq_get_d (q1) ); 154 fprintf (stderr, "{"); 155 mpz_out_str (stderr, 10, mpq_numref (q1)); 156 fprintf (stderr, "UL, "); 157 mpz_out_str (stderr, 10, mpq_denref (q1)); 158 fprintf (stderr, "UL},\n"); 159 if (mpz_cmp_ui (mpq_numref (q1), 1<<16-1) >= 0 160 || mpz_cmp_ui (mpq_denref (q1), 1<<16-1) >= 0) 161 overflow = 1, base_overflow = base; 162 } 163 164 mpq_clear (q2); 165 mpq_clear (q1); 166 mpfr_clear (y); 167 mpfr_clear (x); 168 if (overflow ) 169 printf ("OVERFLOW for base =%d!\n", base_overflow); 170 } 171 #endif 172 173 174 /* Compatible with any locale, but one still assumes that 'a', 'b', 'c', 175 ..., 'z', and 'A', 'B', 'C', ..., 'Z' are consecutive values (like 176 in any ASCII-based character set). */ 177 static int 178 digit_value_in_base (int c, int base) 179 { 180 int digit; 181 182 MPFR_ASSERTD (base > 0 && base <= MPFR_MAX_BASE); 183 184 if (c >= '0' && c <= '9') 185 digit = c - '0'; 186 else if (c >= 'a' && c <= 'z') 187 digit = (base >= 37) ? c - 'a' + 36 : c - 'a' + 10; 188 else if (c >= 'A' && c <= 'Z') 189 digit = c - 'A' + 10; 190 else 191 return -1; 192 193 return MPFR_LIKELY (digit < base) ? digit : -1; 194 } 195 196 /* Compatible with any locale, but one still assumes that 'a', 'b', 'c', 197 ..., 'z', and 'A', 'B', 'C', ..., 'Z' are consecutive values (like 198 in any ASCII-based character set). */ 199 /* TODO: support EBCDIC. */ 200 static int 201 fast_casecmp (const char *s1, const char *s2) 202 { 203 unsigned char c1, c2; 204 205 do 206 { 207 c2 = *(const unsigned char *) s2++; 208 if (c2 == '\0') 209 return 0; 210 c1 = *(const unsigned char *) s1++; 211 if (c1 >= 'A' && c1 <= 'Z') 212 c1 = c1 - 'A' + 'a'; 213 } 214 while (c1 == c2); 215 return 1; 216 } 217 218 /* Parse a string and fill pstr. 219 Return the advanced ptr too. 220 It returns: 221 -1 if invalid string, 222 0 if special string (like nan), 223 1 if the string is OK. 224 2 if overflows 225 So it doesn't return the ternary value 226 BUT if it returns 0 (NAN or INF), the ternary value is also '0' 227 (ie NAN and INF are exact) */ 228 static int 229 parse_string (mpfr_t x, struct parsed_string *pstr, 230 const char **string, int base) 231 { 232 const char *str = *string; 233 unsigned char *mant; 234 int point; 235 int res = -1; /* Invalid input return value */ 236 const char *prefix_str; 237 int decimal_point; 238 239 decimal_point = (unsigned char) MPFR_DECIMAL_POINT; 240 241 /* Init variable */ 242 pstr->mantissa = NULL; 243 244 /* Optional leading whitespace */ 245 while (isspace((unsigned char) *str)) str++; 246 247 /* An optional sign `+' or `-' */ 248 pstr->negative = (*str == '-'); 249 if (*str == '-' || *str == '+') 250 str++; 251 252 /* Can be case-insensitive NAN */ 253 if (fast_casecmp (str, "@nan@") == 0) 254 { 255 str += 5; 256 goto set_nan; 257 } 258 if (base <= 16 && fast_casecmp (str, "nan") == 0) 259 { 260 str += 3; 261 set_nan: 262 /* Check for "(dummychars)" */ 263 if (*str == '(') 264 { 265 const char *s; 266 for (s = str+1 ; *s != ')' ; s++) 267 if (!(*s >= 'A' && *s <= 'Z') 268 && !(*s >= 'a' && *s <= 'z') 269 && !(*s >= '0' && *s <= '9') 270 && *s != '_') 271 break; 272 if (*s == ')') 273 str = s+1; 274 } 275 *string = str; 276 MPFR_SET_NAN(x); 277 /* MPFR_RET_NAN not used as the return value isn't a ternary value */ 278 __gmpfr_flags |= MPFR_FLAGS_NAN; 279 return 0; 280 } 281 282 /* Can be case-insensitive INF */ 283 if (fast_casecmp (str, "@inf@") == 0) 284 { 285 str += 5; 286 goto set_inf; 287 } 288 if (base <= 16 && fast_casecmp (str, "infinity") == 0) 289 { 290 str += 8; 291 goto set_inf; 292 } 293 if (base <= 16 && fast_casecmp (str, "inf") == 0) 294 { 295 str += 3; 296 set_inf: 297 *string = str; 298 MPFR_SET_INF (x); 299 (pstr->negative) ? MPFR_SET_NEG (x) : MPFR_SET_POS (x); 300 return 0; 301 } 302 303 /* If base=0 or 16, it may include '0x' prefix */ 304 prefix_str = NULL; 305 if ((base == 0 || base == 16) && str[0]=='0' 306 && (str[1]=='x' || str[1] == 'X')) 307 { 308 prefix_str = str; 309 base = 16; 310 str += 2; 311 } 312 /* If base=0 or 2, it may include '0b' prefix */ 313 if ((base == 0 || base == 2) && str[0]=='0' 314 && (str[1]=='b' || str[1] == 'B')) 315 { 316 prefix_str = str; 317 base = 2; 318 str += 2; 319 } 320 /* Else if base=0, we assume decimal base */ 321 if (base == 0) 322 base = 10; 323 pstr->base = base; 324 325 /* Alloc mantissa */ 326 pstr->alloc = (size_t) strlen (str) + 1; 327 pstr->mantissa = (unsigned char*) mpfr_allocate_func (pstr->alloc); 328 329 /* Read mantissa digits */ 330 parse_begin: 331 mant = pstr->mantissa; 332 point = 0; 333 pstr->exp_base = 0; 334 pstr->exp_bin = 0; 335 336 for (;;) /* Loop until an invalid character is read */ 337 { 338 int c = (unsigned char) *str++; 339 /* The cast to unsigned char is needed because of digit_value_in_base; 340 decimal_point uses this convention too. */ 341 if (c == '.' || c == decimal_point) 342 { 343 if (MPFR_UNLIKELY(point)) /* Second '.': stop parsing */ 344 break; 345 point = 1; 346 continue; 347 } 348 c = digit_value_in_base (c, base); 349 if (c == -1) 350 break; 351 MPFR_ASSERTN (c >= 0); /* c is representable in an unsigned char */ 352 *mant++ = (unsigned char) c; 353 if (!point) 354 pstr->exp_base ++; 355 } 356 str--; /* The last read character was invalid */ 357 358 /* Update the # of char in the mantissa */ 359 pstr->prec = mant - pstr->mantissa; 360 /* Check if there are no characters in the mantissa (Invalid argument) */ 361 if (pstr->prec == 0) 362 { 363 /* Check if there was a prefix (in such a case, we have to read 364 again the mantissa without skipping the prefix) 365 The allocated mantissa is still big enough since we will 366 read only 0, and we alloc one more char than needed. 367 FIXME: Not really friendly. Maybe cleaner code? */ 368 if (prefix_str != NULL) 369 { 370 str = prefix_str; 371 prefix_str = NULL; 372 goto parse_begin; 373 } 374 goto end; 375 } 376 377 /* Valid entry */ 378 res = 1; 379 MPFR_ASSERTD (pstr->exp_base >= 0); 380 381 /* FIXME: In the code below (both cases), if the exponent from the 382 string is large, it will be replaced by MPFR_EXP_MIN or MPFR_EXP_MAX, 383 i.e. it will have a different value. This may not change the result 384 in most cases, but there is no guarantee on very long strings when 385 mpfr_exp_t is a 32-bit type, as the exponent could be brought back 386 to the current exponent range. */ 387 388 /* an optional exponent (e or E, p or P, @) */ 389 if ( (*str == '@' || (base <= 10 && (*str == 'e' || *str == 'E'))) 390 && (!isspace((unsigned char) str[1])) ) 391 { 392 char *endptr; 393 /* the exponent digits are kept in ASCII */ 394 mpfr_exp_t sum; 395 long read_exp = strtol (str + 1, &endptr, 10); 396 if (endptr != str+1) 397 str = endptr; 398 sum = 399 read_exp < MPFR_EXP_MIN ? (str = endptr, MPFR_EXP_MIN) : 400 read_exp > MPFR_EXP_MAX ? (str = endptr, MPFR_EXP_MAX) : 401 (mpfr_exp_t) read_exp; 402 MPFR_SADD_OVERFLOW (sum, sum, pstr->exp_base, 403 mpfr_exp_t, mpfr_uexp_t, 404 MPFR_EXP_MIN, MPFR_EXP_MAX, 405 res = 2, res = 3); 406 /* Since exp_base was positive, read_exp + exp_base can't 407 do a negative overflow. */ 408 MPFR_ASSERTD (res != 3); 409 pstr->exp_base = sum; 410 } 411 else if ((base == 2 || base == 16) 412 && (*str == 'p' || *str == 'P') 413 && (!isspace((unsigned char) str[1]))) 414 { 415 char *endptr; 416 long read_exp = strtol (str + 1, &endptr, 10); 417 if (endptr != str+1) 418 str = endptr; 419 pstr->exp_bin = 420 read_exp < MPFR_EXP_MIN ? (str = endptr, MPFR_EXP_MIN) : 421 read_exp > MPFR_EXP_MAX ? (str = endptr, MPFR_EXP_MAX) : 422 (mpfr_exp_t) read_exp; 423 } 424 425 /* Remove 0's at the beginning and end of mantissa[0..prec-1] */ 426 mant = pstr->mantissa; 427 for ( ; (pstr->prec > 0) && (*mant == 0) ; mant++, pstr->prec--) 428 pstr->exp_base--; 429 for ( ; (pstr->prec > 0) && (mant[pstr->prec - 1] == 0); pstr->prec--); 430 pstr->mant = mant; 431 432 /* Check if x = 0 */ 433 if (pstr->prec == 0) 434 { 435 MPFR_SET_ZERO (x); 436 if (pstr->negative) 437 MPFR_SET_NEG(x); 438 else 439 MPFR_SET_POS(x); 440 res = 0; 441 } 442 443 *string = str; 444 end: 445 if (pstr->mantissa != NULL && res != 1) 446 mpfr_free_func (pstr->mantissa, pstr->alloc); 447 return res; 448 } 449 450 /* Transform a parsed string to a mpfr_t according to the rounding mode 451 and the precision of x. 452 Returns the ternary value. */ 453 static int 454 parsed_string_to_mpfr (mpfr_t x, struct parsed_string *pstr, mpfr_rnd_t rnd) 455 { 456 mpfr_prec_t precx, prec, ysize_bits, pstr_size; 457 mpfr_exp_t exp; 458 mp_limb_t *result; 459 int count, exact; 460 mp_size_t ysize, real_ysize, diff_ysize; 461 int res, err; 462 const int extra_limbs = GMP_NUMB_BITS >= 12 ? 1 : 2; /* see below */ 463 MPFR_ZIV_DECL (loop); 464 MPFR_TMP_DECL (marker); 465 466 /* initialize the working precision */ 467 precx = MPFR_GET_PREC (x); 468 prec = precx + MPFR_INT_CEIL_LOG2 (precx); 469 470 /* Compute the value y of the leading characters as long as rounding is not 471 possible. 472 Note: We have some integer overflow checking using MPFR_EXP_MIN and 473 MPFR_EXP_MAX in this loop. Thanks to the large margin between these 474 extremal values of the mpfr_exp_t type and the valid minimum/maximum 475 exponents, such integer overflows would correspond to real underflow 476 or overflow on the result (possibly except in huge precisions, which 477 are disregarded here; anyway, in practice, such issues could occur 478 only with 32-bit precision and exponent types). Such checks could be 479 extended to real early underflow/overflow checking, in order to avoid 480 useless computations in such cases; in such a case, be careful that 481 the approximation errors need to be taken into account. */ 482 MPFR_TMP_MARK(marker); 483 MPFR_ZIV_INIT (loop, prec); 484 for (;;) 485 { 486 mp_limb_t *y0, *y; 487 488 /* y will be regarded as a number with precision prec. */ 489 ysize = MPFR_PREC2LIMBS (prec); 490 /* prec bits corresponds to ysize limbs */ 491 ysize_bits = (mpfr_prec_t) ysize * GMP_NUMB_BITS; 492 MPFR_ASSERTD (ysize_bits >= prec); 493 /* and to ysize_bits >= prec > precx bits. */ 494 /* We need to allocate one more limb as specified by mpn_set_str 495 (a limb may be written in rp[rn]). Note that the manual of GMP 496 up to 5.1.3 was incorrect on this point. 497 See the following discussion: 498 https://gmplib.org/list-archives/gmp-bugs/2013-December/003267.html */ 499 y0 = MPFR_TMP_LIMBS_ALLOC (2 * ysize + extra_limbs + 1); 500 y = y0 + ysize; /* y has (ysize + extra_limbs + 1) allocated limbs */ 501 502 /* pstr_size is the number of bytes we want to read from pstr->mant 503 to fill at least ysize full limbs with mpn_set_str. 504 We must have base^(pstr_size-1) >= (2^(GMP_NUMB_BITS))^ysize 505 (in the worst case, the first digit is one and all others are zero). 506 i.e., pstr_size >= 1 + ysize*GMP_NUMB_BITS/log2(base) 507 Since ysize ~ prec/GMP_NUMB_BITS and prec < Umax/2 => 508 ysize*GMP_NUMB_BITS can not overflow. 509 We compute pstr_size = 1 + ceil(ysize_bits * Num / Den) 510 where 1/log2(base) <= Num/Den <= 1 511 It is not exactly ceil(1/log2(base)) but could be one more (base 2). 512 Quite ugly since it tries to avoid overflow: 513 let Num = RedInvLog2Table[pstr->base-2][0] 514 and Den = RedInvLog2Table[pstr->base-2][1], 515 and ysize_bits = a*Den+b, 516 then ysize_bits * Num/Den = a*Num + (b * Num)/Den, 517 thus ceil(ysize_bits * Num/Den) = a*Num + floor(b * Num + Den - 1)/Den 518 519 Note: denoting m = pstr_size and n = ysize_bits, assuming we have 520 m = 1 + ceil(n/log2(b)), i.e., b^(m-1) >= 2^n > b^(m-2), then 521 b^(m-1)/2^n < b, and since we consider m characters of the input, 522 the corresponding part is less than b^m < b^2*2^n. 523 This implies that if b^2 < 2^GMP_NUMB_BITS, which for b <= 62 holds 524 for GMP_NUMB_BITS >= 12, we have real_ysize <= ysize+1 below 525 (this also implies that for GMP_NUMB_BITS >= 13, the number of bits 526 of y[real_ysize-1] below is less than GMP_NUMB_BITS, thus 527 count < GMP_NUMB_BITS). 528 Warning: for GMP_NUMB_BITS=8, we can have real_ysize = ysize + 2! 529 Hence the allocation above for ysize + extra_limbs limbs. 530 */ 531 { 532 unsigned int Num = RedInvLog2Table[pstr->base-2][0]; 533 unsigned int Den = RedInvLog2Table[pstr->base-2][1]; 534 MPFR_ASSERTD (Num <= Den && Den <= 65535); /* thus no overflow */ 535 pstr_size = (ysize_bits / Den) * Num 536 + ((unsigned long) (ysize_bits % Den) * Num + Den - 1) / Den 537 + 1; 538 } 539 540 /* Since pstr_size corresponds to at least ysize_bits bits, 541 and ysize_bits >= prec, the weight of the neglected part of 542 pstr->mant (if any) is < ulp(y) < ulp(x). */ 543 544 /* If the number of wanted bytes is more than what is available 545 in pstr->mant, i.e. pstr->prec, reduce it to pstr->prec. */ 546 if (pstr_size > pstr->prec) 547 pstr_size = pstr->prec; 548 549 /* Convert str (potentially truncated to pstr_size) into binary. 550 Note that pstr->mant is big endian, thus no offset is needed. */ 551 real_ysize = mpn_set_str (y, pstr->mant, pstr_size, pstr->base); 552 553 /* See above for the explanation of the following assertion. */ 554 MPFR_ASSERTD (real_ysize <= ysize + extra_limbs); 555 556 /* The Boolean "exact" will attempt to track exactness of the result: 557 If it is true, then this means that the result is exact, allowing 558 termination, even though the rounding test may not succeed. 559 Conversely, if the result is exact, then "exact" will not 560 necessarily be true at the end of the Ziv loop, but we will need 561 to make sure that at some point, "exact" will be true in order to 562 guarantee termination. FIXME: check that. */ 563 /* First, consider the part of the input string that has been ignored. 564 Note that the trailing zeros have been removed in parse_string, so 565 that if something has been ignored, it must be non-zero. */ 566 exact = pstr_size == pstr->prec; 567 568 /* Normalize y and set the initial value of its exponent exp, which 569 is 0 when y is not shifted. 570 Since pstr->mant was normalized, mpn_set_str guarantees that 571 the most significant limb is non-zero. */ 572 MPFR_ASSERTD (y[real_ysize - 1] != 0); /* mpn_set_str guarantees this */ 573 count_leading_zeros (count, y[real_ysize - 1]); 574 diff_ysize = ysize - real_ysize; 575 MPFR_LOG_MSG (("diff_ysize = %ld\n", (long) diff_ysize)); 576 if (diff_ysize >= 0) 577 { 578 /* We have enough limbs to store {y, real_ysize} exactly 579 in {y, ysize}, so that we can do a left shift, without 580 losing any information ("exact" will not change). */ 581 if (count != 0) 582 mpn_lshift (y + diff_ysize, y, real_ysize, count); 583 if (diff_ysize > 0) 584 { 585 if (count == 0) 586 mpn_copyd (y + diff_ysize, y, real_ysize); 587 MPN_ZERO (y, diff_ysize); 588 } 589 /* exp = negation of the total shift count, avoiding overflows. */ 590 exp = - ((mpfr_exp_t) diff_ysize * GMP_NUMB_BITS + count); 591 } 592 else 593 { 594 /* Shift {y, real_ysize} for (GMP_NUMB_BITS - count) bits to the 595 right, and put the ysize most significant limbs into {y, ysize}. 596 We have either real_ysize = ysize + 1 or real_ysize = ysize + 2 597 (only possible with extra_limbs == 2). */ 598 MPFR_ASSERTD (diff_ysize == -1 || 599 (extra_limbs == 2 && diff_ysize == -2)); 600 if (count != 0) 601 { 602 /* Before doing the shift, consider the limb that will entirely 603 be lost if real_ysize = ysize + 2. */ 604 exact = exact && (diff_ysize == -1 || y[0] == MPFR_LIMB_ZERO); 605 /* mpn_rshift allows overlap, provided destination <= source */ 606 /* FIXME: The bits lost due to mpn_rshift are not taken 607 into account in the error analysis below! */ 608 if (mpn_rshift (y, y - (diff_ysize + 1), real_ysize, 609 GMP_NUMB_BITS - count) != MPFR_LIMB_ZERO) 610 exact = 0; /* some non-zero bits have been shifted out */ 611 } 612 else 613 { 614 /* the case real_ysize = ysize + 2 with count = 0 cannot happen 615 even with GMP_NUMB_BITS = 8 since 62^2 < 256^2/2 */ 616 MPFR_ASSERTD (diff_ysize == -1); 617 exact = exact && y[0] == MPFR_LIMB_ZERO; 618 /* copy {y+real_ysize-ysize, ysize} to {y, ysize} */ 619 mpn_copyi (y, y + 1, real_ysize - 1); 620 } 621 /* exp = shift count */ 622 /* TODO: add some explanations about what exp means exactly. */ 623 exp = GMP_NUMB_BITS * (- diff_ysize) - count; 624 } 625 626 /* compute base^(exp_base - pstr_size) on n limbs */ 627 if (IS_POW2 (pstr->base)) 628 { 629 /* Base: 2, 4, 8, 16, 32 */ 630 int pow2; 631 mpfr_exp_t tmp; 632 633 MPFR_LOG_MSG (("case 1 (base = power of 2)\n", 0)); 634 635 count_leading_zeros (pow2, (mp_limb_t) pstr->base); 636 pow2 = GMP_NUMB_BITS - pow2 - 1; /* base = 2^pow2 */ 637 MPFR_ASSERTD (0 < pow2 && pow2 <= 5); 638 /* exp += pow2 * (pstr->exp_base - pstr_size) + pstr->exp_bin 639 with overflow checking 640 and check that we can add/subtract 2 to exp without overflow */ 641 MPFR_SADD_OVERFLOW (tmp, pstr->exp_base, -(mpfr_exp_t) pstr_size, 642 mpfr_exp_t, mpfr_uexp_t, 643 MPFR_EXP_MIN, MPFR_EXP_MAX, 644 goto overflow, goto underflow); 645 /* On some FreeBsd/Alpha, LONG_MIN/1 produced an exception 646 so we used to check for this before doing the division. 647 Since this bug is closed now (Nov 26, 2009), we remove 648 that check (http://www.freebsd.org/cgi/query-pr.cgi?pr=72024) */ 649 if (tmp > 0 && MPFR_EXP_MAX / pow2 <= tmp) 650 goto overflow; 651 else if (tmp < 0 && MPFR_EXP_MIN / pow2 >= tmp) 652 goto underflow; 653 tmp *= pow2; 654 MPFR_SADD_OVERFLOW (tmp, tmp, pstr->exp_bin, 655 mpfr_exp_t, mpfr_uexp_t, 656 MPFR_EXP_MIN, MPFR_EXP_MAX, 657 goto overflow, goto underflow); 658 MPFR_SADD_OVERFLOW (exp, exp, tmp, 659 mpfr_exp_t, mpfr_uexp_t, 660 MPFR_EXP_MIN+2, MPFR_EXP_MAX-2, 661 goto overflow, goto underflow); 662 result = y; 663 err = 0; 664 } 665 /* case non-power-of-two-base, and pstr->exp_base > pstr_size */ 666 else if (pstr->exp_base > (mpfr_exp_t) pstr_size) 667 { 668 mp_limb_t *z; 669 mpfr_exp_t exp_z; 670 671 MPFR_LOG_MSG (("case 2 (exp_base > pstr_size)\n", 0)); 672 673 result = MPFR_TMP_LIMBS_ALLOC (2 * ysize + 1); 674 675 /* z = base^(exp_base-sptr_size) using space allocated at y-ysize */ 676 z = y0; 677 /* NOTE: exp_base-pstr_size can't overflow since pstr_size > 0 */ 678 err = mpfr_mpn_exp (z, &exp_z, pstr->base, 679 pstr->exp_base - pstr_size, ysize); 680 if (err == -2) 681 goto overflow; 682 exact = exact && (err == -1); 683 684 /* If exact is non zero, then z equals exactly the value of the 685 pstr_size most significant digits from pstr->mant, i.e., the 686 only difference can come from the neglected pstr->prec-pstr_size 687 least significant digits of pstr->mant. 688 If exact is zero, then z is rounded toward zero with respect 689 to that value. */ 690 691 /* multiply(y = 0.mant[0]...mant[pr-1])_base by base^(exp-g): 692 since both y and z are rounded toward zero, so is "result" */ 693 mpn_mul_n (result, y, z, ysize); 694 695 /* compute the error on the product */ 696 if (err == -1) 697 err = 0; 698 err ++; 699 700 /* compute the exponent of y */ 701 /* exp += exp_z + ysize_bits with overflow checking 702 and check that we can add/subtract 2 to exp without overflow */ 703 MPFR_SADD_OVERFLOW (exp_z, exp_z, ysize_bits, 704 mpfr_exp_t, mpfr_uexp_t, 705 MPFR_EXP_MIN, MPFR_EXP_MAX, 706 goto overflow, goto underflow); 707 MPFR_SADD_OVERFLOW (exp, exp, exp_z, 708 mpfr_exp_t, mpfr_uexp_t, 709 MPFR_EXP_MIN+2, MPFR_EXP_MAX-2, 710 goto overflow, goto underflow); 711 712 /* normalize result */ 713 if (MPFR_LIMB_MSB (result[2 * ysize - 1]) == 0) 714 { 715 mp_limb_t *r = result + ysize - 1; 716 mpn_lshift (r, r, ysize + 1, 1); 717 /* Overflow checking not needed */ 718 exp --; 719 } 720 721 /* if the low ysize limbs of {result, 2*ysize} are all zero, 722 then the result is still "exact" (if it was before) */ 723 exact = exact && (mpn_scan1 (result, 0) >= ysize_bits); 724 result += ysize; 725 } 726 /* case exp_base < pstr_size */ 727 else if (pstr->exp_base < (mpfr_exp_t) pstr_size) 728 { 729 mp_limb_t *z; 730 mpfr_exp_t exp_z; 731 732 MPFR_LOG_MSG (("case 3 (exp_base < pstr_size)\n", 0)); 733 734 result = MPFR_TMP_LIMBS_ALLOC (3 * ysize + 1); 735 736 /* y0 = y * K^ysize */ 737 MPN_ZERO (y0, ysize); 738 739 /* pstr_size - pstr->exp_base can overflow */ 740 MPFR_SADD_OVERFLOW (exp_z, (mpfr_exp_t) pstr_size, -pstr->exp_base, 741 mpfr_exp_t, mpfr_uexp_t, 742 MPFR_EXP_MIN, MPFR_EXP_MAX, 743 goto underflow, goto overflow); 744 745 /* (z, exp_z) = base^(pstr_size - exp_base) */ 746 z = result + 2*ysize + 1; 747 err = mpfr_mpn_exp (z, &exp_z, pstr->base, exp_z, ysize); 748 749 /* Now {z, ysize} * 2^(exp_z_out - ysize_bits) is an approximation 750 to base^exp_z_in (denoted b^e below), rounded toward zero, with: 751 * if err = -1, the result is exact; 752 * if err = -2, an overflow occurred in the computation of exp_z; 753 * otherwise the error is bounded by 2^err ulps. 754 Thus the exact value of b^e is between z and z + 2^err, where 755 z is {z, ysize} properly scaled by a power of 2. Then the error 756 will be: 757 y/b^e - trunc(y/z) = eps1 + eps2 758 with 759 eps1 = y/b^e - y/z <= 0 760 eps2 = y/z - trunc(y/z) >= 0 761 thus the errors will (partly) compensate, giving a bound 762 max(|eps1|,|eps2|). 763 In addition, there is a 3rd error eps3 since y might be the 764 conversion of only a part of the character string, and/or y 765 might be truncated by the mpn_rshift call above: 766 eps3 = exact_y/b^e - y/b^e >= 0. 767 */ 768 if (err == -2) 769 goto underflow; /* FIXME: Sure? */ 770 else if (err == -1) 771 err = 0; /* see the note below */ 772 else 773 exact = 0; 774 775 /* exp -= exp_z + ysize_bits with overflow checking 776 and check that we can add/subtract 2 to exp without overflow */ 777 MPFR_SADD_OVERFLOW (exp_z, exp_z, ysize_bits, 778 mpfr_exp_t, mpfr_uexp_t, 779 MPFR_EXP_MIN, MPFR_EXP_MAX, 780 goto underflow, goto overflow); 781 MPFR_SADD_OVERFLOW (exp, exp, -exp_z, 782 mpfr_exp_t, mpfr_uexp_t, 783 MPFR_EXP_MIN+2, MPFR_EXP_MAX-2, 784 goto overflow, goto underflow); 785 786 /* Compute the integer division y/z rounded toward zero. 787 The quotient will be put at result + ysize (size: ysize + 1), 788 and the remainder at result (size: ysize). 789 Both the dividend {y, 2*ysize} and the divisor {z, ysize} are 790 normalized, i.e., the most significant bit of their most 791 significant limb is 1. */ 792 MPFR_ASSERTD (MPFR_LIMB_MSB (y0[2 * ysize - 1]) != 0); 793 MPFR_ASSERTD (MPFR_LIMB_MSB (z[ysize - 1]) != 0); 794 mpn_tdiv_qr (result + ysize, result, (mp_size_t) 0, y0, 795 2 * ysize, z, ysize); 796 797 /* The truncation error of the mpn_tdiv_qr call (eps2 above) is at 798 most 1 ulp. Idem for the error eps3, which has the same sign, 799 thus eps2 + eps3 <= 2 ulps. 800 FIXME: For eps3, this is not obvious and should be explained. 801 For the error eps1 coming from the approximation to b^e, 802 we have (still up to a power-of-2 normalization): 803 y/z - y/b^e = y * (b^e-z) / (z * b^e) <= y * 2^err / (z * b^e). 804 We have to convert that error in terms of ulp(trunc(y/z)). 805 We first have ulp(trunc(y/z)) = ulp(y/z). 806 807 FIXME: There must be some discussion about the exponents, 808 because up to a power of 2, 1/2 <= |y/z| < 1 and 809 1 <= |y/z| < 2 are equivalent and give no information. 810 Moreover 1/2 <= b^e < 1 has not been explained and may 811 hide mistakes since one may have 1/2 <= z < 1 < b^e. 812 813 Since both y and z are normalized, the quotient 814 {result+ysize, ysize+1} has exactly ysize limbs, plus maybe one 815 bit (this corresponds to the MPFR_ASSERTD below): 816 * if the quotient has exactly ysize limbs, then 1/2 <= |y/z| < 1 817 (up to a power of 2) and since 1/2 <= b^e < 1, the error is at 818 most 2^(err+1) ulps; 819 * if the quotient has one extra bit, then 1 <= |y/z| < 2 820 (up to a power of 2) and since 1/2 <= b^e < 1, the error is at 821 most 2^(err+2) ulps; but since we will shift the result right 822 below by one bit, the final error will be at most 2^(err+1) ulps 823 too. 824 825 Thus the error is: 826 * at most 2^(err+1) ulps for eps1 827 * at most 2 ulps for eps2 + eps3, which is of opposite sign 828 and we can bound the error by 2^(err+1) ulps in all cases. 829 830 Note: If eps1 was 0, the error would be bounded by 2 ulps, 831 thus replacing err = -1 by err = 0 above was the right thing 832 to do, since 2^(0+1) = 2. 833 */ 834 MPFR_ASSERTD (result[2 * ysize] <= 1); 835 836 err += 1; /* see above for the explanation of the +1 term */ 837 838 /* if the remainder of the division is zero, then the result is 839 still "exact" if it was before */ 840 exact = exact && (mpn_popcount (result, ysize) == 0); 841 842 /* normalize result */ 843 if (result[2 * ysize] == MPFR_LIMB_ONE) 844 { 845 mp_limb_t *r = result + ysize; 846 847 exact = exact && ((*r & MPFR_LIMB_ONE) == 0); 848 mpn_rshift (r, r, ysize + 1, 1); 849 /* Overflow Checking not needed */ 850 exp ++; 851 } 852 result += ysize; 853 } 854 /* case exp_base = pstr_size: no multiplication or division needed */ 855 else 856 { 857 MPFR_LOG_MSG (("case 4 (exp_base = pstr_size)\n", 0)); 858 859 /* base^(exp-pr) = 1 nothing to compute */ 860 result = y; 861 err = 0; 862 } 863 864 MPFR_LOG_MSG (("exact = %d, err = %d, precx = %Pu\n", 865 exact, err, precx)); 866 867 /* at this point, result is an approximation rounded toward zero 868 of the pstr_size most significant digits of pstr->mant, with 869 equality in case exact is non-zero. */ 870 871 /* test if rounding is possible, and if so exit the loop. 872 Note: we also need to be able to determine the correct ternary value, 873 thus we use the precx + (rnd == MPFR_RNDN) trick. 874 For example if result = xxx...xxx111...111 and rnd = RNDN, 875 then we know the correct rounding is xxx...xx(x+1), but we cannot know 876 the correct ternary value. */ 877 if (exact || mpfr_round_p (result, ysize, ysize_bits - err - 1, 878 precx + (rnd == MPFR_RNDN))) 879 break; 880 881 /* update the prec for next loop */ 882 MPFR_ZIV_NEXT (loop, prec); 883 } /* loop */ 884 MPFR_ZIV_FREE (loop); 885 886 /* round y */ 887 if (mpfr_round_raw (MPFR_MANT (x), result, ysize_bits, 888 pstr->negative, precx, rnd, &res)) 889 { 890 /* overflow when rounding y */ 891 MPFR_MANT (x)[MPFR_LIMB_SIZE (x) - 1] = MPFR_LIMB_HIGHBIT; 892 /* Overflow Checking not needed */ 893 exp ++; 894 } 895 896 /* Note: if exact <> 0, then the approximation {result, ysize} is exact, 897 thus no double-rounding can occur: 898 (a) either the ternary value res is non-zero, and it is the correct 899 ternary value that we should return 900 (b) or the ternary value res is zero, and we should return 0. */ 901 902 /* Set sign of x before exp since check_range needs a valid sign */ 903 (pstr->negative) ? MPFR_SET_NEG (x) : MPFR_SET_POS (x); 904 905 /* DO NOT USE MPFR_SET_EXP. The exp may be out of range! */ 906 MPFR_SADD_OVERFLOW (exp, exp, ysize_bits, 907 mpfr_exp_t, mpfr_uexp_t, 908 MPFR_EXP_MIN, MPFR_EXP_MAX, 909 goto overflow, goto underflow); 910 MPFR_EXP (x) = exp; 911 res = mpfr_check_range (x, res, rnd); 912 goto end; 913 914 underflow: 915 /* This is called when there is a huge overflow 916 (Real expo < MPFR_EXP_MIN << __gmpfr_emin */ 917 if (rnd == MPFR_RNDN) 918 rnd = MPFR_RNDZ; 919 res = mpfr_underflow (x, rnd, (pstr->negative) ? -1 : 1); 920 goto end; 921 922 overflow: 923 res = mpfr_overflow (x, rnd, (pstr->negative) ? -1 : 1); 924 925 end: 926 MPFR_TMP_FREE (marker); 927 return res; 928 } 929 930 static void 931 free_parsed_string (struct parsed_string *pstr) 932 { 933 mpfr_free_func (pstr->mantissa, pstr->alloc); 934 } 935 936 int 937 mpfr_strtofr (mpfr_t x, const char *string, char **end, int base, 938 mpfr_rnd_t rnd) 939 { 940 int res; 941 struct parsed_string pstr; 942 943 /* For base <= 36, parsing is case-insensitive. */ 944 MPFR_ASSERTN (base == 0 || (base >= 2 && base <= 62)); 945 946 /* If an error occurred, it must return 0. */ 947 MPFR_SET_ZERO (x); 948 MPFR_SET_POS (x); 949 950 MPFR_STAT_STATIC_ASSERT (MPFR_MAX_BASE >= 62); 951 res = parse_string (x, &pstr, &string, base); 952 /* If res == 0, then it was exact (NAN or INF), 953 so it is also the ternary value */ 954 if (MPFR_UNLIKELY (res == -1)) /* invalid data */ 955 res = 0; /* x is set to 0, which is exact, thus ternary value is 0 */ 956 else if (res == 1) 957 { 958 res = parsed_string_to_mpfr (x, &pstr, rnd); 959 free_parsed_string (&pstr); 960 } 961 else if (res == 2) 962 res = mpfr_overflow (x, rnd, (pstr.negative) ? -1 : 1); 963 MPFR_ASSERTD (res != 3); 964 #if 0 965 else if (res == 3) 966 { 967 /* This is called when there is a huge overflow 968 (Real expo < MPFR_EXP_MIN << __gmpfr_emin */ 969 if (rnd == MPFR_RNDN) 970 rnd = MPFR_RNDZ; 971 res = mpfr_underflow (x, rnd, (pstr.negative) ? -1 : 1); 972 } 973 #endif 974 975 if (end != NULL) 976 *end = (char *) string; 977 return res; 978 } 979