1 /* numeric.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * "That only makes eleven (plus one mislaid) and not fourteen, 13 * unless wizards count differently to other people." --Beorn 14 * 15 * [p.115 of _The Hobbit_: "Queer Lodgings"] 16 */ 17 18 /* 19 =head1 Numeric functions 20 21 =cut 22 23 This file contains all the stuff needed by perl for manipulating numeric 24 values, including such things as replacements for the OS's atof() function 25 26 */ 27 28 #include "EXTERN.h" 29 #define PERL_IN_NUMERIC_C 30 #include "perl.h" 31 32 #ifdef Perl_strtod 33 34 PERL_STATIC_INLINE NV 35 S_strtod(pTHX_ const char * const s, char ** e) 36 { 37 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 38 NV result; 39 40 STORE_LC_NUMERIC_SET_TO_NEEDED(); 41 42 # ifdef USE_QUADMATH 43 44 result = strtoflt128(s, e); 45 46 # elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \ 47 && defined(USE_LONG_DOUBLE) 48 # if defined(__MINGW64_VERSION_MAJOR) 49 /*********************************************** 50 We are unable to use strtold because of 51 https://sourceforge.net/p/mingw-w64/bugs/711/ 52 & 53 https://sourceforge.net/p/mingw-w64/bugs/725/ 54 55 but __mingw_strtold is fine. 56 ***********************************************/ 57 58 result = __mingw_strtold(s, e); 59 60 # else 61 62 result = strtold(s, e); 63 64 # endif 65 # elif defined(HAS_STRTOD) 66 67 result = strtod(s, e); 68 69 # endif 70 71 RESTORE_LC_NUMERIC(); 72 73 return result; 74 } 75 76 #endif /* #ifdef Perl_strtod */ 77 78 /* 79 80 =for apidoc my_strtod 81 82 This function is equivalent to the libc strtod() function, and is available 83 even on platforms that lack plain strtod(). Its return value is the best 84 available precision depending on platform capabilities and F<Configure> 85 options. 86 87 It properly handles the locale radix character, meaning it expects a dot except 88 when called from within the scope of S<C<use locale>>, in which case the radix 89 character should be that specified by the current locale. 90 91 The synonym Strod() may be used instead. 92 93 =cut 94 95 */ 96 97 NV 98 my_strtod(const char * const s, char **e) 99 { 100 dTHX; 101 102 PERL_ARGS_ASSERT_MY_STRTOD; 103 104 #ifdef Perl_strtod 105 106 return S_strtod(aTHX_ s, e); 107 108 #else 109 110 { 111 NV result; 112 char ** end_ptr = NULL; 113 114 *end_ptr = my_atof2(s, &result); 115 if (e) { 116 *e = *end_ptr; 117 } 118 119 if (! *end_ptr) { 120 result = 0.0; 121 } 122 123 return result; 124 } 125 126 #endif 127 128 } 129 130 131 U32 132 Perl_cast_ulong(NV f) 133 { 134 if (f < 0.0) 135 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f; 136 if (f < U32_MAX_P1) { 137 #if CASTFLAGS & 2 138 if (f < U32_MAX_P1_HALF) 139 return (U32) f; 140 f -= U32_MAX_P1_HALF; 141 return ((U32) f) | (1 + (U32_MAX >> 1)); 142 #else 143 return (U32) f; 144 #endif 145 } 146 return f > 0 ? U32_MAX : 0 /* NaN */; 147 } 148 149 I32 150 Perl_cast_i32(NV f) 151 { 152 if (f < I32_MAX_P1) 153 return f < I32_MIN ? I32_MIN : (I32) f; 154 if (f < U32_MAX_P1) { 155 #if CASTFLAGS & 2 156 if (f < U32_MAX_P1_HALF) 157 return (I32)(U32) f; 158 f -= U32_MAX_P1_HALF; 159 return (I32)(((U32) f) | (1 + (U32_MAX >> 1))); 160 #else 161 return (I32)(U32) f; 162 #endif 163 } 164 return f > 0 ? (I32)U32_MAX : 0 /* NaN */; 165 } 166 167 IV 168 Perl_cast_iv(NV f) 169 { 170 if (f < IV_MAX_P1) 171 return f < IV_MIN ? IV_MIN : (IV) f; 172 if (f < UV_MAX_P1) { 173 #if CASTFLAGS & 2 174 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */ 175 if (f < UV_MAX_P1_HALF) 176 return (IV)(UV) f; 177 f -= UV_MAX_P1_HALF; 178 return (IV)(((UV) f) | (1 + (UV_MAX >> 1))); 179 #else 180 return (IV)(UV) f; 181 #endif 182 } 183 return f > 0 ? (IV)UV_MAX : 0 /* NaN */; 184 } 185 186 UV 187 Perl_cast_uv(NV f) 188 { 189 if (f < 0.0) 190 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f; 191 if (f < UV_MAX_P1) { 192 #if CASTFLAGS & 2 193 if (f < UV_MAX_P1_HALF) 194 return (UV) f; 195 f -= UV_MAX_P1_HALF; 196 return ((UV) f) | (1 + (UV_MAX >> 1)); 197 #else 198 return (UV) f; 199 #endif 200 } 201 return f > 0 ? UV_MAX : 0 /* NaN */; 202 } 203 204 /* 205 =for apidoc grok_bin 206 207 converts a string representing a binary number to numeric form. 208 209 On entry C<start> and C<*len> give the string to scan, C<*flags> gives 210 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. 211 The scan stops at the end of the string, or the first invalid character. 212 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an 213 invalid character will also trigger a warning. 214 On return C<*len> is set to the length of the scanned string, 215 and C<*flags> gives output flags. 216 217 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 218 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin> 219 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 220 and writes the value to C<*result> (or the value is discarded if C<result> 221 is NULL). 222 223 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless 224 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. If 225 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary 226 number may use C<"_"> characters to separate digits. 227 228 =cut 229 230 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 231 which suppresses any message for non-portable numbers that are still valid 232 on this platform. 233 */ 234 235 UV 236 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 237 { 238 const char *s = start; 239 STRLEN len = *len_p; 240 UV value = 0; 241 NV value_nv = 0; 242 243 const UV max_div_2 = UV_MAX / 2; 244 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 245 bool overflowed = FALSE; 246 char bit; 247 248 PERL_ARGS_ASSERT_GROK_BIN; 249 250 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 251 /* strip off leading b or 0b. 252 for compatibility silently suffer "b" and "0b" as valid binary 253 numbers. */ 254 if (len >= 1) { 255 if (isALPHA_FOLD_EQ(s[0], 'b')) { 256 s++; 257 len--; 258 } 259 else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) { 260 s+=2; 261 len-=2; 262 } 263 } 264 } 265 266 for (; len-- && (bit = *s); s++) { 267 if (bit == '0' || bit == '1') { 268 /* Write it in this wonky order with a goto to attempt to get the 269 compiler to make the common case integer-only loop pretty tight. 270 With gcc seems to be much straighter code than old scan_bin. */ 271 redo: 272 if (!overflowed) { 273 if (value <= max_div_2) { 274 value = (value << 1) | (bit - '0'); 275 continue; 276 } 277 /* Bah. We're just overflowed. */ 278 /* diag_listed_as: Integer overflow in %s number */ 279 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 280 "Integer overflow in binary number"); 281 overflowed = TRUE; 282 value_nv = (NV) value; 283 } 284 value_nv *= 2.0; 285 /* If an NV has not enough bits in its mantissa to 286 * represent a UV this summing of small low-order numbers 287 * is a waste of time (because the NV cannot preserve 288 * the low-order bits anyway): we could just remember when 289 * did we overflow and in the end just multiply value_nv by the 290 * right amount. */ 291 value_nv += (NV)(bit - '0'); 292 continue; 293 } 294 if (bit == '_' && len && allow_underscores && (bit = s[1]) 295 && (bit == '0' || bit == '1')) 296 { 297 --len; 298 ++s; 299 goto redo; 300 } 301 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 302 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 303 "Illegal binary digit '%c' ignored", *s); 304 break; 305 } 306 307 if ( ( overflowed && value_nv > 4294967295.0) 308 #if UVSIZE > 4 309 || (!overflowed && value > 0xffffffff 310 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 311 #endif 312 ) { 313 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 314 "Binary number > 0b11111111111111111111111111111111 non-portable"); 315 } 316 *len_p = s - start; 317 if (!overflowed) { 318 *flags = 0; 319 return value; 320 } 321 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 322 if (result) 323 *result = value_nv; 324 return UV_MAX; 325 } 326 327 /* 328 =for apidoc grok_hex 329 330 converts a string representing a hex number to numeric form. 331 332 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives 333 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. 334 The scan stops at the end of the string, or the first invalid character. 335 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an 336 invalid character will also trigger a warning. 337 On return C<*len> is set to the length of the scanned string, 338 and C<*flags> gives output flags. 339 340 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 341 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex> 342 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 343 and writes the value to C<*result> (or the value is discarded if C<result> 344 is C<NULL>). 345 346 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless 347 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. If 348 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex 349 number may use C<"_"> characters to separate digits. 350 351 =cut 352 353 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 354 which suppresses any message for non-portable numbers, but which are valid 355 on this platform. 356 */ 357 358 UV 359 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 360 { 361 const char *s = start; 362 STRLEN len = *len_p; 363 UV value = 0; 364 NV value_nv = 0; 365 const UV max_div_16 = UV_MAX / 16; 366 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 367 bool overflowed = FALSE; 368 369 PERL_ARGS_ASSERT_GROK_HEX; 370 371 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { 372 /* strip off leading x or 0x. 373 for compatibility silently suffer "x" and "0x" as valid hex numbers. 374 */ 375 if (len >= 1) { 376 if (isALPHA_FOLD_EQ(s[0], 'x')) { 377 s++; 378 len--; 379 } 380 else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) { 381 s+=2; 382 len-=2; 383 } 384 } 385 } 386 387 for (; len-- && *s; s++) { 388 if (isXDIGIT(*s)) { 389 /* Write it in this wonky order with a goto to attempt to get the 390 compiler to make the common case integer-only loop pretty tight. 391 With gcc seems to be much straighter code than old scan_hex. */ 392 redo: 393 if (!overflowed) { 394 if (value <= max_div_16) { 395 value = (value << 4) | XDIGIT_VALUE(*s); 396 continue; 397 } 398 /* Bah. We're just overflowed. */ 399 /* diag_listed_as: Integer overflow in %s number */ 400 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 401 "Integer overflow in hexadecimal number"); 402 overflowed = TRUE; 403 value_nv = (NV) value; 404 } 405 value_nv *= 16.0; 406 /* If an NV has not enough bits in its mantissa to 407 * represent a UV this summing of small low-order numbers 408 * is a waste of time (because the NV cannot preserve 409 * the low-order bits anyway): we could just remember when 410 * did we overflow and in the end just multiply value_nv by the 411 * right amount of 16-tuples. */ 412 value_nv += (NV) XDIGIT_VALUE(*s); 413 continue; 414 } 415 if (*s == '_' && len && allow_underscores && s[1] 416 && isXDIGIT(s[1])) 417 { 418 --len; 419 ++s; 420 goto redo; 421 } 422 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 423 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 424 "Illegal hexadecimal digit '%c' ignored", *s); 425 break; 426 } 427 428 if ( ( overflowed && value_nv > 4294967295.0) 429 #if UVSIZE > 4 430 || (!overflowed && value > 0xffffffff 431 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 432 #endif 433 ) { 434 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 435 "Hexadecimal number > 0xffffffff non-portable"); 436 } 437 *len_p = s - start; 438 if (!overflowed) { 439 *flags = 0; 440 return value; 441 } 442 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 443 if (result) 444 *result = value_nv; 445 return UV_MAX; 446 } 447 448 /* 449 =for apidoc grok_oct 450 451 converts a string representing an octal number to numeric form. 452 453 On entry C<start> and C<*len> give the string to scan, C<*flags> gives 454 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. 455 The scan stops at the end of the string, or the first invalid character. 456 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an 457 8 or 9 will also trigger a warning. 458 On return C<*len> is set to the length of the scanned string, 459 and C<*flags> gives output flags. 460 461 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 462 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct> 463 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 464 and writes the value to C<*result> (or the value is discarded if C<result> 465 is C<NULL>). 466 467 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal 468 number may use C<"_"> characters to separate digits. 469 470 =cut 471 472 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE> 473 which suppresses any message for non-portable numbers, but which are valid 474 on this platform. 475 */ 476 477 UV 478 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 479 { 480 const char *s = start; 481 STRLEN len = *len_p; 482 UV value = 0; 483 NV value_nv = 0; 484 const UV max_div_8 = UV_MAX / 8; 485 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES); 486 bool overflowed = FALSE; 487 488 PERL_ARGS_ASSERT_GROK_OCT; 489 490 for (; len-- && *s; s++) { 491 if (isOCTAL(*s)) { 492 /* Write it in this wonky order with a goto to attempt to get the 493 compiler to make the common case integer-only loop pretty tight. 494 */ 495 redo: 496 if (!overflowed) { 497 if (value <= max_div_8) { 498 value = (value << 3) | OCTAL_VALUE(*s); 499 continue; 500 } 501 /* Bah. We're just overflowed. */ 502 /* diag_listed_as: Integer overflow in %s number */ 503 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), 504 "Integer overflow in octal number"); 505 overflowed = TRUE; 506 value_nv = (NV) value; 507 } 508 value_nv *= 8.0; 509 /* If an NV has not enough bits in its mantissa to 510 * represent a UV this summing of small low-order numbers 511 * is a waste of time (because the NV cannot preserve 512 * the low-order bits anyway): we could just remember when 513 * did we overflow and in the end just multiply value_nv by the 514 * right amount of 8-tuples. */ 515 value_nv += (NV) OCTAL_VALUE(*s); 516 continue; 517 } 518 if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) { 519 --len; 520 ++s; 521 goto redo; 522 } 523 /* Allow \octal to work the DWIM way (that is, stop scanning 524 * as soon as non-octal characters are seen, complain only if 525 * someone seems to want to use the digits eight and nine. Since we 526 * know it is not octal, then if isDIGIT, must be an 8 or 9). */ 527 if (isDIGIT(*s)) { 528 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) 529 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT), 530 "Illegal octal digit '%c' ignored", *s); 531 } 532 break; 533 } 534 535 if ( ( overflowed && value_nv > 4294967295.0) 536 #if UVSIZE > 4 537 || (!overflowed && value > 0xffffffff 538 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE)) 539 #endif 540 ) { 541 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 542 "Octal number > 037777777777 non-portable"); 543 } 544 *len_p = s - start; 545 if (!overflowed) { 546 *flags = 0; 547 return value; 548 } 549 *flags = PERL_SCAN_GREATER_THAN_UV_MAX; 550 if (result) 551 *result = value_nv; 552 return UV_MAX; 553 } 554 555 /* 556 =for apidoc scan_bin 557 558 For backwards compatibility. Use C<grok_bin> instead. 559 560 =for apidoc scan_hex 561 562 For backwards compatibility. Use C<grok_hex> instead. 563 564 =for apidoc scan_oct 565 566 For backwards compatibility. Use C<grok_oct> instead. 567 568 =cut 569 */ 570 571 NV 572 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 573 { 574 NV rnv; 575 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 576 const UV ruv = grok_bin (start, &len, &flags, &rnv); 577 578 PERL_ARGS_ASSERT_SCAN_BIN; 579 580 *retlen = len; 581 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 582 } 583 584 NV 585 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 586 { 587 NV rnv; 588 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 589 const UV ruv = grok_oct (start, &len, &flags, &rnv); 590 591 PERL_ARGS_ASSERT_SCAN_OCT; 592 593 *retlen = len; 594 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 595 } 596 597 NV 598 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 599 { 600 NV rnv; 601 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 602 const UV ruv = grok_hex (start, &len, &flags, &rnv); 603 604 PERL_ARGS_ASSERT_SCAN_HEX; 605 606 *retlen = len; 607 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 608 } 609 610 /* 611 =for apidoc grok_numeric_radix 612 613 Scan and skip for a numeric decimal separator (radix). 614 615 =cut 616 */ 617 bool 618 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) 619 { 620 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX; 621 622 #ifdef USE_LOCALE_NUMERIC 623 624 if (IN_LC(LC_NUMERIC)) { 625 STRLEN len; 626 char * radix; 627 bool matches_radix = FALSE; 628 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 629 630 STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); 631 632 radix = SvPV(PL_numeric_radix_sv, len); 633 radix = savepvn(radix, len); 634 635 RESTORE_LC_NUMERIC(); 636 637 if (*sp + len <= send) { 638 matches_radix = memEQ(*sp, radix, len); 639 } 640 641 Safefree(radix); 642 643 if (matches_radix) { 644 *sp += len; 645 return TRUE; 646 } 647 } 648 649 #endif 650 651 /* always try "." if numeric radix didn't match because 652 * we may have data from different locales mixed */ 653 if (*sp < send && **sp == '.') { 654 ++*sp; 655 return TRUE; 656 } 657 658 return FALSE; 659 } 660 661 /* 662 =for apidoc grok_infnan 663 664 Helper for C<grok_number()>, accepts various ways of spelling "infinity" 665 or "not a number", and returns one of the following flag combinations: 666 667 IS_NUMBER_INFINITY 668 IS_NUMBER_NAN 669 IS_NUMBER_INFINITY | IS_NUMBER_NEG 670 IS_NUMBER_NAN | IS_NUMBER_NEG 671 0 672 673 possibly |-ed with C<IS_NUMBER_TRAILING>. 674 675 If an infinity or a not-a-number is recognized, C<*sp> will point to 676 one byte past the end of the recognized string. If the recognition fails, 677 zero is returned, and C<*sp> will not move. 678 679 =cut 680 */ 681 682 int 683 Perl_grok_infnan(pTHX_ const char** sp, const char* send) 684 { 685 const char* s = *sp; 686 int flags = 0; 687 #if defined(NV_INF) || defined(NV_NAN) 688 bool odh = FALSE; /* one-dot-hash: 1.#INF */ 689 690 PERL_ARGS_ASSERT_GROK_INFNAN; 691 692 if (*s == '+') { 693 s++; if (s == send) return 0; 694 } 695 else if (*s == '-') { 696 flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */ 697 s++; if (s == send) return 0; 698 } 699 700 if (*s == '1') { 701 /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN) 702 * Let's keep the dot optional. */ 703 s++; if (s == send) return 0; 704 if (*s == '.') { 705 s++; if (s == send) return 0; 706 } 707 if (*s == '#') { 708 s++; if (s == send) return 0; 709 } else 710 return 0; 711 odh = TRUE; 712 } 713 714 if (isALPHA_FOLD_EQ(*s, 'I')) { 715 /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */ 716 717 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; 718 s++; if (s == send) return 0; 719 if (isALPHA_FOLD_EQ(*s, 'F')) { 720 s++; 721 if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) { 722 int fail = 723 flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING; 724 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail; 725 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail; 726 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail; 727 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail; 728 s++; 729 } else if (odh) { 730 while (*s == '0') { /* 1.#INF00 */ 731 s++; 732 } 733 } 734 while (s < send && isSPACE(*s)) 735 s++; 736 if (s < send && *s) { 737 flags |= IS_NUMBER_TRAILING; 738 } 739 flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; 740 } 741 else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */ 742 s++; 743 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 744 while (*s == '0') { /* 1.#IND00 */ 745 s++; 746 } 747 if (*s) { 748 flags |= IS_NUMBER_TRAILING; 749 } 750 } else 751 return 0; 752 } 753 else { 754 /* Maybe NAN of some sort */ 755 756 if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) { 757 /* snan, qNaN */ 758 /* XXX do something with the snan/qnan difference */ 759 s++; if (s == send) return 0; 760 } 761 762 if (isALPHA_FOLD_EQ(*s, 'N')) { 763 s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0; 764 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; 765 s++; 766 767 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 768 769 /* NaN can be followed by various stuff (NaNQ, NaNS), but 770 * there are also multiple different NaN values, and some 771 * implementations output the "payload" values, 772 * e.g. NaN123, NAN(abc), while some legacy implementations 773 * have weird stuff like NaN%. */ 774 if (isALPHA_FOLD_EQ(*s, 'q') || 775 isALPHA_FOLD_EQ(*s, 's')) { 776 /* "nanq" or "nans" are ok, though generating 777 * these portably is tricky. */ 778 s++; 779 } 780 if (*s == '(') { 781 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */ 782 const char *t; 783 s++; 784 if (s == send) { 785 return flags | IS_NUMBER_TRAILING; 786 } 787 t = s + 1; 788 while (t < send && *t && *t != ')') { 789 t++; 790 } 791 if (t == send) { 792 return flags | IS_NUMBER_TRAILING; 793 } 794 if (*t == ')') { 795 int nantype; 796 UV nanval; 797 if (s[0] == '0' && s + 2 < t && 798 isALPHA_FOLD_EQ(s[1], 'x') && 799 isXDIGIT(s[2])) { 800 STRLEN len = t - s; 801 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 802 nanval = grok_hex(s, &len, &flags, NULL); 803 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) { 804 nantype = 0; 805 } else { 806 nantype = IS_NUMBER_IN_UV; 807 } 808 s += len; 809 } else if (s[0] == '0' && s + 2 < t && 810 isALPHA_FOLD_EQ(s[1], 'b') && 811 (s[2] == '0' || s[2] == '1')) { 812 STRLEN len = t - s; 813 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 814 nanval = grok_bin(s, &len, &flags, NULL); 815 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) { 816 nantype = 0; 817 } else { 818 nantype = IS_NUMBER_IN_UV; 819 } 820 s += len; 821 } else { 822 const char *u; 823 nantype = 824 grok_number_flags(s, t - s, &nanval, 825 PERL_SCAN_TRAILING | 826 PERL_SCAN_ALLOW_UNDERSCORES); 827 /* Unfortunately grok_number_flags() doesn't 828 * tell how far we got and the ')' will always 829 * be "trailing", so we need to double-check 830 * whether we had something dubious. */ 831 for (u = s; u < t; u++) { 832 if (!isDIGIT(*u)) { 833 flags |= IS_NUMBER_TRAILING; 834 break; 835 } 836 } 837 s = u; 838 } 839 840 /* XXX Doesn't do octal: nan("0123"). 841 * Probably not a big loss. */ 842 843 if ((nantype & IS_NUMBER_NOT_INT) || 844 !(nantype && IS_NUMBER_IN_UV)) { 845 /* XXX the nanval is currently unused, that is, 846 * not inserted as the NaN payload of the NV. 847 * But the above code already parses the C99 848 * nan(...) format. See below, and see also 849 * the nan() in POSIX.xs. 850 * 851 * Certain configuration combinations where 852 * NVSIZE is greater than UVSIZE mean that 853 * a single UV cannot contain all the possible 854 * NaN payload bits. There would need to be 855 * some more generic syntax than "nan($uv)". 856 * 857 * Issues to keep in mind: 858 * 859 * (1) In most common cases there would 860 * not be an integral number of bytes that 861 * could be set, only a certain number of bits. 862 * For example for the common case of 863 * NVSIZE == UVSIZE == 8 there is room for 52 864 * bits in the payload, but the most significant 865 * bit is commonly reserved for the 866 * signaling/quiet bit, leaving 51 bits. 867 * Furthermore, the C99 nan() is supposed 868 * to generate quiet NaNs, so it is doubtful 869 * whether it should be able to generate 870 * signaling NaNs. For the x86 80-bit doubles 871 * (if building a long double Perl) there would 872 * be 62 bits (s/q bit being the 63rd). 873 * 874 * (2) Endianness of the payload bits. If the 875 * payload is specified as an UV, the low-order 876 * bits of the UV are naturally little-endianed 877 * (rightmost) bits of the payload. The endianness 878 * of UVs and NVs can be different. */ 879 return 0; 880 } 881 if (s < t) { 882 flags |= IS_NUMBER_TRAILING; 883 } 884 } else { 885 /* Looked like nan(...), but no close paren. */ 886 flags |= IS_NUMBER_TRAILING; 887 } 888 } else { 889 while (s < send && isSPACE(*s)) 890 s++; 891 if (s < send && *s) { 892 /* Note that we here implicitly accept (parse as 893 * "nan", but with warnings) also any other weird 894 * trailing stuff for "nan". In the above we just 895 * check that if we got the C99-style "nan(...)", 896 * the "..." looks sane. 897 * If in future we accept more ways of specifying 898 * the nan payload, the accepting would happen around 899 * here. */ 900 flags |= IS_NUMBER_TRAILING; 901 } 902 } 903 s = send; 904 } 905 else 906 return 0; 907 } 908 909 while (s < send && isSPACE(*s)) 910 s++; 911 912 #else 913 PERL_UNUSED_ARG(send); 914 #endif /* #if defined(NV_INF) || defined(NV_NAN) */ 915 *sp = s; 916 return flags; 917 } 918 919 /* 920 =for apidoc grok_number_flags 921 922 Recognise (or not) a number. The type of the number is returned 923 (0 if unrecognised), otherwise it is a bit-ORed combination of 924 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>, 925 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h). 926 927 If the value of the number can fit in a UV, it is returned in C<*valuep>. 928 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV> 929 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned 930 to during processing even though C<IS_NUMBER_IN_UV> is not set on return. 931 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when 932 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur. 933 934 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were 935 seen (in which case C<*valuep> gives the true value truncated to an integer), and 936 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the 937 absolute value). C<IS_NUMBER_IN_UV> is not set if e notation was used or the 938 number is larger than a UV. 939 940 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing 941 non-numeric text on an otherwise successful I<grok>, setting 942 C<IS_NUMBER_TRAILING> on the result. 943 944 =for apidoc grok_number 945 946 Identical to C<grok_number_flags()> with C<flags> set to zero. 947 948 =cut 949 */ 950 int 951 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) 952 { 953 PERL_ARGS_ASSERT_GROK_NUMBER; 954 955 return grok_number_flags(pv, len, valuep, 0); 956 } 957 958 static const UV uv_max_div_10 = UV_MAX / 10; 959 static const U8 uv_max_mod_10 = UV_MAX % 10; 960 961 int 962 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) 963 { 964 const char *s = pv; 965 const char * const send = pv + len; 966 const char *d; 967 int numtype = 0; 968 969 PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS; 970 971 while (s < send && isSPACE(*s)) 972 s++; 973 if (s == send) { 974 return 0; 975 } else if (*s == '-') { 976 s++; 977 numtype = IS_NUMBER_NEG; 978 } 979 else if (*s == '+') 980 s++; 981 982 if (s == send) 983 return 0; 984 985 /* The first digit (after optional sign): note that might 986 * also point to "infinity" or "nan", or "1.#INF". */ 987 d = s; 988 989 /* next must be digit or the radix separator or beginning of infinity/nan */ 990 if (isDIGIT(*s)) { 991 /* UVs are at least 32 bits, so the first 9 decimal digits cannot 992 overflow. */ 993 UV value = *s - '0'; 994 /* This construction seems to be more optimiser friendly. 995 (without it gcc does the isDIGIT test and the *s - '0' separately) 996 With it gcc on arm is managing 6 instructions (6 cycles) per digit. 997 In theory the optimiser could deduce how far to unroll the loop 998 before checking for overflow. */ 999 if (++s < send) { 1000 int digit = *s - '0'; 1001 if (inRANGE(digit, 0, 9)) { 1002 value = value * 10 + digit; 1003 if (++s < send) { 1004 digit = *s - '0'; 1005 if (inRANGE(digit, 0, 9)) { 1006 value = value * 10 + digit; 1007 if (++s < send) { 1008 digit = *s - '0'; 1009 if (inRANGE(digit, 0, 9)) { 1010 value = value * 10 + digit; 1011 if (++s < send) { 1012 digit = *s - '0'; 1013 if (inRANGE(digit, 0, 9)) { 1014 value = value * 10 + digit; 1015 if (++s < send) { 1016 digit = *s - '0'; 1017 if (inRANGE(digit, 0, 9)) { 1018 value = value * 10 + digit; 1019 if (++s < send) { 1020 digit = *s - '0'; 1021 if (inRANGE(digit, 0, 9)) { 1022 value = value * 10 + digit; 1023 if (++s < send) { 1024 digit = *s - '0'; 1025 if (inRANGE(digit, 0, 9)) { 1026 value = value * 10 + digit; 1027 if (++s < send) { 1028 digit = *s - '0'; 1029 if (inRANGE(digit, 0, 9)) { 1030 value = value * 10 + digit; 1031 if (++s < send) { 1032 /* Now got 9 digits, so need to check 1033 each time for overflow. */ 1034 digit = *s - '0'; 1035 while ( inRANGE(digit, 0, 9) 1036 && (value < uv_max_div_10 1037 || (value == uv_max_div_10 1038 && digit <= uv_max_mod_10))) { 1039 value = value * 10 + digit; 1040 if (++s < send) 1041 digit = *s - '0'; 1042 else 1043 break; 1044 } 1045 if (inRANGE(digit, 0, 9) 1046 && (s < send)) { 1047 /* value overflowed. 1048 skip the remaining digits, don't 1049 worry about setting *valuep. */ 1050 do { 1051 s++; 1052 } while (s < send && isDIGIT(*s)); 1053 numtype |= 1054 IS_NUMBER_GREATER_THAN_UV_MAX; 1055 goto skip_value; 1056 } 1057 } 1058 } 1059 } 1060 } 1061 } 1062 } 1063 } 1064 } 1065 } 1066 } 1067 } 1068 } 1069 } 1070 } 1071 } 1072 } 1073 } 1074 numtype |= IS_NUMBER_IN_UV; 1075 if (valuep) 1076 *valuep = value; 1077 1078 skip_value: 1079 if (GROK_NUMERIC_RADIX(&s, send)) { 1080 numtype |= IS_NUMBER_NOT_INT; 1081 while (s < send && isDIGIT(*s)) /* optional digits after the radix */ 1082 s++; 1083 } 1084 } 1085 else if (GROK_NUMERIC_RADIX(&s, send)) { 1086 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ 1087 /* no digits before the radix means we need digits after it */ 1088 if (s < send && isDIGIT(*s)) { 1089 do { 1090 s++; 1091 } while (s < send && isDIGIT(*s)); 1092 if (valuep) { 1093 /* integer approximation is valid - it's 0. */ 1094 *valuep = 0; 1095 } 1096 } 1097 else 1098 return 0; 1099 } 1100 1101 if (s > d && s < send) { 1102 /* we can have an optional exponent part */ 1103 if (isALPHA_FOLD_EQ(*s, 'e')) { 1104 s++; 1105 if (s < send && (*s == '-' || *s == '+')) 1106 s++; 1107 if (s < send && isDIGIT(*s)) { 1108 do { 1109 s++; 1110 } while (s < send && isDIGIT(*s)); 1111 } 1112 else if (flags & PERL_SCAN_TRAILING) 1113 return numtype | IS_NUMBER_TRAILING; 1114 else 1115 return 0; 1116 1117 /* The only flag we keep is sign. Blow away any "it's UV" */ 1118 numtype &= IS_NUMBER_NEG; 1119 numtype |= IS_NUMBER_NOT_INT; 1120 } 1121 } 1122 while (s < send && isSPACE(*s)) 1123 s++; 1124 if (s >= send) 1125 return numtype; 1126 if (memEQs(pv, len, "0 but true")) { 1127 if (valuep) 1128 *valuep = 0; 1129 return IS_NUMBER_IN_UV; 1130 } 1131 /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */ 1132 if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) { 1133 /* Really detect inf/nan. Start at d, not s, since the above 1134 * code might have already consumed the "1." or "1". */ 1135 const int infnan = Perl_grok_infnan(aTHX_ &d, send); 1136 if ((infnan & IS_NUMBER_INFINITY)) { 1137 return (numtype | infnan); /* Keep sign for infinity. */ 1138 } 1139 else if ((infnan & IS_NUMBER_NAN)) { 1140 return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */ 1141 } 1142 } 1143 else if (flags & PERL_SCAN_TRAILING) { 1144 return numtype | IS_NUMBER_TRAILING; 1145 } 1146 1147 return 0; 1148 } 1149 1150 /* 1151 =for apidoc grok_atoUV 1152 1153 parse a string, looking for a decimal unsigned integer. 1154 1155 On entry, C<pv> points to the beginning of the string; 1156 C<valptr> points to a UV that will receive the converted value, if found; 1157 C<endptr> is either NULL or points to a variable that points to one byte 1158 beyond the point in C<pv> that this routine should examine. 1159 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated. 1160 1161 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with 1162 no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that 1163 value. 1164 1165 If you constrain the portion of C<pv> that is looked at by this function (by 1166 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a 1167 valid value, it will return TRUE, setting C<*endptr> to the byte following the 1168 final digit of the value. But if there is no constraint at what's looked at, 1169 all of C<pv> must be valid in order for TRUE to be returned. 1170 1171 The only characters this accepts are the decimal digits '0'..'9'. 1172 1173 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional 1174 leading whitespace, nor negative inputs. If such features are required, the 1175 calling code needs to explicitly implement those. 1176 1177 Note that this function returns FALSE for inputs that would overflow a UV, 1178 or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor 1179 C<01>, C<002>, I<etc>. 1180 1181 Background: C<atoi> has severe problems with illegal inputs, it cannot be 1182 used for incremental parsing, and therefore should be avoided 1183 C<atoi> and C<strtol> are also affected by locale settings, which can also be 1184 seen as a bug (global state controlled by user environment). 1185 1186 =cut 1187 1188 */ 1189 1190 bool 1191 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr) 1192 { 1193 const char* s = pv; 1194 const char** eptr; 1195 const char* end2; /* Used in case endptr is NULL. */ 1196 UV val = 0; /* The parsed value. */ 1197 1198 PERL_ARGS_ASSERT_GROK_ATOUV; 1199 1200 if (endptr) { 1201 eptr = endptr; 1202 } 1203 else { 1204 end2 = s + strlen(s); 1205 eptr = &end2; 1206 } 1207 1208 if ( *eptr <= s 1209 || ! isDIGIT(*s)) 1210 { 1211 return FALSE; 1212 } 1213 1214 /* Single-digit inputs are quite common. */ 1215 val = *s++ - '0'; 1216 if (s < *eptr && isDIGIT(*s)) { 1217 /* Fail on extra leading zeros. */ 1218 if (val == 0) 1219 return FALSE; 1220 while (s < *eptr && isDIGIT(*s)) { 1221 /* This could be unrolled like in grok_number(), but 1222 * the expected uses of this are not speed-needy, and 1223 * unlikely to need full 64-bitness. */ 1224 const U8 digit = *s++ - '0'; 1225 if (val < uv_max_div_10 || 1226 (val == uv_max_div_10 && digit <= uv_max_mod_10)) { 1227 val = val * 10 + digit; 1228 } else { 1229 return FALSE; 1230 } 1231 } 1232 } 1233 1234 if (endptr == NULL) { 1235 if (*s) { 1236 return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */ 1237 } 1238 } 1239 else { 1240 *endptr = s; 1241 } 1242 1243 *valptr = val; 1244 return TRUE; 1245 } 1246 1247 #ifndef Perl_strtod 1248 STATIC NV 1249 S_mulexp10(NV value, I32 exponent) 1250 { 1251 NV result = 1.0; 1252 NV power = 10.0; 1253 bool negative = 0; 1254 I32 bit; 1255 1256 if (exponent == 0) 1257 return value; 1258 if (value == 0) 1259 return (NV)0; 1260 1261 /* On OpenVMS VAX we by default use the D_FLOAT double format, 1262 * and that format does not have *easy* capabilities [1] for 1263 * overflowing doubles 'silently' as IEEE fp does. We also need 1264 * to support G_FLOAT on both VAX and Alpha, and though the exponent 1265 * range is much larger than D_FLOAT it still doesn't do silent 1266 * overflow. Therefore we need to detect early whether we would 1267 * overflow (this is the behaviour of the native string-to-float 1268 * conversion routines, and therefore of native applications, too). 1269 * 1270 * [1] Trying to establish a condition handler to trap floating point 1271 * exceptions is not a good idea. */ 1272 1273 /* In UNICOS and in certain Cray models (such as T90) there is no 1274 * IEEE fp, and no way at all from C to catch fp overflows gracefully. 1275 * There is something you can do if you are willing to use some 1276 * inline assembler: the instruction is called DFI-- but that will 1277 * disable *all* floating point interrupts, a little bit too large 1278 * a hammer. Therefore we need to catch potential overflows before 1279 * it's too late. */ 1280 1281 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP) 1282 STMT_START { 1283 const NV exp_v = log10(value); 1284 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP) 1285 return NV_MAX; 1286 if (exponent < 0) { 1287 if (-(exponent + exp_v) >= NV_MAX_10_EXP) 1288 return 0.0; 1289 while (-exponent >= NV_MAX_10_EXP) { 1290 /* combination does not overflow, but 10^(-exponent) does */ 1291 value /= 10; 1292 ++exponent; 1293 } 1294 } 1295 } STMT_END; 1296 #endif 1297 1298 if (exponent < 0) { 1299 negative = 1; 1300 exponent = -exponent; 1301 #ifdef NV_MAX_10_EXP 1302 /* for something like 1234 x 10^-309, the action of calculating 1303 * the intermediate value 10^309 then returning 1234 / (10^309) 1304 * will fail, since 10^309 becomes infinity. In this case try to 1305 * refactor it as 123 / (10^308) etc. 1306 */ 1307 while (value && exponent > NV_MAX_10_EXP) { 1308 exponent--; 1309 value /= 10; 1310 } 1311 if (value == 0.0) 1312 return value; 1313 #endif 1314 } 1315 #if defined(__osf__) 1316 /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV) 1317 * Tru64 fp behavior on inf/nan is somewhat broken. Another way 1318 * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF) 1319 * but that breaks another set of infnan.t tests. */ 1320 # define FP_OVERFLOWS_TO_ZERO 1321 #endif 1322 for (bit = 1; exponent; bit <<= 1) { 1323 if (exponent & bit) { 1324 exponent ^= bit; 1325 result *= power; 1326 #ifdef FP_OVERFLOWS_TO_ZERO 1327 if (result == 0) 1328 # ifdef NV_INF 1329 return value < 0 ? -NV_INF : NV_INF; 1330 # else 1331 return value < 0 ? -FLT_MAX : FLT_MAX; 1332 # endif 1333 #endif 1334 /* Floating point exceptions are supposed to be turned off, 1335 * but if we're obviously done, don't risk another iteration. 1336 */ 1337 if (exponent == 0) break; 1338 } 1339 power *= power; 1340 } 1341 return negative ? value / result : value * result; 1342 } 1343 #endif /* #ifndef Perl_strtod */ 1344 1345 #ifdef Perl_strtod 1346 # define ATOF(s, x) my_atof2(s, &x) 1347 #else 1348 # define ATOF(s, x) Perl_atof2(s, x) 1349 #endif 1350 1351 NV 1352 Perl_my_atof(pTHX_ const char* s) 1353 { 1354 /* 's' must be NUL terminated */ 1355 1356 NV x = 0.0; 1357 1358 PERL_ARGS_ASSERT_MY_ATOF; 1359 1360 #if ! defined(USE_LOCALE_NUMERIC) 1361 1362 ATOF(s, x); 1363 1364 #else 1365 1366 { 1367 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 1368 STORE_LC_NUMERIC_SET_TO_NEEDED(); 1369 if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) { 1370 ATOF(s,x); 1371 } 1372 else { 1373 1374 /* Look through the string for the first thing that looks like a 1375 * decimal point: either the value in the current locale or the 1376 * standard fallback of '.'. The one which appears earliest in the 1377 * input string is the one that we should have atof look for. Note 1378 * that we have to determine this beforehand because on some 1379 * systems, Perl_atof2 is just a wrapper around the system's atof. 1380 * */ 1381 const char * const standard_pos = strchr(s, '.'); 1382 const char * const local_pos 1383 = strstr(s, SvPV_nolen(PL_numeric_radix_sv)); 1384 const bool use_standard_radix 1385 = standard_pos && (!local_pos || standard_pos < local_pos); 1386 1387 if (use_standard_radix) { 1388 SET_NUMERIC_STANDARD(); 1389 LOCK_LC_NUMERIC_STANDARD(); 1390 } 1391 1392 ATOF(s,x); 1393 1394 if (use_standard_radix) { 1395 UNLOCK_LC_NUMERIC_STANDARD(); 1396 SET_NUMERIC_UNDERLYING(); 1397 } 1398 } 1399 RESTORE_LC_NUMERIC(); 1400 } 1401 1402 #endif 1403 1404 return x; 1405 } 1406 1407 #if defined(NV_INF) || defined(NV_NAN) 1408 1409 #ifdef USING_MSVC6 1410 # pragma warning(push) 1411 # pragma warning(disable:4756;disable:4056) 1412 #endif 1413 static char* 1414 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value) 1415 { 1416 const char *p0 = negative ? s - 1 : s; 1417 const char *p = p0; 1418 const int infnan = grok_infnan(&p, send); 1419 if (infnan && p != p0) { 1420 /* If we can generate inf/nan directly, let's do so. */ 1421 #ifdef NV_INF 1422 if ((infnan & IS_NUMBER_INFINITY)) { 1423 *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF; 1424 return (char*)p; 1425 } 1426 #endif 1427 #ifdef NV_NAN 1428 if ((infnan & IS_NUMBER_NAN)) { 1429 *value = NV_NAN; 1430 return (char*)p; 1431 } 1432 #endif 1433 #ifdef Perl_strtod 1434 /* If still here, we didn't have either NV_INF or NV_NAN, 1435 * and can try falling back to native strtod/strtold. 1436 * 1437 * The native interface might not recognize all the possible 1438 * inf/nan strings Perl recognizes. What we can try 1439 * is to try faking the input. We will try inf/-inf/nan 1440 * as the most promising/portable input. */ 1441 { 1442 const char* fake = "silence compiler warning"; 1443 char* endp; 1444 NV nv; 1445 #ifdef NV_INF 1446 if ((infnan & IS_NUMBER_INFINITY)) { 1447 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf"; 1448 } 1449 #endif 1450 #ifdef NV_NAN 1451 if ((infnan & IS_NUMBER_NAN)) { 1452 fake = "nan"; 1453 } 1454 #endif 1455 assert(strNE(fake, "silence compiler warning")); 1456 nv = S_strtod(aTHX_ fake, &endp); 1457 if (fake != endp) { 1458 #ifdef NV_INF 1459 if ((infnan & IS_NUMBER_INFINITY)) { 1460 # ifdef Perl_isinf 1461 if (Perl_isinf(nv)) 1462 *value = nv; 1463 # else 1464 /* last resort, may generate SIGFPE */ 1465 *value = Perl_exp((NV)1e9); 1466 if ((infnan & IS_NUMBER_NEG)) 1467 *value = -*value; 1468 # endif 1469 return (char*)p; /* p, not endp */ 1470 } 1471 #endif 1472 #ifdef NV_NAN 1473 if ((infnan & IS_NUMBER_NAN)) { 1474 # ifdef Perl_isnan 1475 if (Perl_isnan(nv)) 1476 *value = nv; 1477 # else 1478 /* last resort, may generate SIGFPE */ 1479 *value = Perl_log((NV)-1.0); 1480 # endif 1481 return (char*)p; /* p, not endp */ 1482 #endif 1483 } 1484 } 1485 } 1486 #endif /* #ifdef Perl_strtod */ 1487 } 1488 return NULL; 1489 } 1490 #ifdef USING_MSVC6 1491 # pragma warning(pop) 1492 #endif 1493 1494 #endif /* if defined(NV_INF) || defined(NV_NAN) */ 1495 1496 char* 1497 Perl_my_atof2(pTHX_ const char* orig, NV* value) 1498 { 1499 PERL_ARGS_ASSERT_MY_ATOF2; 1500 return my_atof3(orig, value, 0); 1501 } 1502 1503 char* 1504 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len) 1505 { 1506 const char* s = orig; 1507 NV result[3] = {0.0, 0.0, 0.0}; 1508 #if defined(USE_PERL_ATOF) || defined(Perl_strtod) 1509 const char* send = s + ((len != 0) 1510 ? len 1511 : strlen(orig)); /* one past the last */ 1512 bool negative = 0; 1513 #endif 1514 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod) 1515 UV accumulator[2] = {0,0}; /* before/after dp */ 1516 bool seen_digit = 0; 1517 I32 exp_adjust[2] = {0,0}; 1518 I32 exp_acc[2] = {-1, -1}; 1519 /* the current exponent adjust for the accumulators */ 1520 I32 exponent = 0; 1521 I32 seen_dp = 0; 1522 I32 digit = 0; 1523 I32 old_digit = 0; 1524 I32 sig_digits = 0; /* noof significant digits seen so far */ 1525 #endif 1526 1527 #if defined(USE_PERL_ATOF) || defined(Perl_strtod) 1528 PERL_ARGS_ASSERT_MY_ATOF3; 1529 1530 /* leading whitespace */ 1531 while (s < send && isSPACE(*s)) 1532 ++s; 1533 1534 /* sign */ 1535 switch (*s) { 1536 case '-': 1537 negative = 1; 1538 /* FALLTHROUGH */ 1539 case '+': 1540 ++s; 1541 } 1542 #endif 1543 1544 #ifdef Perl_strtod 1545 { 1546 char* endp; 1547 char* copy = NULL; 1548 1549 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value))) 1550 return endp; 1551 1552 /* If the length is passed in, the input string isn't NUL-terminated, 1553 * and in it turns out the function below assumes it is; therefore we 1554 * create a copy and NUL-terminate that */ 1555 if (len) { 1556 Newx(copy, len + 1, char); 1557 Copy(orig, copy, len, char); 1558 copy[len] = '\0'; 1559 s = copy + (s - orig); 1560 } 1561 1562 result[2] = S_strtod(aTHX_ s, &endp); 1563 1564 /* If we created a copy, 'endp' is in terms of that. Convert back to 1565 * the original */ 1566 if (copy) { 1567 s = (s - copy) + (char *) orig; 1568 endp = (endp - copy) + (char *) orig; 1569 Safefree(copy); 1570 } 1571 1572 if (s != endp) { 1573 *value = negative ? -result[2] : result[2]; 1574 return endp; 1575 } 1576 return NULL; 1577 } 1578 #elif defined(USE_PERL_ATOF) 1579 1580 /* There is no point in processing more significant digits 1581 * than the NV can hold. Note that NV_DIG is a lower-bound value, 1582 * while we need an upper-bound value. We add 2 to account for this; 1583 * since it will have been conservative on both the first and last digit. 1584 * For example a 32-bit mantissa with an exponent of 4 would have 1585 * exact values in the set 1586 * 4 1587 * 8 1588 * .. 1589 * 17179869172 1590 * 17179869176 1591 * 17179869180 1592 * 1593 * where for the purposes of calculating NV_DIG we would have to discount 1594 * both the first and last digit, since neither can hold all values from 1595 * 0..9; but for calculating the value we must examine those two digits. 1596 */ 1597 #ifdef MAX_SIG_DIG_PLUS 1598 /* It is not necessarily the case that adding 2 to NV_DIG gets all the 1599 possible digits in a NV, especially if NVs are not IEEE compliant 1600 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */ 1601 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS) 1602 #else 1603 # define MAX_SIG_DIGITS (NV_DIG+2) 1604 #endif 1605 1606 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */ 1607 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10)) 1608 1609 #if defined(NV_INF) || defined(NV_NAN) 1610 { 1611 char* endp; 1612 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value))) 1613 return endp; 1614 } 1615 #endif 1616 1617 /* we accumulate digits into an integer; when this becomes too 1618 * large, we add the total to NV and start again */ 1619 1620 while (s < send) { 1621 if (isDIGIT(*s)) { 1622 seen_digit = 1; 1623 old_digit = digit; 1624 digit = *s++ - '0'; 1625 if (seen_dp) 1626 exp_adjust[1]++; 1627 1628 /* don't start counting until we see the first significant 1629 * digit, eg the 5 in 0.00005... */ 1630 if (!sig_digits && digit == 0) 1631 continue; 1632 1633 if (++sig_digits > MAX_SIG_DIGITS) { 1634 /* limits of precision reached */ 1635 if (digit > 5) { 1636 ++accumulator[seen_dp]; 1637 } else if (digit == 5) { 1638 if (old_digit % 2) { /* round to even - Allen */ 1639 ++accumulator[seen_dp]; 1640 } 1641 } 1642 if (seen_dp) { 1643 exp_adjust[1]--; 1644 } else { 1645 exp_adjust[0]++; 1646 } 1647 /* skip remaining digits */ 1648 while (s < send && isDIGIT(*s)) { 1649 ++s; 1650 if (! seen_dp) { 1651 exp_adjust[0]++; 1652 } 1653 } 1654 /* warn of loss of precision? */ 1655 } 1656 else { 1657 if (accumulator[seen_dp] > MAX_ACCUMULATE) { 1658 /* add accumulator to result and start again */ 1659 result[seen_dp] = S_mulexp10(result[seen_dp], 1660 exp_acc[seen_dp]) 1661 + (NV)accumulator[seen_dp]; 1662 accumulator[seen_dp] = 0; 1663 exp_acc[seen_dp] = 0; 1664 } 1665 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit; 1666 ++exp_acc[seen_dp]; 1667 } 1668 } 1669 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) { 1670 seen_dp = 1; 1671 if (sig_digits > MAX_SIG_DIGITS) { 1672 while (s < send && isDIGIT(*s)) { 1673 ++s; 1674 } 1675 break; 1676 } 1677 } 1678 else { 1679 break; 1680 } 1681 } 1682 1683 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0]; 1684 if (seen_dp) { 1685 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1]; 1686 } 1687 1688 if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) { 1689 bool expnegative = 0; 1690 1691 ++s; 1692 switch (*s) { 1693 case '-': 1694 expnegative = 1; 1695 /* FALLTHROUGH */ 1696 case '+': 1697 ++s; 1698 } 1699 while (s < send && isDIGIT(*s)) 1700 exponent = exponent * 10 + (*s++ - '0'); 1701 if (expnegative) 1702 exponent = -exponent; 1703 } 1704 1705 /* now apply the exponent */ 1706 1707 if (seen_dp) { 1708 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]) 1709 + S_mulexp10(result[1],exponent-exp_adjust[1]); 1710 } else { 1711 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]); 1712 } 1713 1714 /* now apply the sign */ 1715 if (negative) 1716 result[2] = -result[2]; 1717 #endif /* USE_PERL_ATOF */ 1718 *value = result[2]; 1719 return (char *)s; 1720 } 1721 1722 /* 1723 =for apidoc isinfnan 1724 1725 C<Perl_isinfnan()> is utility function that returns true if the NV 1726 argument is either an infinity or a C<NaN>, false otherwise. To test 1727 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>. 1728 1729 This is also the logical inverse of Perl_isfinite(). 1730 1731 =cut 1732 */ 1733 bool 1734 Perl_isinfnan(NV nv) 1735 { 1736 PERL_UNUSED_ARG(nv); 1737 #ifdef Perl_isinf 1738 if (Perl_isinf(nv)) 1739 return TRUE; 1740 #endif 1741 #ifdef Perl_isnan 1742 if (Perl_isnan(nv)) 1743 return TRUE; 1744 #endif 1745 return FALSE; 1746 } 1747 1748 /* 1749 =for apidoc 1750 1751 Checks whether the argument would be either an infinity or C<NaN> when used 1752 as a number, but is careful not to trigger non-numeric or uninitialized 1753 warnings. it assumes the caller has done C<SvGETMAGIC(sv)> already. 1754 1755 =cut 1756 */ 1757 1758 bool 1759 Perl_isinfnansv(pTHX_ SV *sv) 1760 { 1761 PERL_ARGS_ASSERT_ISINFNANSV; 1762 if (!SvOK(sv)) 1763 return FALSE; 1764 if (SvNOKp(sv)) 1765 return Perl_isinfnan(SvNVX(sv)); 1766 if (SvIOKp(sv)) 1767 return FALSE; 1768 { 1769 STRLEN len; 1770 const char *s = SvPV_nomg_const(sv, len); 1771 return cBOOL(grok_infnan(&s, s+len)); 1772 } 1773 } 1774 1775 #ifndef HAS_MODFL 1776 /* C99 has truncl, pre-C99 Solaris had aintl. We can use either with 1777 * copysignl to emulate modfl, which is in some platforms missing or 1778 * broken. */ 1779 # if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL) 1780 long double 1781 Perl_my_modfl(long double x, long double *ip) 1782 { 1783 *ip = truncl(x); 1784 return (x == *ip ? copysignl(0.0L, x) : x - *ip); 1785 } 1786 # elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL) 1787 long double 1788 Perl_my_modfl(long double x, long double *ip) 1789 { 1790 *ip = aintl(x); 1791 return (x == *ip ? copysignl(0.0L, x) : x - *ip); 1792 } 1793 # endif 1794 #endif 1795 1796 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */ 1797 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL) 1798 long double 1799 Perl_my_frexpl(long double x, int *e) { 1800 *e = x == 0.0L ? 0 : ilogbl(x) + 1; 1801 return (scalbnl(x, -*e)); 1802 } 1803 #endif 1804 1805 /* 1806 =for apidoc Perl_signbit 1807 1808 Return a non-zero integer if the sign bit on an NV is set, and 0 if 1809 it is not. 1810 1811 If F<Configure> detects this system has a C<signbit()> that will work with 1812 our NVs, then we just use it via the C<#define> in F<perl.h>. Otherwise, 1813 fall back on this implementation. The main use of this function 1814 is catching C<-0.0>. 1815 1816 C<Configure> notes: This function is called C<'Perl_signbit'> instead of a 1817 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()> 1818 function or macro that doesn't happen to work with our particular choice 1819 of NVs. We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect 1820 the standard system headers to be happy. Also, this is a no-context 1821 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in 1822 F<perl.h> as a simple macro call to the system's C<signbit()>. 1823 Users should just always call C<Perl_signbit()>. 1824 1825 =cut 1826 */ 1827 #if !defined(HAS_SIGNBIT) 1828 int 1829 Perl_signbit(NV x) { 1830 # ifdef Perl_fp_class_nzero 1831 return Perl_fp_class_nzero(x); 1832 /* Try finding the high byte, and assume it's highest bit 1833 * is the sign. This assumption is probably wrong somewhere. */ 1834 # elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 1835 return (((unsigned char *)&x)[9] & 0x80); 1836 # elif defined(NV_LITTLE_ENDIAN) 1837 /* Note that NVSIZE is sizeof(NV), which would make the below be 1838 * wrong if the end bytes are unused, which happens with the x86 1839 * 80-bit long doubles, which is why take care of that above. */ 1840 return (((unsigned char *)&x)[NVSIZE - 1] & 0x80); 1841 # elif defined(NV_BIG_ENDIAN) 1842 return (((unsigned char *)&x)[0] & 0x80); 1843 # else 1844 /* This last resort fallback is wrong for the negative zero. */ 1845 return (x < 0.0) ? 1 : 0; 1846 # endif 1847 } 1848 #endif 1849 1850 /* 1851 * ex: set ts=8 sts=4 sw=4 et: 1852 */ 1853