1 /* numeric.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * "That only makes eleven (plus one mislaid) and not fourteen, 13 * unless wizards count differently to other people." --Beorn 14 * 15 * [p.115 of _The Hobbit_: "Queer Lodgings"] 16 */ 17 18 /* 19 =head1 Numeric functions 20 21 =cut 22 23 This file contains all the stuff needed by perl for manipulating numeric 24 values, including such things as replacements for the OS's atof() function 25 26 */ 27 28 #include "EXTERN.h" 29 #define PERL_IN_NUMERIC_C 30 #include "perl.h" 31 32 #ifdef Perl_strtod 33 34 PERL_STATIC_INLINE NV 35 S_strtod(pTHX_ const char * const s, char ** e) 36 { 37 NV result; 38 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 39 40 STORE_LC_NUMERIC_SET_TO_NEEDED(); 41 42 # ifdef USE_QUADMATH 43 44 result = strtoflt128(s, e); 45 46 # elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \ 47 && defined(USE_LONG_DOUBLE) 48 # if defined(__MINGW64_VERSION_MAJOR) 49 /*********************************************** 50 We are unable to use strtold because of 51 https://sourceforge.net/p/mingw-w64/bugs/711/ 52 & 53 https://sourceforge.net/p/mingw-w64/bugs/725/ 54 55 but __mingw_strtold is fine. 56 ***********************************************/ 57 58 result = __mingw_strtold(s, e); 59 60 # else 61 62 result = strtold(s, e); 63 64 # endif 65 # elif defined(HAS_STRTOD) 66 67 result = strtod(s, e); 68 69 # else 70 # error No strtod() equivalent found 71 # endif 72 73 RESTORE_LC_NUMERIC(); 74 75 return result; 76 } 77 78 #endif /* #ifdef Perl_strtod */ 79 80 /* 81 82 =for apidoc my_strtod 83 84 This function is equivalent to the libc strtod() function, and is available 85 even on platforms that lack plain strtod(). Its return value is the best 86 available precision depending on platform capabilities and F<Configure> 87 options. 88 89 It properly handles the locale radix character, meaning it expects a dot except 90 when called from within the scope of S<C<use locale>>, in which case the radix 91 character should be that specified by the current locale. 92 93 The synonym Strtod() may be used instead. 94 95 =cut 96 97 */ 98 99 NV 100 Perl_my_strtod(const char * const s, char **e) 101 { 102 dTHX; 103 104 PERL_ARGS_ASSERT_MY_STRTOD; 105 106 #ifdef Perl_strtod 107 108 return S_strtod(aTHX_ s, e); 109 110 #else 111 112 { 113 NV result; 114 char ** end_ptr = NULL; 115 116 *end_ptr = my_atof2(s, &result); 117 if (e) { 118 *e = *end_ptr; 119 } 120 121 if (! *end_ptr) { 122 result = 0.0; 123 } 124 125 return result; 126 } 127 128 #endif 129 130 } 131 132 133 U32 134 Perl_cast_ulong(NV f) 135 { 136 if (f < 0.0) 137 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f; 138 if (f < U32_MAX_P1) { 139 #if CASTFLAGS & 2 140 if (f < U32_MAX_P1_HALF) 141 return (U32) f; 142 f -= U32_MAX_P1_HALF; 143 return ((U32) f) | (1 + (U32_MAX >> 1)); 144 #else 145 return (U32) f; 146 #endif 147 } 148 return f > 0 ? U32_MAX : 0 /* NaN */; 149 } 150 151 I32 152 Perl_cast_i32(NV f) 153 { 154 if (f < I32_MAX_P1) 155 return f < I32_MIN ? I32_MIN : (I32) f; 156 if (f < U32_MAX_P1) { 157 #if CASTFLAGS & 2 158 if (f < U32_MAX_P1_HALF) 159 return (I32)(U32) f; 160 f -= U32_MAX_P1_HALF; 161 return (I32)(((U32) f) | (1 + (U32_MAX >> 1))); 162 #else 163 return (I32)(U32) f; 164 #endif 165 } 166 return f > 0 ? (I32)U32_MAX : 0 /* NaN */; 167 } 168 169 IV 170 Perl_cast_iv(NV f) 171 { 172 if (f < IV_MAX_P1) 173 return f < IV_MIN ? IV_MIN : (IV) f; 174 if (f < UV_MAX_P1) { 175 #if CASTFLAGS & 2 176 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */ 177 if (f < UV_MAX_P1_HALF) 178 return (IV)(UV) f; 179 f -= UV_MAX_P1_HALF; 180 return (IV)(((UV) f) | (1 + (UV_MAX >> 1))); 181 #else 182 return (IV)(UV) f; 183 #endif 184 } 185 return f > 0 ? (IV)UV_MAX : 0 /* NaN */; 186 } 187 188 UV 189 Perl_cast_uv(NV f) 190 { 191 if (f < 0.0) 192 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f; 193 if (f < UV_MAX_P1) { 194 #if CASTFLAGS & 2 195 if (f < UV_MAX_P1_HALF) 196 return (UV) f; 197 f -= UV_MAX_P1_HALF; 198 return ((UV) f) | (1 + (UV_MAX >> 1)); 199 #else 200 return (UV) f; 201 #endif 202 } 203 return f > 0 ? UV_MAX : 0 /* NaN */; 204 } 205 206 /* 207 =for apidoc grok_bin 208 209 converts a string representing a binary number to numeric form. 210 211 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives 212 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The 213 scan stops at the end of the string, or at just before the first invalid 214 character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, 215 encountering an invalid character (except NUL) will also trigger a warning. On 216 return C<*len_p> is set to the length of the scanned string, and C<*flags> 217 gives output flags. 218 219 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 220 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin> 221 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 222 and writes an approximation of the correct value into C<*result> (which is an 223 NV; or the approximation is discarded if C<result> is NULL). 224 225 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless 226 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. 227 228 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of 229 digits may be separated from each other by a single underscore; also a single 230 leading underscore is accepted. 231 232 =for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES 233 =for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX 234 =for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX 235 =for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT 236 237 =cut 238 239 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE 240 which suppresses any message for non-portable numbers that are still valid 241 on this platform. 242 */ 243 244 UV 245 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 246 { 247 PERL_ARGS_ASSERT_GROK_BIN; 248 249 return grok_bin(start, len_p, flags, result); 250 } 251 252 /* 253 =for apidoc grok_hex 254 255 converts a string representing a hex number to numeric form. 256 257 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives 258 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The 259 scan stops at the end of the string, or at just before the first invalid 260 character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, 261 encountering an invalid character (except NUL) will also trigger a warning. On 262 return C<*len_p> is set to the length of the scanned string, and C<*flags> 263 gives output flags. 264 265 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 266 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex> 267 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 268 and writes an approximation of the correct value into C<*result> (which is an 269 NV; or the approximation is discarded if C<result> is NULL). 270 271 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless 272 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. 273 274 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of 275 digits may be separated from each other by a single underscore; also a single 276 leading underscore is accepted. 277 278 =cut 279 280 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE> 281 which suppresses any message for non-portable numbers, but which are valid 282 on this platform. But, C<*flags> will have the corresponding flag bit set. 283 */ 284 285 UV 286 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 287 { 288 PERL_ARGS_ASSERT_GROK_HEX; 289 290 return grok_hex(start, len_p, flags, result); 291 } 292 293 /* 294 =for apidoc grok_oct 295 296 converts a string representing an octal number to numeric form. 297 298 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives 299 conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The 300 scan stops at the end of the string, or at just before the first invalid 301 character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, 302 encountering an invalid character (except NUL) will also trigger a warning. On 303 return C<*len_p> is set to the length of the scanned string, and C<*flags> 304 gives output flags. 305 306 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear, 307 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct> 308 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags, 309 and writes an approximation of the correct value into C<*result> (which is an 310 NV; or the approximation is discarded if C<result> is NULL). 311 312 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of 313 digits may be separated from each other by a single underscore; also a single 314 leading underscore is accepted. 315 316 The C<PERL_SCAN_DISALLOW_PREFIX> flag is always treated as being set for 317 this function. 318 319 =cut 320 321 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE> 322 which suppresses any message for non-portable numbers, but which are valid 323 on this platform. 324 */ 325 326 UV 327 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) 328 { 329 PERL_ARGS_ASSERT_GROK_OCT; 330 331 return grok_oct(start, len_p, flags, result); 332 } 333 334 STATIC void 335 S_output_non_portable(pTHX_ const U8 base) 336 { 337 /* Display the proper message for a number in the given input base not 338 * fitting in 32 bits */ 339 const char * which = (base == 2) 340 ? "Binary number > 0b11111111111111111111111111111111" 341 : (base == 8) 342 ? "Octal number > 037777777777" 343 : "Hexadecimal number > 0xffffffff"; 344 345 PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE; 346 347 /* Also there are listings for the other two. That's because, since they 348 * are the first word, it would be hard for a user to find them there 349 * starting with a %s */ 350 /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */ 351 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which); 352 } 353 354 UV 355 Perl_grok_bin_oct_hex(pTHX_ const char *start, 356 STRLEN *len_p, 357 I32 *flags, 358 NV *result, 359 const unsigned shift, /* 1 for binary; 3 for octal; 360 4 for hex */ 361 const U8 class_bit, 362 const char prefix 363 ) 364 365 { 366 const char *s0 = start; 367 const char *s; 368 STRLEN len = *len_p; 369 STRLEN bytes_so_far; /* How many real digits have been processed */ 370 UV value = 0; 371 NV value_nv = 0; 372 const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */ 373 const UV max_div= UV_MAX / base; /* Value above which, the next digit 374 processed would overflow */ 375 const I32 input_flags = *flags; 376 const bool allow_underscores = 377 cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES); 378 bool overflowed = FALSE; 379 380 /* In overflows, this keeps track of how much to multiply the overflowed NV 381 * by as we continue to parse the remaining digits */ 382 NV factor = 0; 383 384 /* This function unifies the core of grok_bin, grok_oct, and grok_hex. It 385 * is optimized for hex conversion. For example, it uses XDIGIT_VALUE to 386 * find the numeric value of a digit. That requires more instructions than 387 * OCTAL_VALUE would, but gives the same result for the narrowed range of 388 * octal digits; same for binary. If it were ever critical to squeeze more 389 * performance from this, the function could become grok_hex, and a regen 390 * perl script could scan it and write out two edited copies for the other 391 * two functions. That would improve the performance of all three 392 * somewhat. Besides eliminating XDIGIT_VALUE for the other two, extra 393 * parameters are now passed to this to avoid conditionals. Those could 394 * become declared consts, like: 395 * const U8 base = 16; 396 * const U8 base = 8; 397 * ... 398 */ 399 400 PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX; 401 402 ASSUME(inRANGE(shift, 1, 4) && shift != 2); 403 404 /* Clear output flags; unlikely to find a problem that sets them */ 405 *flags = 0; 406 407 if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) { 408 409 /* strip off leading b or 0b; x or 0x. 410 for compatibility silently suffer "b" and "0b" as valid binary; "x" 411 and "0x" as valid hex numbers. */ 412 if (len >= 1) { 413 if (isALPHA_FOLD_EQ(s0[0], prefix)) { 414 s0++; 415 len--; 416 } 417 else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) { 418 s0+=2; 419 len-=2; 420 } 421 } 422 } 423 424 s = s0; /* s0 potentially advanced from 'start' */ 425 426 /* Unroll the loop so that the first 8 digits are branchless except for the 427 * switch. A ninth hex one overflows a 32 bit word. */ 428 switch (len) { 429 case 0: 430 return 0; 431 default: 432 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break; 433 value = (value << shift) | XDIGIT_VALUE(*s); 434 s++; 435 /* FALLTHROUGH */ 436 case 7: 437 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break; 438 value = (value << shift) | XDIGIT_VALUE(*s); 439 s++; 440 /* FALLTHROUGH */ 441 case 6: 442 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break; 443 value = (value << shift) | XDIGIT_VALUE(*s); 444 s++; 445 /* FALLTHROUGH */ 446 case 5: 447 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break; 448 value = (value << shift) | XDIGIT_VALUE(*s); 449 s++; 450 /* FALLTHROUGH */ 451 case 4: 452 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break; 453 value = (value << shift) | XDIGIT_VALUE(*s); 454 s++; 455 /* FALLTHROUGH */ 456 case 3: 457 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break; 458 value = (value << shift) | XDIGIT_VALUE(*s); 459 s++; 460 /* FALLTHROUGH */ 461 case 2: 462 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break; 463 value = (value << shift) | XDIGIT_VALUE(*s); 464 s++; 465 /* FALLTHROUGH */ 466 case 1: 467 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break; 468 value = (value << shift) | XDIGIT_VALUE(*s); 469 470 if (LIKELY(len <= 8)) { 471 return value; 472 } 473 474 s++; 475 break; 476 } 477 478 bytes_so_far = s - s0; 479 factor = shift << bytes_so_far; 480 len -= bytes_so_far; 481 482 for (; len--; s++) { 483 if (_generic_isCC(*s, class_bit)) { 484 /* Write it in this wonky order with a goto to attempt to get the 485 compiler to make the common case integer-only loop pretty tight. 486 With gcc seems to be much straighter code than old scan_hex. 487 (khw suspects that adding a LIKELY() just above would do the 488 same thing) */ 489 redo: 490 if (LIKELY(value <= max_div)) { 491 value = (value << shift) | XDIGIT_VALUE(*s); 492 /* Note XDIGIT_VALUE() is branchless, works on binary 493 * and octal as well, so can be used here, without 494 * slowing those down */ 495 factor *= 1 << shift; 496 continue; 497 } 498 499 /* Bah. We are about to overflow. Instead, add the unoverflowed 500 * value to an NV that contains an approximation to the correct 501 * value. Each time through the loop we have increased 'factor' so 502 * that it gives how much the current approximation needs to 503 * effectively be shifted to make room for this new value */ 504 value_nv *= factor; 505 value_nv += (NV) value; 506 507 /* Then we keep accumulating digits, until all are parsed. We 508 * start over using the current input value. This will be added to 509 * 'value_nv' eventually, either when all digits are gone, or we 510 * have overflowed this fresh start. */ 511 value = XDIGIT_VALUE(*s); 512 factor = 1 << shift; 513 514 if (! overflowed) { 515 overflowed = TRUE; 516 if ( ! (input_flags & PERL_SCAN_SILENT_OVERFLOW) 517 && ckWARN_d(WARN_OVERFLOW)) 518 { 519 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), 520 "Integer overflow in %s number", 521 (base == 16) ? "hexadecimal" 522 : (base == 2) 523 ? "binary" 524 : "octal"); 525 } 526 } 527 continue; 528 } 529 530 if ( *s == '_' 531 && len 532 && allow_underscores 533 && _generic_isCC(s[1], class_bit) 534 535 /* Don't allow a leading underscore if the only-medial bit is 536 * set */ 537 && ( LIKELY(s > s0) 538 || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES) 539 != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES))) 540 { 541 --len; 542 ++s; 543 goto redo; 544 } 545 546 if (*s) { 547 if ( ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT) 548 && ckWARN(WARN_DIGIT)) 549 { 550 if (base != 8) { 551 Perl_warner(aTHX_ packWARN(WARN_DIGIT), 552 "Illegal %s digit '%c' ignored", 553 ((base == 2) 554 ? "binary" 555 : "hexadecimal"), 556 *s); 557 } 558 else if (isDIGIT(*s)) { /* octal base */ 559 560 /* Allow \octal to work the DWIM way (that is, stop 561 * scanning as soon as non-octal characters are seen, 562 * complain only if someone seems to want to use the digits 563 * eight and nine. Since we know it is not octal, then if 564 * isDIGIT, must be an 8 or 9). */ 565 Perl_warner(aTHX_ packWARN(WARN_DIGIT), 566 "Illegal octal digit '%c' ignored", *s); 567 } 568 } 569 570 if (input_flags & PERL_SCAN_NOTIFY_ILLDIGIT) { 571 *flags |= PERL_SCAN_NOTIFY_ILLDIGIT; 572 } 573 } 574 575 break; 576 } 577 578 *len_p = s - start; 579 580 if (LIKELY(! overflowed)) { 581 #if UVSIZE > 4 582 if ( UNLIKELY(value > 0xffffffff) 583 && ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE)) 584 { 585 output_non_portable(base); 586 *flags |= PERL_SCAN_SILENT_NON_PORTABLE; 587 } 588 #endif 589 return value; 590 } 591 592 /* Overflowed: Calculate the final overflow approximation */ 593 value_nv *= factor; 594 value_nv += (NV) value; 595 596 output_non_portable(base); 597 598 *flags |= PERL_SCAN_GREATER_THAN_UV_MAX 599 | PERL_SCAN_SILENT_NON_PORTABLE; 600 if (result) 601 *result = value_nv; 602 return UV_MAX; 603 } 604 605 /* 606 =for apidoc scan_bin 607 608 For backwards compatibility. Use C<grok_bin> instead. 609 610 =for apidoc scan_hex 611 612 For backwards compatibility. Use C<grok_hex> instead. 613 614 =for apidoc scan_oct 615 616 For backwards compatibility. Use C<grok_oct> instead. 617 618 =cut 619 */ 620 621 NV 622 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 623 { 624 NV rnv; 625 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 626 const UV ruv = grok_bin (start, &len, &flags, &rnv); 627 628 PERL_ARGS_ASSERT_SCAN_BIN; 629 630 *retlen = len; 631 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 632 } 633 634 NV 635 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 636 { 637 NV rnv; 638 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 639 const UV ruv = grok_oct (start, &len, &flags, &rnv); 640 641 PERL_ARGS_ASSERT_SCAN_OCT; 642 643 *retlen = len; 644 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 645 } 646 647 NV 648 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen) 649 { 650 NV rnv; 651 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0; 652 const UV ruv = grok_hex (start, &len, &flags, &rnv); 653 654 PERL_ARGS_ASSERT_SCAN_HEX; 655 656 *retlen = len; 657 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv; 658 } 659 660 /* 661 =for apidoc grok_numeric_radix 662 663 Scan and skip for a numeric decimal separator (radix). 664 665 =cut 666 */ 667 bool 668 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) 669 { 670 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX; 671 672 #ifdef USE_LOCALE_NUMERIC 673 674 if (IN_LC(LC_NUMERIC)) { 675 STRLEN len; 676 char * radix; 677 bool matches_radix = FALSE; 678 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 679 680 STORE_LC_NUMERIC_FORCE_TO_UNDERLYING(); 681 682 radix = SvPV(PL_numeric_radix_sv, len); 683 radix = savepvn(radix, len); 684 685 RESTORE_LC_NUMERIC(); 686 687 if (*sp + len <= send) { 688 matches_radix = memEQ(*sp, radix, len); 689 } 690 691 Safefree(radix); 692 693 if (matches_radix) { 694 *sp += len; 695 return TRUE; 696 } 697 } 698 699 #endif 700 701 /* always try "." if numeric radix didn't match because 702 * we may have data from different locales mixed */ 703 if (*sp < send && **sp == '.') { 704 ++*sp; 705 return TRUE; 706 } 707 708 return FALSE; 709 } 710 711 /* 712 =for apidoc grok_infnan 713 714 Helper for C<grok_number()>, accepts various ways of spelling "infinity" 715 or "not a number", and returns one of the following flag combinations: 716 717 IS_NUMBER_INFINITY 718 IS_NUMBER_NAN 719 IS_NUMBER_INFINITY | IS_NUMBER_NEG 720 IS_NUMBER_NAN | IS_NUMBER_NEG 721 0 722 723 possibly |-ed with C<IS_NUMBER_TRAILING>. 724 725 If an infinity or a not-a-number is recognized, C<*sp> will point to 726 one byte past the end of the recognized string. If the recognition fails, 727 zero is returned, and C<*sp> will not move. 728 729 =for apidoc Amn|bool|IS_NUMBER_GREATER_THAN_UV_MAX 730 =for apidoc Amn|bool|IS_NUMBER_INFINITY 731 =for apidoc Amn|bool|IS_NUMBER_IN_UV 732 =for apidoc Amn|bool|IS_NUMBER_NAN 733 =for apidoc Amn|bool|IS_NUMBER_NEG 734 =for apidoc Amn|bool|IS_NUMBER_NOT_INT 735 736 =cut 737 */ 738 739 int 740 Perl_grok_infnan(pTHX_ const char** sp, const char* send) 741 { 742 const char* s = *sp; 743 int flags = 0; 744 #if defined(NV_INF) || defined(NV_NAN) 745 bool odh = FALSE; /* one-dot-hash: 1.#INF */ 746 747 PERL_ARGS_ASSERT_GROK_INFNAN; 748 749 if (*s == '+') { 750 s++; if (s == send) return 0; 751 } 752 else if (*s == '-') { 753 flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */ 754 s++; if (s == send) return 0; 755 } 756 757 if (*s == '1') { 758 /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN) 759 * Let's keep the dot optional. */ 760 s++; if (s == send) return 0; 761 if (*s == '.') { 762 s++; if (s == send) return 0; 763 } 764 if (*s == '#') { 765 s++; if (s == send) return 0; 766 } else 767 return 0; 768 odh = TRUE; 769 } 770 771 if (isALPHA_FOLD_EQ(*s, 'I')) { 772 /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */ 773 774 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; 775 s++; if (s == send) return 0; 776 if (isALPHA_FOLD_EQ(*s, 'F')) { 777 s++; 778 if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) { 779 int fail = 780 flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING; 781 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail; 782 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail; 783 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail; 784 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail; 785 s++; 786 } else if (odh) { 787 while (s < send && *s == '0') { /* 1.#INF00 */ 788 s++; 789 } 790 } 791 while (s < send && isSPACE(*s)) 792 s++; 793 if (s < send && *s) { 794 flags |= IS_NUMBER_TRAILING; 795 } 796 flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; 797 } 798 else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */ 799 s++; 800 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 801 while (s < send && *s == '0') { /* 1.#IND00 */ 802 s++; 803 } 804 if (s < send && *s) { 805 flags |= IS_NUMBER_TRAILING; 806 } 807 } else 808 return 0; 809 } 810 else { 811 /* Maybe NAN of some sort */ 812 813 if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) { 814 /* snan, qNaN */ 815 /* XXX do something with the snan/qnan difference */ 816 s++; if (s == send) return 0; 817 } 818 819 if (isALPHA_FOLD_EQ(*s, 'N')) { 820 s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0; 821 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; 822 s++; 823 824 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; 825 if (s == send) { 826 return flags; 827 } 828 829 /* NaN can be followed by various stuff (NaNQ, NaNS), but 830 * there are also multiple different NaN values, and some 831 * implementations output the "payload" values, 832 * e.g. NaN123, NAN(abc), while some legacy implementations 833 * have weird stuff like NaN%. */ 834 if (isALPHA_FOLD_EQ(*s, 'q') || 835 isALPHA_FOLD_EQ(*s, 's')) { 836 /* "nanq" or "nans" are ok, though generating 837 * these portably is tricky. */ 838 s++; 839 if (s == send) { 840 return flags; 841 } 842 } 843 if (*s == '(') { 844 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */ 845 const char *t; 846 s++; 847 if (s == send) { 848 return flags | IS_NUMBER_TRAILING; 849 } 850 t = s + 1; 851 while (t < send && *t && *t != ')') { 852 t++; 853 } 854 if (t == send) { 855 return flags | IS_NUMBER_TRAILING; 856 } 857 if (*t == ')') { 858 int nantype; 859 UV nanval; 860 if (s[0] == '0' && s + 2 < t && 861 isALPHA_FOLD_EQ(s[1], 'x') && 862 isXDIGIT(s[2])) { 863 STRLEN len = t - s; 864 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 865 nanval = grok_hex(s, &len, &flags, NULL); 866 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) { 867 nantype = 0; 868 } else { 869 nantype = IS_NUMBER_IN_UV; 870 } 871 s += len; 872 } else if (s[0] == '0' && s + 2 < t && 873 isALPHA_FOLD_EQ(s[1], 'b') && 874 (s[2] == '0' || s[2] == '1')) { 875 STRLEN len = t - s; 876 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES; 877 nanval = grok_bin(s, &len, &flags, NULL); 878 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) { 879 nantype = 0; 880 } else { 881 nantype = IS_NUMBER_IN_UV; 882 } 883 s += len; 884 } else { 885 const char *u; 886 nantype = 887 grok_number_flags(s, t - s, &nanval, 888 PERL_SCAN_TRAILING | 889 PERL_SCAN_ALLOW_UNDERSCORES); 890 /* Unfortunately grok_number_flags() doesn't 891 * tell how far we got and the ')' will always 892 * be "trailing", so we need to double-check 893 * whether we had something dubious. */ 894 for (u = s; u < t; u++) { 895 if (!isDIGIT(*u)) { 896 flags |= IS_NUMBER_TRAILING; 897 break; 898 } 899 } 900 s = u; 901 } 902 903 /* XXX Doesn't do octal: nan("0123"). 904 * Probably not a big loss. */ 905 906 if ((nantype & IS_NUMBER_NOT_INT) || 907 !(nantype && IS_NUMBER_IN_UV)) { 908 /* XXX the nanval is currently unused, that is, 909 * not inserted as the NaN payload of the NV. 910 * But the above code already parses the C99 911 * nan(...) format. See below, and see also 912 * the nan() in POSIX.xs. 913 * 914 * Certain configuration combinations where 915 * NVSIZE is greater than UVSIZE mean that 916 * a single UV cannot contain all the possible 917 * NaN payload bits. There would need to be 918 * some more generic syntax than "nan($uv)". 919 * 920 * Issues to keep in mind: 921 * 922 * (1) In most common cases there would 923 * not be an integral number of bytes that 924 * could be set, only a certain number of bits. 925 * For example for the common case of 926 * NVSIZE == UVSIZE == 8 there is room for 52 927 * bits in the payload, but the most significant 928 * bit is commonly reserved for the 929 * signaling/quiet bit, leaving 51 bits. 930 * Furthermore, the C99 nan() is supposed 931 * to generate quiet NaNs, so it is doubtful 932 * whether it should be able to generate 933 * signaling NaNs. For the x86 80-bit doubles 934 * (if building a long double Perl) there would 935 * be 62 bits (s/q bit being the 63rd). 936 * 937 * (2) Endianness of the payload bits. If the 938 * payload is specified as an UV, the low-order 939 * bits of the UV are naturally little-endianed 940 * (rightmost) bits of the payload. The endianness 941 * of UVs and NVs can be different. */ 942 return 0; 943 } 944 if (s < t) { 945 flags |= IS_NUMBER_TRAILING; 946 } 947 } else { 948 /* Looked like nan(...), but no close paren. */ 949 flags |= IS_NUMBER_TRAILING; 950 } 951 } else { 952 while (s < send && isSPACE(*s)) 953 s++; 954 if (s < send && *s) { 955 /* Note that we here implicitly accept (parse as 956 * "nan", but with warnings) also any other weird 957 * trailing stuff for "nan". In the above we just 958 * check that if we got the C99-style "nan(...)", 959 * the "..." looks sane. 960 * If in future we accept more ways of specifying 961 * the nan payload, the accepting would happen around 962 * here. */ 963 flags |= IS_NUMBER_TRAILING; 964 } 965 } 966 s = send; 967 } 968 else 969 return 0; 970 } 971 972 while (s < send && isSPACE(*s)) 973 s++; 974 975 #else 976 PERL_UNUSED_ARG(send); 977 #endif /* #if defined(NV_INF) || defined(NV_NAN) */ 978 *sp = s; 979 return flags; 980 } 981 982 /* 983 =for apidoc grok_number_flags 984 985 Recognise (or not) a number. The type of the number is returned 986 (0 if unrecognised), otherwise it is a bit-ORed combination of 987 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>, 988 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h). 989 990 If the value of the number can fit in a UV, it is returned in C<*valuep>. 991 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV> 992 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned 993 to during processing even though C<IS_NUMBER_IN_UV> is not set on return. 994 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when 995 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur. 996 997 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were 998 seen (in which case C<*valuep> gives the true value truncated to an integer), and 999 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the 1000 absolute value). C<IS_NUMBER_IN_UV> is not set if e notation was used or the 1001 number is larger than a UV. 1002 1003 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing 1004 non-numeric text on an otherwise successful I<grok>, setting 1005 C<IS_NUMBER_TRAILING> on the result. 1006 1007 =for apidoc Amnh||PERL_SCAN_TRAILING 1008 1009 =for apidoc grok_number 1010 1011 Identical to C<grok_number_flags()> with C<flags> set to zero. 1012 1013 =cut 1014 */ 1015 int 1016 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) 1017 { 1018 PERL_ARGS_ASSERT_GROK_NUMBER; 1019 1020 return grok_number_flags(pv, len, valuep, 0); 1021 } 1022 1023 static const UV uv_max_div_10 = UV_MAX / 10; 1024 static const U8 uv_max_mod_10 = UV_MAX % 10; 1025 1026 int 1027 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) 1028 { 1029 const char *s = pv; 1030 const char * const send = pv + len; 1031 const char *d; 1032 int numtype = 0; 1033 1034 PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS; 1035 1036 if (UNLIKELY(isSPACE(*s))) { 1037 s++; 1038 while (s < send) { 1039 if (LIKELY(! isSPACE(*s))) goto non_space; 1040 s++; 1041 } 1042 return 0; 1043 non_space: ; 1044 } 1045 1046 /* See if signed. This assumes it is more likely to be unsigned, so 1047 * penalizes signed by an extra conditional; rewarding unsigned by one fewer 1048 * (because we detect '+' and '-' with a single test and then add a 1049 * conditional to determine which) */ 1050 if (UNLIKELY((*s & ~('+' ^ '-')) == ('+' & '-') )) { 1051 1052 /* Here, on ASCII platforms, *s is one of: 0x29 = ')', 2B = '+', 2D = '-', 1053 * 2F = '/'. That is, it is either a sign, or a character that doesn't 1054 * belong in a number at all (unless it's a radix character in a weird 1055 * locale). Given this, it's far more likely to be a minus than the 1056 * others. (On EBCDIC it is one of 42, 44, 46, 48, 4A, 4C, 4E, (not 40 1057 * because can't be a space) 60, 62, 64, 66, 68, 6A, 6C, 6E. Again, 1058 * only potentially a weird radix character, or 4E='+', or 60='-') */ 1059 if (LIKELY(*s == '-')) { 1060 s++; 1061 numtype = IS_NUMBER_NEG; 1062 } 1063 else if (LIKELY(*s == '+')) 1064 s++; 1065 else /* Can't just return failure here, as it could be a weird radix 1066 character */ 1067 goto done_sign; 1068 1069 if (UNLIKELY(s == send)) 1070 return 0; 1071 done_sign: ; 1072 } 1073 1074 /* The first digit (after optional sign): note that might 1075 * also point to "infinity" or "nan", or "1.#INF". */ 1076 d = s; 1077 1078 /* next must be digit or the radix separator or beginning of infinity/nan */ 1079 if (LIKELY(isDIGIT(*s))) { 1080 /* UVs are at least 32 bits, so the first 9 decimal digits cannot 1081 overflow. */ 1082 UV value = *s - '0'; /* Process this first (perhaps only) digit */ 1083 int digit; 1084 1085 s++; 1086 1087 switch(send - s) { 1088 default: /* 8 or more remaining characters */ 1089 digit = *s - '0'; 1090 if (UNLIKELY(! inRANGE(digit, 0, 9))) break; 1091 value = value * 10 + digit; 1092 s++; 1093 /* FALLTHROUGH */ 1094 case 7: 1095 digit = *s - '0'; 1096 if (UNLIKELY(! inRANGE(digit, 0, 9))) break; 1097 value = value * 10 + digit; 1098 s++; 1099 /* FALLTHROUGH */ 1100 case 6: 1101 digit = *s - '0'; 1102 if (UNLIKELY(! inRANGE(digit, 0, 9))) break; 1103 value = value * 10 + digit; 1104 s++; 1105 /* FALLTHROUGH */ 1106 case 5: 1107 digit = *s - '0'; 1108 if (UNLIKELY(! inRANGE(digit, 0, 9))) break; 1109 value = value * 10 + digit; 1110 s++; 1111 /* FALLTHROUGH */ 1112 case 4: 1113 digit = *s - '0'; 1114 if (UNLIKELY(! inRANGE(digit, 0, 9))) break; 1115 value = value * 10 + digit; 1116 s++; 1117 /* FALLTHROUGH */ 1118 case 3: 1119 digit = *s - '0'; 1120 if (UNLIKELY(! inRANGE(digit, 0, 9))) break; 1121 value = value * 10 + digit; 1122 s++; 1123 /* FALLTHROUGH */ 1124 case 2: 1125 digit = *s - '0'; 1126 if (UNLIKELY(! inRANGE(digit, 0, 9))) break; 1127 value = value * 10 + digit; 1128 s++; 1129 /* FALLTHROUGH */ 1130 case 1: 1131 digit = *s - '0'; 1132 if (UNLIKELY(! inRANGE(digit, 0, 9))) break; 1133 value = value * 10 + digit; 1134 s++; 1135 /* FALLTHROUGH */ 1136 case 0: /* This case means the string consists of just the one 1137 digit we already have processed */ 1138 1139 /* If we got here by falling through other than the default: case, we 1140 * have processed the whole string, and know it consists entirely of 1141 * digits, and can't have overflowed. */ 1142 if (s >= send) { 1143 if (valuep) 1144 *valuep = value; 1145 return numtype|IS_NUMBER_IN_UV; 1146 } 1147 1148 /* Here, there are extra characters beyond the first 9 digits. Use a 1149 * loop to accumulate any remaining digits, until we get a non-digit or 1150 * would overflow. Note that leading zeros could cause us to get here 1151 * without being close to overflowing. 1152 * 1153 * (The conditional 's >= send' above could be eliminated by making the 1154 * default: in the switch to instead be 'case 8:', and process longer 1155 * strings separately by using the loop below. This would penalize 1156 * these inputs by the extra instructions needed for looping. That 1157 * could be eliminated by copying the unwound code from above to handle 1158 * the firt 9 digits of these. khw didn't think this saving of a 1159 * single conditional was worth it.) */ 1160 do { 1161 digit = *s - '0'; 1162 if (! inRANGE(digit, 0, 9)) goto mantissa_done; 1163 if ( value < uv_max_div_10 1164 || ( value == uv_max_div_10 1165 && digit <= uv_max_mod_10)) 1166 { 1167 value = value * 10 + digit; 1168 s++; 1169 } 1170 else { /* value would overflow. skip the remaining digits, don't 1171 worry about setting *valuep. */ 1172 do { 1173 s++; 1174 } while (s < send && isDIGIT(*s)); 1175 numtype |= 1176 IS_NUMBER_GREATER_THAN_UV_MAX; 1177 goto skip_value; 1178 } 1179 } while (s < send); 1180 } /* End switch on input length */ 1181 1182 mantissa_done: 1183 numtype |= IS_NUMBER_IN_UV; 1184 if (valuep) 1185 *valuep = value; 1186 1187 skip_value: 1188 if (GROK_NUMERIC_RADIX(&s, send)) { 1189 numtype |= IS_NUMBER_NOT_INT; 1190 while (s < send && isDIGIT(*s)) /* optional digits after the radix */ 1191 s++; 1192 } 1193 } /* End of *s is a digit */ 1194 else if (GROK_NUMERIC_RADIX(&s, send)) { 1195 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ 1196 /* no digits before the radix means we need digits after it */ 1197 if (s < send && isDIGIT(*s)) { 1198 do { 1199 s++; 1200 } while (s < send && isDIGIT(*s)); 1201 if (valuep) { 1202 /* integer approximation is valid - it's 0. */ 1203 *valuep = 0; 1204 } 1205 } 1206 else 1207 return 0; 1208 } 1209 1210 if (LIKELY(s > d) && s < send) { 1211 /* we can have an optional exponent part */ 1212 if (UNLIKELY(isALPHA_FOLD_EQ(*s, 'e'))) { 1213 s++; 1214 if (s < send && (*s == '-' || *s == '+')) 1215 s++; 1216 if (s < send && isDIGIT(*s)) { 1217 do { 1218 s++; 1219 } while (s < send && isDIGIT(*s)); 1220 } 1221 else if (flags & PERL_SCAN_TRAILING) 1222 return numtype | IS_NUMBER_TRAILING; 1223 else 1224 return 0; 1225 1226 /* The only flag we keep is sign. Blow away any "it's UV" */ 1227 numtype &= IS_NUMBER_NEG; 1228 numtype |= IS_NUMBER_NOT_INT; 1229 } 1230 } 1231 1232 while (s < send) { 1233 if (LIKELY(! isSPACE(*s))) goto end_space; 1234 s++; 1235 } 1236 return numtype; 1237 1238 end_space: 1239 1240 if (UNLIKELY(memEQs(pv, len, "0 but true"))) { 1241 if (valuep) 1242 *valuep = 0; 1243 return IS_NUMBER_IN_UV; 1244 } 1245 1246 /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */ 1247 if ((s + 2 < send) && UNLIKELY(memCHRs("inqs#", toFOLD(*s)))) { 1248 /* Really detect inf/nan. Start at d, not s, since the above 1249 * code might have already consumed the "1." or "1". */ 1250 const int infnan = Perl_grok_infnan(aTHX_ &d, send); 1251 if ((infnan & IS_NUMBER_INFINITY)) { 1252 return (numtype | infnan); /* Keep sign for infinity. */ 1253 } 1254 else if ((infnan & IS_NUMBER_NAN)) { 1255 return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */ 1256 } 1257 } 1258 else if (flags & PERL_SCAN_TRAILING) { 1259 return numtype | IS_NUMBER_TRAILING; 1260 } 1261 1262 return 0; 1263 } 1264 1265 /* 1266 =for apidoc grok_atoUV 1267 1268 parse a string, looking for a decimal unsigned integer. 1269 1270 On entry, C<pv> points to the beginning of the string; 1271 C<valptr> points to a UV that will receive the converted value, if found; 1272 C<endptr> is either NULL or points to a variable that points to one byte 1273 beyond the point in C<pv> that this routine should examine. 1274 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated. 1275 1276 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with 1277 no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that 1278 value. 1279 1280 If you constrain the portion of C<pv> that is looked at by this function (by 1281 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a 1282 valid value, it will return TRUE, setting C<*endptr> to the byte following the 1283 final digit of the value. But if there is no constraint at what's looked at, 1284 all of C<pv> must be valid in order for TRUE to be returned. C<*endptr> is 1285 unchanged from its value on input if FALSE is returned; 1286 1287 The only characters this accepts are the decimal digits '0'..'9'. 1288 1289 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional 1290 leading whitespace, nor negative inputs. If such features are required, the 1291 calling code needs to explicitly implement those. 1292 1293 Note that this function returns FALSE for inputs that would overflow a UV, 1294 or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor 1295 C<01>, C<002>, I<etc>. 1296 1297 Background: C<atoi> has severe problems with illegal inputs, it cannot be 1298 used for incremental parsing, and therefore should be avoided 1299 C<atoi> and C<strtol> are also affected by locale settings, which can also be 1300 seen as a bug (global state controlled by user environment). 1301 1302 =cut 1303 1304 */ 1305 1306 bool 1307 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr) 1308 { 1309 const char* s = pv; 1310 const char** eptr; 1311 const char* end2; /* Used in case endptr is NULL. */ 1312 UV val = 0; /* The parsed value. */ 1313 1314 PERL_ARGS_ASSERT_GROK_ATOUV; 1315 1316 if (endptr) { 1317 eptr = endptr; 1318 } 1319 else { 1320 end2 = s + strlen(s); 1321 eptr = &end2; 1322 } 1323 1324 if ( *eptr <= s 1325 || ! isDIGIT(*s)) 1326 { 1327 return FALSE; 1328 } 1329 1330 /* Single-digit inputs are quite common. */ 1331 val = *s++ - '0'; 1332 if (s < *eptr && isDIGIT(*s)) { 1333 /* Fail on extra leading zeros. */ 1334 if (val == 0) 1335 return FALSE; 1336 while (s < *eptr && isDIGIT(*s)) { 1337 /* This could be unrolled like in grok_number(), but 1338 * the expected uses of this are not speed-needy, and 1339 * unlikely to need full 64-bitness. */ 1340 const U8 digit = *s++ - '0'; 1341 if (val < uv_max_div_10 || 1342 (val == uv_max_div_10 && digit <= uv_max_mod_10)) { 1343 val = val * 10 + digit; 1344 } else { 1345 return FALSE; 1346 } 1347 } 1348 } 1349 1350 if (endptr == NULL) { 1351 if (*s) { 1352 return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */ 1353 } 1354 } 1355 else { 1356 *endptr = s; 1357 } 1358 1359 *valptr = val; 1360 return TRUE; 1361 } 1362 1363 #ifndef Perl_strtod 1364 STATIC NV 1365 S_mulexp10(NV value, I32 exponent) 1366 { 1367 NV result = 1.0; 1368 NV power = 10.0; 1369 bool negative = 0; 1370 I32 bit; 1371 1372 if (exponent == 0) 1373 return value; 1374 if (value == 0) 1375 return (NV)0; 1376 1377 /* On OpenVMS VAX we by default use the D_FLOAT double format, 1378 * and that format does not have *easy* capabilities [1] for 1379 * overflowing doubles 'silently' as IEEE fp does. We also need 1380 * to support G_FLOAT on both VAX and Alpha, and though the exponent 1381 * range is much larger than D_FLOAT it still doesn't do silent 1382 * overflow. Therefore we need to detect early whether we would 1383 * overflow (this is the behaviour of the native string-to-float 1384 * conversion routines, and therefore of native applications, too). 1385 * 1386 * [1] Trying to establish a condition handler to trap floating point 1387 * exceptions is not a good idea. */ 1388 1389 /* In UNICOS and in certain Cray models (such as T90) there is no 1390 * IEEE fp, and no way at all from C to catch fp overflows gracefully. 1391 * There is something you can do if you are willing to use some 1392 * inline assembler: the instruction is called DFI-- but that will 1393 * disable *all* floating point interrupts, a little bit too large 1394 * a hammer. Therefore we need to catch potential overflows before 1395 * it's too late. */ 1396 1397 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP) 1398 STMT_START { 1399 const NV exp_v = log10(value); 1400 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP) 1401 return NV_MAX; 1402 if (exponent < 0) { 1403 if (-(exponent + exp_v) >= NV_MAX_10_EXP) 1404 return 0.0; 1405 while (-exponent >= NV_MAX_10_EXP) { 1406 /* combination does not overflow, but 10^(-exponent) does */ 1407 value /= 10; 1408 ++exponent; 1409 } 1410 } 1411 } STMT_END; 1412 #endif 1413 1414 if (exponent < 0) { 1415 negative = 1; 1416 exponent = -exponent; 1417 #ifdef NV_MAX_10_EXP 1418 /* for something like 1234 x 10^-309, the action of calculating 1419 * the intermediate value 10^309 then returning 1234 / (10^309) 1420 * will fail, since 10^309 becomes infinity. In this case try to 1421 * refactor it as 123 / (10^308) etc. 1422 */ 1423 while (value && exponent > NV_MAX_10_EXP) { 1424 exponent--; 1425 value /= 10; 1426 } 1427 if (value == 0.0) 1428 return value; 1429 #endif 1430 } 1431 #if defined(__osf__) 1432 /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV) 1433 * Tru64 fp behavior on inf/nan is somewhat broken. Another way 1434 * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF) 1435 * but that breaks another set of infnan.t tests. */ 1436 # define FP_OVERFLOWS_TO_ZERO 1437 #endif 1438 for (bit = 1; exponent; bit <<= 1) { 1439 if (exponent & bit) { 1440 exponent ^= bit; 1441 result *= power; 1442 #ifdef FP_OVERFLOWS_TO_ZERO 1443 if (result == 0) 1444 # ifdef NV_INF 1445 return value < 0 ? -NV_INF : NV_INF; 1446 # else 1447 return value < 0 ? -FLT_MAX : FLT_MAX; 1448 # endif 1449 #endif 1450 /* Floating point exceptions are supposed to be turned off, 1451 * but if we're obviously done, don't risk another iteration. 1452 */ 1453 if (exponent == 0) break; 1454 } 1455 power *= power; 1456 } 1457 return negative ? value / result : value * result; 1458 } 1459 #endif /* #ifndef Perl_strtod */ 1460 1461 #ifdef Perl_strtod 1462 # define ATOF(s, x) my_atof2(s, &x) 1463 #else 1464 # define ATOF(s, x) Perl_atof2(s, x) 1465 #endif 1466 1467 NV 1468 Perl_my_atof(pTHX_ const char* s) 1469 { 1470 /* 's' must be NUL terminated */ 1471 1472 NV x = 0.0; 1473 1474 PERL_ARGS_ASSERT_MY_ATOF; 1475 1476 #if ! defined(USE_LOCALE_NUMERIC) 1477 1478 ATOF(s, x); 1479 1480 #else 1481 1482 { 1483 DECLARATION_FOR_LC_NUMERIC_MANIPULATION; 1484 STORE_LC_NUMERIC_SET_TO_NEEDED(); 1485 if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) { 1486 ATOF(s,x); 1487 } 1488 else { 1489 1490 /* Look through the string for the first thing that looks like a 1491 * decimal point: either the value in the current locale or the 1492 * standard fallback of '.'. The one which appears earliest in the 1493 * input string is the one that we should have atof look for. Note 1494 * that we have to determine this beforehand because on some 1495 * systems, Perl_atof2 is just a wrapper around the system's atof. 1496 * */ 1497 const char * const standard_pos = strchr(s, '.'); 1498 const char * const local_pos 1499 = strstr(s, SvPV_nolen(PL_numeric_radix_sv)); 1500 const bool use_standard_radix 1501 = standard_pos && (!local_pos || standard_pos < local_pos); 1502 1503 if (use_standard_radix) { 1504 SET_NUMERIC_STANDARD(); 1505 LOCK_LC_NUMERIC_STANDARD(); 1506 } 1507 1508 ATOF(s,x); 1509 1510 if (use_standard_radix) { 1511 UNLOCK_LC_NUMERIC_STANDARD(); 1512 SET_NUMERIC_UNDERLYING(); 1513 } 1514 } 1515 RESTORE_LC_NUMERIC(); 1516 } 1517 1518 #endif 1519 1520 return x; 1521 } 1522 1523 #if defined(NV_INF) || defined(NV_NAN) 1524 1525 static char* 1526 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value) 1527 { 1528 const char *p0 = negative ? s - 1 : s; 1529 const char *p = p0; 1530 const int infnan = grok_infnan(&p, send); 1531 if (infnan && p != p0) { 1532 /* If we can generate inf/nan directly, let's do so. */ 1533 #ifdef NV_INF 1534 if ((infnan & IS_NUMBER_INFINITY)) { 1535 *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF; 1536 return (char*)p; 1537 } 1538 #endif 1539 #ifdef NV_NAN 1540 if ((infnan & IS_NUMBER_NAN)) { 1541 *value = NV_NAN; 1542 return (char*)p; 1543 } 1544 #endif 1545 #ifdef Perl_strtod 1546 /* If still here, we didn't have either NV_INF or NV_NAN, 1547 * and can try falling back to native strtod/strtold. 1548 * 1549 * The native interface might not recognize all the possible 1550 * inf/nan strings Perl recognizes. What we can try 1551 * is to try faking the input. We will try inf/-inf/nan 1552 * as the most promising/portable input. */ 1553 { 1554 const char* fake = "silence compiler warning"; 1555 char* endp; 1556 NV nv; 1557 #ifdef NV_INF 1558 if ((infnan & IS_NUMBER_INFINITY)) { 1559 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf"; 1560 } 1561 #endif 1562 #ifdef NV_NAN 1563 if ((infnan & IS_NUMBER_NAN)) { 1564 fake = "nan"; 1565 } 1566 #endif 1567 assert(strNE(fake, "silence compiler warning")); 1568 nv = S_strtod(aTHX_ fake, &endp); 1569 if (fake != endp) { 1570 #ifdef NV_INF 1571 if ((infnan & IS_NUMBER_INFINITY)) { 1572 # ifdef Perl_isinf 1573 if (Perl_isinf(nv)) 1574 *value = nv; 1575 # else 1576 /* last resort, may generate SIGFPE */ 1577 *value = Perl_exp((NV)1e9); 1578 if ((infnan & IS_NUMBER_NEG)) 1579 *value = -*value; 1580 # endif 1581 return (char*)p; /* p, not endp */ 1582 } 1583 #endif 1584 #ifdef NV_NAN 1585 if ((infnan & IS_NUMBER_NAN)) { 1586 # ifdef Perl_isnan 1587 if (Perl_isnan(nv)) 1588 *value = nv; 1589 # else 1590 /* last resort, may generate SIGFPE */ 1591 *value = Perl_log((NV)-1.0); 1592 # endif 1593 return (char*)p; /* p, not endp */ 1594 #endif 1595 } 1596 } 1597 } 1598 #endif /* #ifdef Perl_strtod */ 1599 } 1600 return NULL; 1601 } 1602 1603 #endif /* if defined(NV_INF) || defined(NV_NAN) */ 1604 1605 char* 1606 Perl_my_atof2(pTHX_ const char* orig, NV* value) 1607 { 1608 PERL_ARGS_ASSERT_MY_ATOF2; 1609 return my_atof3(orig, value, 0); 1610 } 1611 1612 char* 1613 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len) 1614 { 1615 const char* s = orig; 1616 NV result[3] = {0.0, 0.0, 0.0}; 1617 #if defined(USE_PERL_ATOF) || defined(Perl_strtod) 1618 const char* send = s + ((len != 0) 1619 ? len 1620 : strlen(orig)); /* one past the last */ 1621 bool negative = 0; 1622 #endif 1623 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod) 1624 UV accumulator[2] = {0,0}; /* before/after dp */ 1625 bool seen_digit = 0; 1626 I32 exp_adjust[2] = {0,0}; 1627 I32 exp_acc[2] = {-1, -1}; 1628 /* the current exponent adjust for the accumulators */ 1629 I32 exponent = 0; 1630 I32 seen_dp = 0; 1631 I32 digit = 0; 1632 I32 old_digit = 0; 1633 I32 sig_digits = 0; /* noof significant digits seen so far */ 1634 #endif 1635 1636 #if defined(USE_PERL_ATOF) || defined(Perl_strtod) 1637 PERL_ARGS_ASSERT_MY_ATOF3; 1638 1639 /* leading whitespace */ 1640 while (s < send && isSPACE(*s)) 1641 ++s; 1642 1643 /* sign */ 1644 switch (*s) { 1645 case '-': 1646 negative = 1; 1647 /* FALLTHROUGH */ 1648 case '+': 1649 ++s; 1650 } 1651 #endif 1652 1653 #ifdef Perl_strtod 1654 { 1655 char* endp; 1656 char* copy = NULL; 1657 1658 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value))) 1659 return endp; 1660 1661 /* strtold() accepts 0x-prefixed hex and in POSIX implementations, 1662 0b-prefixed binary numbers, which is backward incompatible 1663 */ 1664 if ((len == 0 || len - (s-orig) >= 2) && *s == '0' && 1665 (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) { 1666 *value = 0; 1667 return (char *)s+1; 1668 } 1669 1670 /* If the length is passed in, the input string isn't NUL-terminated, 1671 * and in it turns out the function below assumes it is; therefore we 1672 * create a copy and NUL-terminate that */ 1673 if (len) { 1674 Newx(copy, len + 1, char); 1675 Copy(orig, copy, len, char); 1676 copy[len] = '\0'; 1677 s = copy + (s - orig); 1678 } 1679 1680 result[2] = S_strtod(aTHX_ s, &endp); 1681 1682 /* If we created a copy, 'endp' is in terms of that. Convert back to 1683 * the original */ 1684 if (copy) { 1685 s = (s - copy) + (char *) orig; 1686 endp = (endp - copy) + (char *) orig; 1687 Safefree(copy); 1688 } 1689 1690 if (s != endp) { 1691 *value = negative ? -result[2] : result[2]; 1692 return endp; 1693 } 1694 return NULL; 1695 } 1696 #elif defined(USE_PERL_ATOF) 1697 1698 /* There is no point in processing more significant digits 1699 * than the NV can hold. Note that NV_DIG is a lower-bound value, 1700 * while we need an upper-bound value. We add 2 to account for this; 1701 * since it will have been conservative on both the first and last digit. 1702 * For example a 32-bit mantissa with an exponent of 4 would have 1703 * exact values in the set 1704 * 4 1705 * 8 1706 * .. 1707 * 17179869172 1708 * 17179869176 1709 * 17179869180 1710 * 1711 * where for the purposes of calculating NV_DIG we would have to discount 1712 * both the first and last digit, since neither can hold all values from 1713 * 0..9; but for calculating the value we must examine those two digits. 1714 */ 1715 #ifdef MAX_SIG_DIG_PLUS 1716 /* It is not necessarily the case that adding 2 to NV_DIG gets all the 1717 possible digits in a NV, especially if NVs are not IEEE compliant 1718 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */ 1719 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS) 1720 #else 1721 # define MAX_SIG_DIGITS (NV_DIG+2) 1722 #endif 1723 1724 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */ 1725 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10)) 1726 1727 #if defined(NV_INF) || defined(NV_NAN) 1728 { 1729 char* endp; 1730 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value))) 1731 return endp; 1732 } 1733 #endif 1734 1735 /* we accumulate digits into an integer; when this becomes too 1736 * large, we add the total to NV and start again */ 1737 1738 while (s < send) { 1739 if (isDIGIT(*s)) { 1740 seen_digit = 1; 1741 old_digit = digit; 1742 digit = *s++ - '0'; 1743 if (seen_dp) 1744 exp_adjust[1]++; 1745 1746 /* don't start counting until we see the first significant 1747 * digit, eg the 5 in 0.00005... */ 1748 if (!sig_digits && digit == 0) 1749 continue; 1750 1751 if (++sig_digits > MAX_SIG_DIGITS) { 1752 /* limits of precision reached */ 1753 if (digit > 5) { 1754 ++accumulator[seen_dp]; 1755 } else if (digit == 5) { 1756 if (old_digit % 2) { /* round to even - Allen */ 1757 ++accumulator[seen_dp]; 1758 } 1759 } 1760 if (seen_dp) { 1761 exp_adjust[1]--; 1762 } else { 1763 exp_adjust[0]++; 1764 } 1765 /* skip remaining digits */ 1766 while (s < send && isDIGIT(*s)) { 1767 ++s; 1768 if (! seen_dp) { 1769 exp_adjust[0]++; 1770 } 1771 } 1772 /* warn of loss of precision? */ 1773 } 1774 else { 1775 if (accumulator[seen_dp] > MAX_ACCUMULATE) { 1776 /* add accumulator to result and start again */ 1777 result[seen_dp] = S_mulexp10(result[seen_dp], 1778 exp_acc[seen_dp]) 1779 + (NV)accumulator[seen_dp]; 1780 accumulator[seen_dp] = 0; 1781 exp_acc[seen_dp] = 0; 1782 } 1783 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit; 1784 ++exp_acc[seen_dp]; 1785 } 1786 } 1787 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) { 1788 seen_dp = 1; 1789 if (sig_digits > MAX_SIG_DIGITS) { 1790 while (s < send && isDIGIT(*s)) { 1791 ++s; 1792 } 1793 break; 1794 } 1795 } 1796 else { 1797 break; 1798 } 1799 } 1800 1801 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0]; 1802 if (seen_dp) { 1803 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1]; 1804 } 1805 1806 if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) { 1807 bool expnegative = 0; 1808 1809 ++s; 1810 switch (*s) { 1811 case '-': 1812 expnegative = 1; 1813 /* FALLTHROUGH */ 1814 case '+': 1815 ++s; 1816 } 1817 while (s < send && isDIGIT(*s)) 1818 exponent = exponent * 10 + (*s++ - '0'); 1819 if (expnegative) 1820 exponent = -exponent; 1821 } 1822 1823 /* now apply the exponent */ 1824 1825 if (seen_dp) { 1826 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]) 1827 + S_mulexp10(result[1],exponent-exp_adjust[1]); 1828 } else { 1829 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]); 1830 } 1831 1832 /* now apply the sign */ 1833 if (negative) 1834 result[2] = -result[2]; 1835 #endif /* USE_PERL_ATOF */ 1836 *value = result[2]; 1837 return (char *)s; 1838 } 1839 1840 /* 1841 =for apidoc isinfnan 1842 1843 C<Perl_isinfnan()> is a utility function that returns true if the NV 1844 argument is either an infinity or a C<NaN>, false otherwise. To test 1845 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>. 1846 1847 This is also the logical inverse of Perl_isfinite(). 1848 1849 =cut 1850 */ 1851 bool 1852 Perl_isinfnan(NV nv) 1853 { 1854 PERL_UNUSED_ARG(nv); 1855 #ifdef Perl_isinf 1856 if (Perl_isinf(nv)) 1857 return TRUE; 1858 #endif 1859 #ifdef Perl_isnan 1860 if (Perl_isnan(nv)) 1861 return TRUE; 1862 #endif 1863 return FALSE; 1864 } 1865 1866 /* 1867 =for apidoc isinfnansv 1868 1869 Checks whether the argument would be either an infinity or C<NaN> when used 1870 as a number, but is careful not to trigger non-numeric or uninitialized 1871 warnings. it assumes the caller has done C<SvGETMAGIC(sv)> already. 1872 1873 =cut 1874 */ 1875 1876 bool 1877 Perl_isinfnansv(pTHX_ SV *sv) 1878 { 1879 PERL_ARGS_ASSERT_ISINFNANSV; 1880 if (!SvOK(sv)) 1881 return FALSE; 1882 if (SvNOKp(sv)) 1883 return Perl_isinfnan(SvNVX(sv)); 1884 if (SvIOKp(sv)) 1885 return FALSE; 1886 { 1887 STRLEN len; 1888 const char *s = SvPV_nomg_const(sv, len); 1889 return cBOOL(grok_infnan(&s, s+len)); 1890 } 1891 } 1892 1893 #ifndef HAS_MODFL 1894 /* C99 has truncl, pre-C99 Solaris had aintl. We can use either with 1895 * copysignl to emulate modfl, which is in some platforms missing or 1896 * broken. */ 1897 # if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL) 1898 long double 1899 Perl_my_modfl(long double x, long double *ip) 1900 { 1901 *ip = truncl(x); 1902 return (x == *ip ? copysignl(0.0L, x) : x - *ip); 1903 } 1904 # elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL) 1905 long double 1906 Perl_my_modfl(long double x, long double *ip) 1907 { 1908 *ip = aintl(x); 1909 return (x == *ip ? copysignl(0.0L, x) : x - *ip); 1910 } 1911 # endif 1912 #endif 1913 1914 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */ 1915 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL) 1916 long double 1917 Perl_my_frexpl(long double x, int *e) { 1918 *e = x == 0.0L ? 0 : ilogbl(x) + 1; 1919 return (scalbnl(x, -*e)); 1920 } 1921 #endif 1922 1923 /* 1924 =for apidoc Perl_signbit 1925 1926 Return a non-zero integer if the sign bit on an NV is set, and 0 if 1927 it is not. 1928 1929 If F<Configure> detects this system has a C<signbit()> that will work with 1930 our NVs, then we just use it via the C<#define> in F<perl.h>. Otherwise, 1931 fall back on this implementation. The main use of this function 1932 is catching C<-0.0>. 1933 1934 C<Configure> notes: This function is called C<'Perl_signbit'> instead of a 1935 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()> 1936 function or macro that doesn't happen to work with our particular choice 1937 of NVs. We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect 1938 the standard system headers to be happy. Also, this is a no-context 1939 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in 1940 F<perl.h> as a simple macro call to the system's C<signbit()>. 1941 Users should just always call C<Perl_signbit()>. 1942 1943 =cut 1944 */ 1945 #if !defined(HAS_SIGNBIT) 1946 int 1947 Perl_signbit(NV x) { 1948 # ifdef Perl_fp_class_nzero 1949 return Perl_fp_class_nzero(x); 1950 /* Try finding the high byte, and assume it's highest bit 1951 * is the sign. This assumption is probably wrong somewhere. */ 1952 # elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN 1953 return (((unsigned char *)&x)[9] & 0x80); 1954 # elif defined(NV_LITTLE_ENDIAN) 1955 /* Note that NVSIZE is sizeof(NV), which would make the below be 1956 * wrong if the end bytes are unused, which happens with the x86 1957 * 80-bit long doubles, which is why take care of that above. */ 1958 return (((unsigned char *)&x)[NVSIZE - 1] & 0x80); 1959 # elif defined(NV_BIG_ENDIAN) 1960 return (((unsigned char *)&x)[0] & 0x80); 1961 # else 1962 /* This last resort fallback is wrong for the negative zero. */ 1963 return (x < 0.0) ? 1 : 0; 1964 # endif 1965 } 1966 #endif 1967 1968 /* 1969 * ex: set ts=8 sts=4 sw=4 et: 1970 */ 1971