187c01607SMichael Jones //===-- String to float conversion utils ------------------------*- C++ -*-===// 287c01607SMichael Jones // 387c01607SMichael Jones // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 487c01607SMichael Jones // See https://llvm.org/LICENSE.txt for license information. 587c01607SMichael Jones // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 687c01607SMichael Jones // 787c01607SMichael Jones //===----------------------------------------------------------------------===// 887c01607SMichael Jones 96c4267fbSMichael Jones // ----------------------------------------------------------------------------- 106c4267fbSMichael Jones // **** WARNING **** 116c4267fbSMichael Jones // This file is shared with libc++. You should also be careful when adding 126c4267fbSMichael Jones // dependencies to this file, since it needs to build for all libc++ targets. 136c4267fbSMichael Jones // ----------------------------------------------------------------------------- 146c4267fbSMichael Jones 15270547f3SGuillaume Chatelet #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H 16270547f3SGuillaume Chatelet #define LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H 1787c01607SMichael Jones 18c703e657SGuillaume Chatelet #include "src/__support/CPP/bit.h" 1991eb0b65SGuillaume Chatelet #include "src/__support/CPP/limits.h" 20cb3c41c2SMichael Jones #include "src/__support/CPP/optional.h" 210504e932SNishant Mittal #include "src/__support/CPP/string_view.h" 2287c01607SMichael Jones #include "src/__support/FPUtil/FPBits.h" 23a9824312STue Ly #include "src/__support/FPUtil/rounding_mode.h" 2459c809cdSSiva Chandra Reddy #include "src/__support/common.h" 2587c01607SMichael Jones #include "src/__support/ctype_utils.h" 2687c01607SMichael Jones #include "src/__support/detailed_powers_of_ten.h" 2787c01607SMichael Jones #include "src/__support/high_precision_decimal.h" 285ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 291896ee38Slntue #include "src/__support/macros/null_check.h" 301896ee38Slntue #include "src/__support/macros/optimization.h" 3131d797f4SMichael Jones #include "src/__support/str_to_integer.h" 32cb3c41c2SMichael Jones #include "src/__support/str_to_num_result.h" 3309efe848SGuillaume Chatelet #include "src/__support/uint128.h" 3404a9c625SMichael Jones #include "src/errno/libc_errno.h" // For ERANGE 3587c01607SMichael Jones 36d851b5c1SMichael Jones #include <stdint.h> 37d851b5c1SMichael Jones 385ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 3987c01607SMichael Jones namespace internal { 4087c01607SMichael Jones 416c4267fbSMichael Jones // ----------------------------------------------------------------------------- 426c4267fbSMichael Jones // **** WARNING **** 436c4267fbSMichael Jones // This interface is shared with libc++, if you change this interface you need 446c4267fbSMichael Jones // to update it in both libc and libc++. 456c4267fbSMichael Jones // ----------------------------------------------------------------------------- 46cb3c41c2SMichael Jones template <class T> struct ExpandedFloat { 473546f4daSGuillaume Chatelet typename fputil::FPBits<T>::StorageType mantissa; 48cb3c41c2SMichael Jones int32_t exponent; 49cb3c41c2SMichael Jones }; 50cb3c41c2SMichael Jones 516c4267fbSMichael Jones // ----------------------------------------------------------------------------- 526c4267fbSMichael Jones // **** WARNING **** 536c4267fbSMichael Jones // This interface is shared with libc++, if you change this interface you need 546c4267fbSMichael Jones // to update it in both libc and libc++. 556c4267fbSMichael Jones // ----------------------------------------------------------------------------- 56cb3c41c2SMichael Jones template <class T> struct FloatConvertReturn { 57cb3c41c2SMichael Jones ExpandedFloat<T> num = {0, 0}; 58cb3c41c2SMichael Jones int error = 0; 59cb3c41c2SMichael Jones }; 60cb3c41c2SMichael Jones 6159c809cdSSiva Chandra Reddy LIBC_INLINE uint64_t low64(const UInt128 &num) { 6287c01607SMichael Jones return static_cast<uint64_t>(num & 0xffffffffffffffff); 6387c01607SMichael Jones } 6487c01607SMichael Jones 6559c809cdSSiva Chandra Reddy LIBC_INLINE uint64_t high64(const UInt128 &num) { 6687c01607SMichael Jones return static_cast<uint64_t>(num >> 64); 6787c01607SMichael Jones } 6887c01607SMichael Jones 6959c809cdSSiva Chandra Reddy template <class T> LIBC_INLINE void set_implicit_bit(fputil::FPBits<T> &) { 7059c809cdSSiva Chandra Reddy return; 7159c809cdSSiva Chandra Reddy } 72aa1902f9SMichael Jones 73f7d4236aSGuillaume Chatelet #if defined(LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80) 74aa1902f9SMichael Jones template <> 75494734b0SSiva Chandra Reddy LIBC_INLINE void 76494734b0SSiva Chandra Reddy set_implicit_bit<long double>(fputil::FPBits<long double> &result) { 777b387d27SGuillaume Chatelet result.set_implicit_bit(result.get_biased_exponent() != 0); 78aa1902f9SMichael Jones } 79f7d4236aSGuillaume Chatelet #endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80 80aa1902f9SMichael Jones 8187c01607SMichael Jones // This Eisel-Lemire implementation is based on the algorithm described in the 8287c01607SMichael Jones // paper Number Parsing at a Gigabyte per Second, Software: Practice and 8387c01607SMichael Jones // Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the 8487c01607SMichael Jones // description by Nigel Tao 8587c01607SMichael Jones // (https://nigeltao.github.io/blog/2020/eisel-lemire.html) and the golang 8687c01607SMichael Jones // implementation, also by Nigel Tao 8787c01607SMichael Jones // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25) 8887c01607SMichael Jones // for some optimizations as well as handling 32 bit floats. 8987c01607SMichael Jones template <class T> 90cb3c41c2SMichael Jones LIBC_INLINE cpp::optional<ExpandedFloat<T>> 91cb3c41c2SMichael Jones eisel_lemire(ExpandedFloat<T> init_num, 92cb3c41c2SMichael Jones RoundDirection round = RoundDirection::Nearest) { 93c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 943546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 9587c01607SMichael Jones 963546f4daSGuillaume Chatelet StorageType mantissa = init_num.mantissa; 97cb3c41c2SMichael Jones int32_t exp10 = init_num.exponent; 98cb3c41c2SMichael Jones 9987c01607SMichael Jones if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a 10087c01607SMichael Jones // double, so we skip straight to the fallback. 101cb3c41c2SMichael Jones return cpp::nullopt; 10287c01607SMichael Jones } 10387c01607SMichael Jones 10487c01607SMichael Jones // Exp10 Range 10587c01607SMichael Jones if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || 10687c01607SMichael Jones exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { 107cb3c41c2SMichael Jones return cpp::nullopt; 10887c01607SMichael Jones } 10987c01607SMichael Jones 11087c01607SMichael Jones // Normalization 1113546f4daSGuillaume Chatelet uint32_t clz = cpp::countl_zero<StorageType>(mantissa); 11287c01607SMichael Jones mantissa <<= clz; 11387c01607SMichael Jones 1143546f4daSGuillaume Chatelet int32_t exp2 = 115c09e6905SGuillaume Chatelet exp10_to_exp2(exp10) + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - clz; 11687c01607SMichael Jones 11787c01607SMichael Jones // Multiplication 1181c92911eSMichael Jones const uint64_t *power_of_ten = 11987c01607SMichael Jones DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; 12087c01607SMichael Jones 121300f8da8SSiva Chandra Reddy UInt128 first_approx = 122300f8da8SSiva Chandra Reddy static_cast<UInt128>(mantissa) * static_cast<UInt128>(power_of_ten[1]); 12387c01607SMichael Jones 12487c01607SMichael Jones // Wider Approximation 125300f8da8SSiva Chandra Reddy UInt128 final_approx; 12687c01607SMichael Jones // The halfway constant is used to check if the bits that will be shifted away 12787c01607SMichael Jones // intially are all 1. For doubles this is 64 (bitstype size) - 52 (final 12887c01607SMichael Jones // mantissa size) - 3 (we shift away the last two bits separately for 129aa1902f9SMichael Jones // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly, 130aa1902f9SMichael Jones // it's 6 bits for floats in this case. 131aa1902f9SMichael Jones const uint64_t halfway_constant = 132c09e6905SGuillaume Chatelet (uint64_t(1) << (FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))) - 1; 1331c92911eSMichael Jones if ((high64(first_approx) & halfway_constant) == halfway_constant && 1341c92911eSMichael Jones low64(first_approx) + mantissa < mantissa) { 135300f8da8SSiva Chandra Reddy UInt128 low_bits = 136300f8da8SSiva Chandra Reddy static_cast<UInt128>(mantissa) * static_cast<UInt128>(power_of_ten[0]); 137300f8da8SSiva Chandra Reddy UInt128 second_approx = 138300f8da8SSiva Chandra Reddy first_approx + static_cast<UInt128>(high64(low_bits)); 13987c01607SMichael Jones 1401c92911eSMichael Jones if ((high64(second_approx) & halfway_constant) == halfway_constant && 1411c92911eSMichael Jones low64(second_approx) + 1 == 0 && 1421c92911eSMichael Jones low64(low_bits) + mantissa < mantissa) { 143cb3c41c2SMichael Jones return cpp::nullopt; 14487c01607SMichael Jones } 1451c92911eSMichael Jones final_approx = second_approx; 14687c01607SMichael Jones } else { 1471c92911eSMichael Jones final_approx = first_approx; 14887c01607SMichael Jones } 14987c01607SMichael Jones 15087c01607SMichael Jones // Shifting to 54 bits for doubles and 25 bits for floats 1513546f4daSGuillaume Chatelet StorageType msb = static_cast<StorageType>(high64(final_approx) >> 152c09e6905SGuillaume Chatelet (FPBits::STORAGE_LEN - 1)); 1533546f4daSGuillaume Chatelet StorageType final_mantissa = static_cast<StorageType>( 154c703e657SGuillaume Chatelet high64(final_approx) >> 155c09e6905SGuillaume Chatelet (msb + FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))); 1562e8fa86eSAlex Brachet exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb 15787c01607SMichael Jones 158cb3c41c2SMichael Jones if (round == RoundDirection::Nearest) { 15987c01607SMichael Jones // Half-way ambiguity 1601c92911eSMichael Jones if (low64(final_approx) == 0 && 1611c92911eSMichael Jones (high64(final_approx) & halfway_constant) == 0 && 1621c92911eSMichael Jones (final_mantissa & 3) == 1) { 163cb3c41c2SMichael Jones return cpp::nullopt; 16487c01607SMichael Jones } 16587c01607SMichael Jones 166cb3c41c2SMichael Jones // Round to even. 1671c92911eSMichael Jones final_mantissa += final_mantissa & 1; 168cb3c41c2SMichael Jones 169cb3c41c2SMichael Jones } else if (round == RoundDirection::Up) { 170cb3c41c2SMichael Jones // If any of the bits being rounded away are non-zero, then round up. 171cb3c41c2SMichael Jones if (low64(final_approx) > 0 || 172cb3c41c2SMichael Jones (high64(final_approx) & halfway_constant) > 0) { 173cb3c41c2SMichael Jones // Add two since the last current lowest bit is about to be shifted away. 174cb3c41c2SMichael Jones final_mantissa += 2; 175cb3c41c2SMichael Jones } 176cb3c41c2SMichael Jones } 177cb3c41c2SMichael Jones // else round down, which has no effect. 178cb3c41c2SMichael Jones 179cb3c41c2SMichael Jones // From 54 to 53 bits for doubles and 25 to 24 bits for floats 1801c92911eSMichael Jones final_mantissa >>= 1; 181c09e6905SGuillaume Chatelet if ((final_mantissa >> (FPBits::FRACTION_LEN + 1)) > 0) { 1821c92911eSMichael Jones final_mantissa >>= 1; 18387c01607SMichael Jones ++exp2; 18487c01607SMichael Jones } 18587c01607SMichael Jones 18687c01607SMichael Jones // The if block is equivalent to (but has fewer branches than): 18787c01607SMichael Jones // if exp2 <= 0 || exp2 >= 0x7FF { etc } 188c09e6905SGuillaume Chatelet if (static_cast<uint32_t>(exp2) - 1 >= (1 << FPBits::EXP_LEN) - 2) { 189cb3c41c2SMichael Jones return cpp::nullopt; 19087c01607SMichael Jones } 19187c01607SMichael Jones 192cb3c41c2SMichael Jones ExpandedFloat<T> output; 193cb3c41c2SMichael Jones output.mantissa = final_mantissa; 194cb3c41c2SMichael Jones output.exponent = exp2; 195cb3c41c2SMichael Jones return output; 19687c01607SMichael Jones } 19787c01607SMichael Jones 1987eb32644SMichael Jones // TODO: Re-enable eisel-lemire for long double is double double once it's 1997eb32644SMichael Jones // properly supported. 2007eb32644SMichael Jones #if !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) && \ 2017eb32644SMichael Jones !defined(LIBC_TYPES_LONG_DOUBLE_IS_DOUBLE_DOUBLE) 2029b397371SMichael Jones template <> 203cb3c41c2SMichael Jones LIBC_INLINE cpp::optional<ExpandedFloat<long double>> 204cb3c41c2SMichael Jones eisel_lemire<long double>(ExpandedFloat<long double> init_num, 205cb3c41c2SMichael Jones RoundDirection round) { 206c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<long double>; 2073546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 208cb3c41c2SMichael Jones 2097302c8dbSNick Desaulniers UInt128 mantissa = init_num.mantissa; 210cb3c41c2SMichael Jones int32_t exp10 = init_num.exponent; 211cb3c41c2SMichael Jones 2129b397371SMichael Jones // Exp10 Range 2139b397371SMichael Jones // This doesn't reach very far into the range for long doubles, since it's 2149b397371SMichael Jones // sized for doubles and their 11 exponent bits, and not for long doubles and 2159b397371SMichael Jones // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long 2169b397371SMichael Jones // double). This is a known tradeoff, and was made because a proper long 2179b397371SMichael Jones // double table would be approximately 16 times larger. This would have 2189b397371SMichael Jones // significant memory and storage costs all the time to speed up a relatively 2199b397371SMichael Jones // uncommon path. In addition the exp10_to_exp2 function only approximates 2209b397371SMichael Jones // multiplying by log(10)/log(2), and that approximation may not be accurate 2219b397371SMichael Jones // out to the full long double range. 2229b397371SMichael Jones if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || 2239b397371SMichael Jones exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { 224cb3c41c2SMichael Jones return cpp::nullopt; 2259b397371SMichael Jones } 2269b397371SMichael Jones 2279b397371SMichael Jones // Normalization 2287302c8dbSNick Desaulniers uint32_t clz = cpp::countl_zero(mantissa) - 2297302c8dbSNick Desaulniers ((sizeof(UInt128) - sizeof(StorageType)) * CHAR_BIT); 2309b397371SMichael Jones mantissa <<= clz; 2319b397371SMichael Jones 2323546f4daSGuillaume Chatelet int32_t exp2 = 233c09e6905SGuillaume Chatelet exp10_to_exp2(exp10) + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - clz; 2349b397371SMichael Jones 2359b397371SMichael Jones // Multiplication 2369b397371SMichael Jones const uint64_t *power_of_ten = 2379b397371SMichael Jones DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; 2389b397371SMichael Jones 2399b397371SMichael Jones // Since the input mantissa is more than 64 bits, we have to multiply with the 2409b397371SMichael Jones // full 128 bits of the power of ten to get an approximation with the same 2419b397371SMichael Jones // number of significant bits. This means that we only get the one 2429b397371SMichael Jones // approximation, and that approximation is 256 bits long. 243300f8da8SSiva Chandra Reddy UInt128 approx_upper = static_cast<UInt128>(high64(mantissa)) * 244300f8da8SSiva Chandra Reddy static_cast<UInt128>(power_of_ten[1]); 2459b397371SMichael Jones 246ae3b59e6SMichael Jones UInt128 approx_middle_a = static_cast<UInt128>(high64(mantissa)) * 247ae3b59e6SMichael Jones static_cast<UInt128>(power_of_ten[0]); 248ae3b59e6SMichael Jones UInt128 approx_middle_b = static_cast<UInt128>(low64(mantissa)) * 249300f8da8SSiva Chandra Reddy static_cast<UInt128>(power_of_ten[1]); 2509b397371SMichael Jones 251ae3b59e6SMichael Jones UInt128 approx_middle = approx_middle_a + approx_middle_b; 252ae3b59e6SMichael Jones 253ae3b59e6SMichael Jones // Handle overflow in the middle 254ae3b59e6SMichael Jones approx_upper += (approx_middle < approx_middle_a) ? UInt128(1) << 64 : 0; 255ae3b59e6SMichael Jones 256300f8da8SSiva Chandra Reddy UInt128 approx_lower = static_cast<UInt128>(low64(mantissa)) * 257300f8da8SSiva Chandra Reddy static_cast<UInt128>(power_of_ten[0]); 2589b397371SMichael Jones 259300f8da8SSiva Chandra Reddy UInt128 final_approx_lower = 260300f8da8SSiva Chandra Reddy approx_lower + (static_cast<UInt128>(low64(approx_middle)) << 64); 261300f8da8SSiva Chandra Reddy UInt128 final_approx_upper = approx_upper + high64(approx_middle) + 2629b397371SMichael Jones (final_approx_lower < approx_lower ? 1 : 0); 2639b397371SMichael Jones 2649b397371SMichael Jones // The halfway constant is used to check if the bits that will be shifted away 2659b397371SMichael Jones // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64 2669b397371SMichael Jones // (final mantissa size) - 3 (we shift away the last two bits separately for 2679b397371SMichael Jones // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly, 2689b397371SMichael Jones // it's 12 bits for 128 bit floats in this case. 269300f8da8SSiva Chandra Reddy constexpr UInt128 HALFWAY_CONSTANT = 270c09e6905SGuillaume Chatelet (UInt128(1) << (FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))) - 1; 2719b397371SMichael Jones 2729b397371SMichael Jones if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && 2739b397371SMichael Jones final_approx_lower + mantissa < mantissa) { 274cb3c41c2SMichael Jones return cpp::nullopt; 2759b397371SMichael Jones } 2769b397371SMichael Jones 2779b397371SMichael Jones // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats 2783546f4daSGuillaume Chatelet uint32_t msb = 279c09e6905SGuillaume Chatelet static_cast<uint32_t>(final_approx_upper >> (FPBits::STORAGE_LEN - 1)); 2807302c8dbSNick Desaulniers UInt128 final_mantissa = final_approx_upper >> (msb + FPBits::STORAGE_LEN - 2817302c8dbSNick Desaulniers (FPBits::FRACTION_LEN + 3)); 28225a2aeb1SMikhail R. Gadelha exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb 2839b397371SMichael Jones 284cb3c41c2SMichael Jones if (round == RoundDirection::Nearest) { 2859b397371SMichael Jones // Half-way ambiguity 286cb3c41c2SMichael Jones if (final_approx_lower == 0 && 287cb3c41c2SMichael Jones (final_approx_upper & HALFWAY_CONSTANT) == 0 && 2889b397371SMichael Jones (final_mantissa & 3) == 1) { 289cb3c41c2SMichael Jones return cpp::nullopt; 2909b397371SMichael Jones } 291cb3c41c2SMichael Jones // Round to even. 292cb3c41c2SMichael Jones final_mantissa += final_mantissa & 1; 293cb3c41c2SMichael Jones 294cb3c41c2SMichael Jones } else if (round == RoundDirection::Up) { 295cb3c41c2SMichael Jones // If any of the bits being rounded away are non-zero, then round up. 296cb3c41c2SMichael Jones if (final_approx_lower > 0 || (final_approx_upper & HALFWAY_CONSTANT) > 0) { 297cb3c41c2SMichael Jones // Add two since the last current lowest bit is about to be shifted away. 298cb3c41c2SMichael Jones final_mantissa += 2; 299cb3c41c2SMichael Jones } 300cb3c41c2SMichael Jones } 301cb3c41c2SMichael Jones // else round down, which has no effect. 3029b397371SMichael Jones 3039b397371SMichael Jones // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit 3049b397371SMichael Jones // floats 3059b397371SMichael Jones final_mantissa >>= 1; 306c09e6905SGuillaume Chatelet if ((final_mantissa >> (FPBits::FRACTION_LEN + 1)) > 0) { 3079b397371SMichael Jones final_mantissa >>= 1; 3089b397371SMichael Jones ++exp2; 3099b397371SMichael Jones } 3109b397371SMichael Jones 3119b397371SMichael Jones // The if block is equivalent to (but has fewer branches than): 3129b397371SMichael Jones // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } 313c09e6905SGuillaume Chatelet if (exp2 - 1 >= (1 << FPBits::EXP_LEN) - 2) { 314cb3c41c2SMichael Jones return cpp::nullopt; 3159b397371SMichael Jones } 3169b397371SMichael Jones 317cb3c41c2SMichael Jones ExpandedFloat<long double> output; 3187302c8dbSNick Desaulniers output.mantissa = static_cast<StorageType>(final_mantissa); 319cb3c41c2SMichael Jones output.exponent = exp2; 320cb3c41c2SMichael Jones return output; 3219b397371SMichael Jones } 3227eb32644SMichael Jones #endif // !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) && 3237eb32644SMichael Jones // !defined(LIBC_TYPES_LONG_DOUBLE_IS_DOUBLE_DOUBLE) 3249b397371SMichael Jones 32587c01607SMichael Jones // The nth item in POWERS_OF_TWO represents the greatest power of two less than 32687c01607SMichael Jones // 10^n. This tells us how much we can safely shift without overshooting. 32787c01607SMichael Jones constexpr uint8_t POWERS_OF_TWO[19] = { 32887c01607SMichael Jones 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59, 32987c01607SMichael Jones }; 33087c01607SMichael Jones constexpr int32_t NUM_POWERS_OF_TWO = 33187c01607SMichael Jones sizeof(POWERS_OF_TWO) / sizeof(POWERS_OF_TWO[0]); 33287c01607SMichael Jones 33387c01607SMichael Jones // Takes a mantissa and base 10 exponent and converts it into its closest 33487c01607SMichael Jones // floating point type T equivalent. This is the fallback algorithm used when 33587c01607SMichael Jones // the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based 33687c01607SMichael Jones // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this 33787c01607SMichael Jones // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html 33887c01607SMichael Jones template <class T> 339d34b3c9cSMichael Jones LIBC_INLINE FloatConvertReturn<T> simple_decimal_conversion( 340d34b3c9cSMichael Jones const char *__restrict numStart, 341d34b3c9cSMichael Jones const size_t num_len = cpp::numeric_limits<size_t>::max(), 342cb3c41c2SMichael Jones RoundDirection round = RoundDirection::Nearest) { 343c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 3443546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 34587c01607SMichael Jones 34687c01607SMichael Jones int32_t exp2 = 0; 347d34b3c9cSMichael Jones HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart, num_len); 34887c01607SMichael Jones 349cb3c41c2SMichael Jones FloatConvertReturn<T> output; 350cb3c41c2SMichael Jones 3511c92911eSMichael Jones if (hpd.get_num_digits() == 0) { 352cb3c41c2SMichael Jones output.num = {0, 0}; 353cb3c41c2SMichael Jones return output; 35487c01607SMichael Jones } 35587c01607SMichael Jones 35687c01607SMichael Jones // If the exponent is too large and can't be represented in this size of 35787c01607SMichael Jones // float, return inf. 3581c92911eSMichael Jones if (hpd.get_decimal_point() > 0 && 359c09e6905SGuillaume Chatelet exp10_to_exp2(hpd.get_decimal_point() - 1) > FPBits::EXP_BIAS) { 3603ae5a9b6SGuillaume Chatelet output.num = {0, fputil::FPBits<T>::MAX_BIASED_EXPONENT}; 361cb3c41c2SMichael Jones output.error = ERANGE; 362cb3c41c2SMichael Jones return output; 36387c01607SMichael Jones } 36487c01607SMichael Jones // If the exponent is too small even for a subnormal, return 0. 3651c92911eSMichael Jones if (hpd.get_decimal_point() < 0 && 3661c92911eSMichael Jones exp10_to_exp2(-hpd.get_decimal_point()) > 367c09e6905SGuillaume Chatelet (FPBits::EXP_BIAS + static_cast<int32_t>(FPBits::FRACTION_LEN))) { 368cb3c41c2SMichael Jones output.num = {0, 0}; 369cb3c41c2SMichael Jones output.error = ERANGE; 370cb3c41c2SMichael Jones return output; 37187c01607SMichael Jones } 37287c01607SMichael Jones 37387c01607SMichael Jones // Right shift until the number is smaller than 1. 3741c92911eSMichael Jones while (hpd.get_decimal_point() > 0) { 3751c92911eSMichael Jones int32_t shift_amount = 0; 3761c92911eSMichael Jones if (hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) { 3771c92911eSMichael Jones shift_amount = 60; 37887c01607SMichael Jones } else { 3791c92911eSMichael Jones shift_amount = POWERS_OF_TWO[hpd.get_decimal_point()]; 38087c01607SMichael Jones } 3811c92911eSMichael Jones exp2 += shift_amount; 3821c92911eSMichael Jones hpd.shift(-shift_amount); 38387c01607SMichael Jones } 38487c01607SMichael Jones 38587c01607SMichael Jones // Left shift until the number is between 1/2 and 1 3861c92911eSMichael Jones while (hpd.get_decimal_point() < 0 || 3871c92911eSMichael Jones (hpd.get_decimal_point() == 0 && hpd.get_digits()[0] < 5)) { 3881c92911eSMichael Jones int32_t shift_amount = 0; 38987c01607SMichael Jones 3901c92911eSMichael Jones if (-hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) { 3911c92911eSMichael Jones shift_amount = 60; 3921c92911eSMichael Jones } else if (hpd.get_decimal_point() != 0) { 3931c92911eSMichael Jones shift_amount = POWERS_OF_TWO[-hpd.get_decimal_point()]; 39487c01607SMichael Jones } else { // This handles the case of the number being between .1 and .5 3951c92911eSMichael Jones shift_amount = 1; 39687c01607SMichael Jones } 3971c92911eSMichael Jones exp2 -= shift_amount; 3981c92911eSMichael Jones hpd.shift(shift_amount); 39987c01607SMichael Jones } 40087c01607SMichael Jones 40187c01607SMichael Jones // Left shift once so that the number is between 1 and 2 40287c01607SMichael Jones --exp2; 40387c01607SMichael Jones hpd.shift(1); 40487c01607SMichael Jones 40587c01607SMichael Jones // Get the biased exponent 406c09e6905SGuillaume Chatelet exp2 += FPBits::EXP_BIAS; 40787c01607SMichael Jones 40887c01607SMichael Jones // Handle the exponent being too large (and return inf). 4093ae5a9b6SGuillaume Chatelet if (exp2 >= FPBits::MAX_BIASED_EXPONENT) { 4103ae5a9b6SGuillaume Chatelet output.num = {0, FPBits::MAX_BIASED_EXPONENT}; 411cb3c41c2SMichael Jones output.error = ERANGE; 412cb3c41c2SMichael Jones return output; 41387c01607SMichael Jones } 41487c01607SMichael Jones 41587c01607SMichael Jones // Shift left to fill the mantissa 416c09e6905SGuillaume Chatelet hpd.shift(FPBits::FRACTION_LEN); 4173546f4daSGuillaume Chatelet StorageType final_mantissa = hpd.round_to_integer_type<StorageType>(); 41887c01607SMichael Jones 41987c01607SMichael Jones // Handle subnormals 42087c01607SMichael Jones if (exp2 <= 0) { 42187c01607SMichael Jones // Shift right until there is a valid exponent 42287c01607SMichael Jones while (exp2 < 0) { 42387c01607SMichael Jones hpd.shift(-1); 42487c01607SMichael Jones ++exp2; 42587c01607SMichael Jones } 42687c01607SMichael Jones // Shift right one more time to compensate for the left shift to get it 42787c01607SMichael Jones // between 1 and 2. 42887c01607SMichael Jones hpd.shift(-1); 4293546f4daSGuillaume Chatelet final_mantissa = hpd.round_to_integer_type<StorageType>(round); 43087c01607SMichael Jones 43187c01607SMichael Jones // Check if by shifting right we've caused this to round to a normal number. 432c09e6905SGuillaume Chatelet if ((final_mantissa >> FPBits::FRACTION_LEN) != 0) { 43387c01607SMichael Jones ++exp2; 43487c01607SMichael Jones } 43587c01607SMichael Jones } 43687c01607SMichael Jones 43787c01607SMichael Jones // Check if rounding added a bit, and shift down if that's the case. 438c09e6905SGuillaume Chatelet if (final_mantissa == StorageType(2) << FPBits::FRACTION_LEN) { 4391c92911eSMichael Jones final_mantissa >>= 1; 44087c01607SMichael Jones ++exp2; 441aa1902f9SMichael Jones 442aa1902f9SMichael Jones // Check if this rounding causes exp2 to go out of range and make the result 443aa1902f9SMichael Jones // INF. If this is the case, then finalMantissa and exp2 are already the 444aa1902f9SMichael Jones // correct values for an INF result. 4453ae5a9b6SGuillaume Chatelet if (exp2 >= FPBits::MAX_BIASED_EXPONENT) { 446cb3c41c2SMichael Jones output.error = ERANGE; 447aa1902f9SMichael Jones } 44887c01607SMichael Jones } 44987c01607SMichael Jones 4504cdf9884SMichael Jones if (exp2 == 0) { 451cb3c41c2SMichael Jones output.error = ERANGE; 4524cdf9884SMichael Jones } 4534cdf9884SMichael Jones 454cb3c41c2SMichael Jones output.num = {final_mantissa, exp2}; 455cb3c41c2SMichael Jones return output; 45687c01607SMichael Jones } 45787c01607SMichael Jones 45862c187cbSMichael Jones // This class is used for templating the constants for Clinger's Fast Path, 45962c187cbSMichael Jones // described as a method of approximation in 46062c187cbSMichael Jones // Clinger WD. How to Read Floating Point Numbers Accurately. SIGPLAN Not 1990 46162c187cbSMichael Jones // Jun;25(6):92–101. https://doi.org/10.1145/93548.93557. 46262c187cbSMichael Jones // As well as the additions by Gay that extend the useful range by the number of 46362c187cbSMichael Jones // exact digits stored by the float type, described in 46462c187cbSMichael Jones // Gay DM, Correctly rounded binary-decimal and decimal-binary conversions; 46562c187cbSMichael Jones // 1990. AT&T Bell Laboratories Numerical Analysis Manuscript 90-10. 46662c187cbSMichael Jones template <class T> class ClingerConsts; 46762c187cbSMichael Jones 46862c187cbSMichael Jones template <> class ClingerConsts<float> { 46962c187cbSMichael Jones public: 4701c92911eSMichael Jones static constexpr float POWERS_OF_TEN_ARRAY[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 47162c187cbSMichael Jones 1e6, 1e7, 1e8, 1e9, 1e10}; 4721c92911eSMichael Jones static constexpr int32_t EXACT_POWERS_OF_TEN = 10; 4731c92911eSMichael Jones static constexpr int32_t DIGITS_IN_MANTISSA = 7; 4741c92911eSMichael Jones static constexpr float MAX_EXACT_INT = 16777215.0; 47562c187cbSMichael Jones }; 47662c187cbSMichael Jones 47762c187cbSMichael Jones template <> class ClingerConsts<double> { 47862c187cbSMichael Jones public: 4791c92911eSMichael Jones static constexpr double POWERS_OF_TEN_ARRAY[] = { 48062c187cbSMichael Jones 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 48162c187cbSMichael Jones 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 4821c92911eSMichael Jones static constexpr int32_t EXACT_POWERS_OF_TEN = 22; 4831c92911eSMichael Jones static constexpr int32_t DIGITS_IN_MANTISSA = 15; 4841c92911eSMichael Jones static constexpr double MAX_EXACT_INT = 9007199254740991.0; 48562c187cbSMichael Jones }; 48662c187cbSMichael Jones 487f7d4236aSGuillaume Chatelet #if defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) 488aa1902f9SMichael Jones template <> class ClingerConsts<long double> { 489aa1902f9SMichael Jones public: 4900f031daeSTue Ly static constexpr long double POWERS_OF_TEN_ARRAY[] = { 4910f031daeSTue Ly 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 4920f031daeSTue Ly 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 493aa1902f9SMichael Jones static constexpr int32_t EXACT_POWERS_OF_TEN = 494aa1902f9SMichael Jones ClingerConsts<double>::EXACT_POWERS_OF_TEN; 495aa1902f9SMichael Jones static constexpr int32_t DIGITS_IN_MANTISSA = 496aa1902f9SMichael Jones ClingerConsts<double>::DIGITS_IN_MANTISSA; 497aa1902f9SMichael Jones static constexpr long double MAX_EXACT_INT = 498aa1902f9SMichael Jones ClingerConsts<double>::MAX_EXACT_INT; 499aa1902f9SMichael Jones }; 500f7d4236aSGuillaume Chatelet #elif defined(LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80) 501aa1902f9SMichael Jones template <> class ClingerConsts<long double> { 502aa1902f9SMichael Jones public: 503aa1902f9SMichael Jones static constexpr long double POWERS_OF_TEN_ARRAY[] = { 504aa1902f9SMichael Jones 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 505aa1902f9SMichael Jones 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 506aa1902f9SMichael Jones 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L}; 507aa1902f9SMichael Jones static constexpr int32_t EXACT_POWERS_OF_TEN = 27; 508aa1902f9SMichael Jones static constexpr int32_t DIGITS_IN_MANTISSA = 21; 509aa1902f9SMichael Jones static constexpr long double MAX_EXACT_INT = 18446744073709551615.0L; 510aa1902f9SMichael Jones }; 511f7d4236aSGuillaume Chatelet #elif defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT128) 512aa1902f9SMichael Jones template <> class ClingerConsts<long double> { 513aa1902f9SMichael Jones public: 514aa1902f9SMichael Jones static constexpr long double POWERS_OF_TEN_ARRAY[] = { 515aa1902f9SMichael Jones 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, 516aa1902f9SMichael Jones 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, 517aa1902f9SMichael Jones 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L, 518aa1902f9SMichael Jones 1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L, 519aa1902f9SMichael Jones 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L}; 520aa1902f9SMichael Jones static constexpr int32_t EXACT_POWERS_OF_TEN = 48; 521aa1902f9SMichael Jones static constexpr int32_t DIGITS_IN_MANTISSA = 33; 522aa1902f9SMichael Jones static constexpr long double MAX_EXACT_INT = 523aa1902f9SMichael Jones 10384593717069655257060992658440191.0L; 524aa1902f9SMichael Jones }; 5257eb32644SMichael Jones #elif defined(LIBC_TYPES_LONG_DOUBLE_IS_DOUBLE_DOUBLE) 5267eb32644SMichael Jones // TODO: Add proper double double type support here, currently using constants 5277eb32644SMichael Jones // for double since it should be safe. 5287eb32644SMichael Jones template <> class ClingerConsts<long double> { 5297eb32644SMichael Jones public: 5307eb32644SMichael Jones static constexpr double POWERS_OF_TEN_ARRAY[] = { 5317eb32644SMichael Jones 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 5327eb32644SMichael Jones 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; 5337eb32644SMichael Jones static constexpr int32_t EXACT_POWERS_OF_TEN = 22; 5347eb32644SMichael Jones static constexpr int32_t DIGITS_IN_MANTISSA = 15; 5357eb32644SMichael Jones static constexpr double MAX_EXACT_INT = 9007199254740991.0; 5367eb32644SMichael Jones }; 537f7d4236aSGuillaume Chatelet #else 538f7d4236aSGuillaume Chatelet #error "Unknown long double type" 539aa1902f9SMichael Jones #endif 540aa1902f9SMichael Jones 54162c187cbSMichael Jones // Take an exact mantissa and exponent and attempt to convert it using only 54262c187cbSMichael Jones // exact floating point arithmetic. This only handles numbers with low 54362c187cbSMichael Jones // exponents, but handles them quickly. This is an implementation of Clinger's 54462c187cbSMichael Jones // Fast Path, as described above. 54562c187cbSMichael Jones template <class T> 546cb3c41c2SMichael Jones LIBC_INLINE cpp::optional<ExpandedFloat<T>> 547cb3c41c2SMichael Jones clinger_fast_path(ExpandedFloat<T> init_num, 548cb3c41c2SMichael Jones RoundDirection round = RoundDirection::Nearest) { 549c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 5503546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 551cb3c41c2SMichael Jones 5523546f4daSGuillaume Chatelet StorageType mantissa = init_num.mantissa; 553cb3c41c2SMichael Jones int32_t exp10 = init_num.exponent; 554cb3c41c2SMichael Jones 555c09e6905SGuillaume Chatelet if ((mantissa >> FPBits::FRACTION_LEN) > 0) { 556cb3c41c2SMichael Jones return cpp::nullopt; 55762c187cbSMichael Jones } 55862c187cbSMichael Jones 559c703e657SGuillaume Chatelet FPBits result; 5601557256aSTue Ly T float_mantissa; 5617302c8dbSNick Desaulniers if constexpr (is_big_int_v<StorageType> || sizeof(T) > sizeof(uint64_t)) { 562d851b5c1SMichael Jones float_mantissa = 5637299c7f6SMichael Jones (static_cast<T>(uint64_t(mantissa >> 64)) * static_cast<T>(0x1.0p64)) + 5647299c7f6SMichael Jones static_cast<T>(uint64_t(mantissa)); 5651557256aSTue Ly } else { 5661557256aSTue Ly float_mantissa = static_cast<T>(mantissa); 5671557256aSTue Ly } 56862c187cbSMichael Jones 56962c187cbSMichael Jones if (exp10 == 0) { 570c703e657SGuillaume Chatelet result = FPBits(float_mantissa); 57162c187cbSMichael Jones } 57262c187cbSMichael Jones if (exp10 > 0) { 5731c92911eSMichael Jones if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN + 5741c92911eSMichael Jones ClingerConsts<T>::DIGITS_IN_MANTISSA) { 575cb3c41c2SMichael Jones return cpp::nullopt; 57662c187cbSMichael Jones } 5771c92911eSMichael Jones if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) { 5781c92911eSMichael Jones float_mantissa = float_mantissa * 5791c92911eSMichael Jones ClingerConsts<T>::POWERS_OF_TEN_ARRAY 5801c92911eSMichael Jones [exp10 - ClingerConsts<T>::EXACT_POWERS_OF_TEN]; 5811c92911eSMichael Jones exp10 = ClingerConsts<T>::EXACT_POWERS_OF_TEN; 58262c187cbSMichael Jones } 5831c92911eSMichael Jones if (float_mantissa > ClingerConsts<T>::MAX_EXACT_INT) { 584cb3c41c2SMichael Jones return cpp::nullopt; 58562c187cbSMichael Jones } 586c703e657SGuillaume Chatelet result = 587c703e657SGuillaume Chatelet FPBits(float_mantissa * ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]); 58862c187cbSMichael Jones } else if (exp10 < 0) { 5891c92911eSMichael Jones if (-exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) { 590cb3c41c2SMichael Jones return cpp::nullopt; 59162c187cbSMichael Jones } 592c703e657SGuillaume Chatelet result = 593c703e657SGuillaume Chatelet FPBits(float_mantissa / ClingerConsts<T>::POWERS_OF_TEN_ARRAY[-exp10]); 59462c187cbSMichael Jones } 595cb3c41c2SMichael Jones 596cb3c41c2SMichael Jones // If the rounding mode is not nearest, then the sign of the number may affect 597cb3c41c2SMichael Jones // the result. To make sure the rounding mode is respected properly, the 598cb3c41c2SMichael Jones // calculation is redone with a negative result, and the rounding mode is used 599cb3c41c2SMichael Jones // to select the correct result. 600cb3c41c2SMichael Jones if (round != RoundDirection::Nearest) { 601c703e657SGuillaume Chatelet FPBits negative_result; 602cb3c41c2SMichael Jones // I'm 99% sure this will break under fast math optimizations. 603c703e657SGuillaume Chatelet negative_result = FPBits((-float_mantissa) * 604c703e657SGuillaume Chatelet ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]); 605cb3c41c2SMichael Jones 606cb3c41c2SMichael Jones // If the results are equal, then we don't need to use the rounding mode. 6072856db0dSGuillaume Chatelet if (result.get_val() != -negative_result.get_val()) { 608c703e657SGuillaume Chatelet FPBits lower_result; 609c703e657SGuillaume Chatelet FPBits higher_result; 610cb3c41c2SMichael Jones 6112856db0dSGuillaume Chatelet if (result.get_val() < -negative_result.get_val()) { 612cb3c41c2SMichael Jones lower_result = result; 613cb3c41c2SMichael Jones higher_result = negative_result; 614cb3c41c2SMichael Jones } else { 615cb3c41c2SMichael Jones lower_result = negative_result; 616cb3c41c2SMichael Jones higher_result = result; 617cb3c41c2SMichael Jones } 618cb3c41c2SMichael Jones 619cb3c41c2SMichael Jones if (round == RoundDirection::Up) { 620cb3c41c2SMichael Jones result = higher_result; 621cb3c41c2SMichael Jones } else { 622cb3c41c2SMichael Jones result = lower_result; 623cb3c41c2SMichael Jones } 624cb3c41c2SMichael Jones } 625cb3c41c2SMichael Jones } 626cb3c41c2SMichael Jones 627cb3c41c2SMichael Jones ExpandedFloat<T> output; 6287299c7f6SMichael Jones output.mantissa = result.get_explicit_mantissa(); 6297b387d27SGuillaume Chatelet output.exponent = result.get_biased_exponent(); 630cb3c41c2SMichael Jones return output; 63162c187cbSMichael Jones } 63262c187cbSMichael Jones 6332cd20ad9SMichael Jones // The upper bound is the highest base-10 exponent that could possibly give a 6342cd20ad9SMichael Jones // non-inf result for this size of float. The value is 6352cd20ad9SMichael Jones // log10(2^(exponent bias)). 6362cd20ad9SMichael Jones // The generic approximation uses the fact that log10(2^x) ~= x/3 637d34b3c9cSMichael Jones template <typename T> LIBC_INLINE constexpr int32_t get_upper_bound() { 638c09e6905SGuillaume Chatelet return fputil::FPBits<T>::EXP_BIAS / 3; 6392cd20ad9SMichael Jones } 6402cd20ad9SMichael Jones 641d34b3c9cSMichael Jones template <> LIBC_INLINE constexpr int32_t get_upper_bound<float>() { 642d34b3c9cSMichael Jones return 39; 643d34b3c9cSMichael Jones } 6442cd20ad9SMichael Jones 645d34b3c9cSMichael Jones template <> LIBC_INLINE constexpr int32_t get_upper_bound<double>() { 646d34b3c9cSMichael Jones return 309; 647d34b3c9cSMichael Jones } 6482cd20ad9SMichael Jones 6492cd20ad9SMichael Jones // The lower bound is the largest negative base-10 exponent that could possibly 6502cd20ad9SMichael Jones // give a non-zero result for this size of float. The value is 6512cd20ad9SMichael Jones // log10(2^(exponent bias + final mantissa width + intermediate mantissa width)) 6522cd20ad9SMichael Jones // The intermediate mantissa is the integer that's been parsed from the string, 6532cd20ad9SMichael Jones // and the final mantissa is the fractional part of the output number. A very 6542cd20ad9SMichael Jones // low base 10 exponent with a very high intermediate mantissa can cancel each 6552cd20ad9SMichael Jones // other out, and subnormal numbers allow for the result to be at the very low 6562cd20ad9SMichael Jones // end of the final mantissa. 657d34b3c9cSMichael Jones template <typename T> LIBC_INLINE constexpr int32_t get_lower_bound() { 658c09e6905SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 659c09e6905SGuillaume Chatelet return -((FPBits::EXP_BIAS + 660c09e6905SGuillaume Chatelet static_cast<int32_t>(FPBits::FRACTION_LEN + FPBits::STORAGE_LEN)) / 6612cd20ad9SMichael Jones 3); 6622cd20ad9SMichael Jones } 6632cd20ad9SMichael Jones 664d34b3c9cSMichael Jones template <> LIBC_INLINE constexpr int32_t get_lower_bound<float>() { 6652cd20ad9SMichael Jones return -(39 + 6 + 10); 6662cd20ad9SMichael Jones } 6672cd20ad9SMichael Jones 668d34b3c9cSMichael Jones template <> LIBC_INLINE constexpr int32_t get_lower_bound<double>() { 6692cd20ad9SMichael Jones return -(309 + 15 + 20); 6702cd20ad9SMichael Jones } 6712cd20ad9SMichael Jones 6726c4267fbSMichael Jones // ----------------------------------------------------------------------------- 6736c4267fbSMichael Jones // **** WARNING **** 6746c4267fbSMichael Jones // This interface is shared with libc++, if you change this interface you need 6756c4267fbSMichael Jones // to update it in both libc and libc++. 6766c4267fbSMichael Jones // ----------------------------------------------------------------------------- 67787c01607SMichael Jones // Takes a mantissa and base 10 exponent and converts it into its closest 67887c01607SMichael Jones // floating point type T equivalient. First we try the Eisel-Lemire algorithm, 67987c01607SMichael Jones // then if that fails then we fall back to a more accurate algorithm for 68087c01607SMichael Jones // accuracy. The resulting mantissa and exponent are placed in outputMantissa 68187c01607SMichael Jones // and outputExp2. 68287c01607SMichael Jones template <class T> 683d34b3c9cSMichael Jones LIBC_INLINE FloatConvertReturn<T> decimal_exp_to_float( 684d34b3c9cSMichael Jones ExpandedFloat<T> init_num, bool truncated, RoundDirection round, 685d34b3c9cSMichael Jones const char *__restrict numStart, 686d34b3c9cSMichael Jones const size_t num_len = cpp::numeric_limits<size_t>::max()) { 687c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 6883546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 689cb3c41c2SMichael Jones 6903546f4daSGuillaume Chatelet StorageType mantissa = init_num.mantissa; 691cb3c41c2SMichael Jones int32_t exp10 = init_num.exponent; 692cb3c41c2SMichael Jones 693cb3c41c2SMichael Jones FloatConvertReturn<T> output; 694cb3c41c2SMichael Jones cpp::optional<ExpandedFloat<T>> opt_output; 695cb3c41c2SMichael Jones 69662c187cbSMichael Jones // If the exponent is too large and can't be represented in this size of 6972cd20ad9SMichael Jones // float, return inf. These bounds are relatively loose, but are mostly 6982cd20ad9SMichael Jones // serving as a first pass. Some close numbers getting through is okay. 6992cd20ad9SMichael Jones if (exp10 > get_upper_bound<T>()) { 7003ae5a9b6SGuillaume Chatelet output.num = {0, FPBits::MAX_BIASED_EXPONENT}; 701cb3c41c2SMichael Jones output.error = ERANGE; 702cb3c41c2SMichael Jones return output; 70362c187cbSMichael Jones } 70462c187cbSMichael Jones // If the exponent is too small even for a subnormal, return 0. 7052cd20ad9SMichael Jones if (exp10 < get_lower_bound<T>()) { 706cb3c41c2SMichael Jones output.num = {0, 0}; 707cb3c41c2SMichael Jones output.error = ERANGE; 708cb3c41c2SMichael Jones return output; 70962c187cbSMichael Jones } 71087c01607SMichael Jones 711cb3c41c2SMichael Jones // Clinger's Fast Path and Eisel-Lemire can't set errno, but they can fail. 712cb3c41c2SMichael Jones // For this reason the "error" field in their return values is used to 713cb3c41c2SMichael Jones // represent whether they've failed as opposed to the errno value. Any 714cb3c41c2SMichael Jones // non-zero value represents a failure. 715cb3c41c2SMichael Jones 716c3228714SGuillaume Chatelet #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH 71762c187cbSMichael Jones if (!truncated) { 718cb3c41c2SMichael Jones opt_output = clinger_fast_path<T>(init_num, round); 719cb3c41c2SMichael Jones // If the algorithm succeeded the error will be 0, else it will be a 720cb3c41c2SMichael Jones // non-zero number. 721cb3c41c2SMichael Jones if (opt_output.has_value()) { 722cb3c41c2SMichael Jones return {opt_output.value(), 0}; 72362c187cbSMichael Jones } 72462c187cbSMichael Jones } 725c3228714SGuillaume Chatelet #endif // LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH 72687c01607SMichael Jones 727c3228714SGuillaume Chatelet #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE 72887c01607SMichael Jones // Try Eisel-Lemire 729cb3c41c2SMichael Jones opt_output = eisel_lemire<T>(init_num, round); 730cb3c41c2SMichael Jones if (opt_output.has_value()) { 73187c01607SMichael Jones if (!truncated) { 732cb3c41c2SMichael Jones return {opt_output.value(), 0}; 73387c01607SMichael Jones } 73487c01607SMichael Jones // If the mantissa is truncated, then the result may be off by the LSB, so 73587c01607SMichael Jones // check if rounding the mantissa up changes the result. If not, then it's 73687c01607SMichael Jones // safe, else use the fallback. 737900be901SVictor Toni auto second_output = eisel_lemire<T>({mantissa + 1, exp10}, round); 738900be901SVictor Toni if (second_output.has_value()) { 739900be901SVictor Toni if (opt_output->mantissa == second_output->mantissa && 740900be901SVictor Toni opt_output->exponent == second_output->exponent) { 741cb3c41c2SMichael Jones return {opt_output.value(), 0}; 74287c01607SMichael Jones } 74387c01607SMichael Jones } 74487c01607SMichael Jones } 745c3228714SGuillaume Chatelet #endif // LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE 74687c01607SMichael Jones 747c3228714SGuillaume Chatelet #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION 748d34b3c9cSMichael Jones output = simple_decimal_conversion<T>(numStart, num_len, round); 749c3228714SGuillaume Chatelet #else 750c3228714SGuillaume Chatelet #warning "Simple decimal conversion is disabled, result may not be correct." 751c3228714SGuillaume Chatelet #endif // LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION 75287c01607SMichael Jones 753cb3c41c2SMichael Jones return output; 75487c01607SMichael Jones } 75587c01607SMichael Jones 7566c4267fbSMichael Jones // ----------------------------------------------------------------------------- 7576c4267fbSMichael Jones // **** WARNING **** 7586c4267fbSMichael Jones // This interface is shared with libc++, if you change this interface you need 7596c4267fbSMichael Jones // to update it in both libc and libc++. 7606c4267fbSMichael Jones // ----------------------------------------------------------------------------- 7618298424cSMichael Jones // Takes a mantissa and base 2 exponent and converts it into its closest 7628298424cSMichael Jones // floating point type T equivalient. Since the exponent is already in the right 7638298424cSMichael Jones // form, this is mostly just shifting and rounding. This is used for hexadecimal 7648298424cSMichael Jones // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent. 7658298424cSMichael Jones template <class T> 766cb3c41c2SMichael Jones LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num, 767201cc2d8STue Ly bool truncated, 768cb3c41c2SMichael Jones RoundDirection round) { 769c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 7703546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 7718298424cSMichael Jones 7723546f4daSGuillaume Chatelet StorageType mantissa = init_num.mantissa; 773cb3c41c2SMichael Jones int32_t exp2 = init_num.exponent; 774cb3c41c2SMichael Jones 775cb3c41c2SMichael Jones FloatConvertReturn<T> output; 776cb3c41c2SMichael Jones 7778298424cSMichael Jones // This is the number of leading zeroes a properly normalized float of type T 7788298424cSMichael Jones // should have. 779c09e6905SGuillaume Chatelet constexpr int32_t INF_EXP = (1 << FPBits::EXP_LEN) - 1; 7808298424cSMichael Jones 7813546f4daSGuillaume Chatelet // Normalization step 1: Bring the leading bit to the highest bit of 7823546f4daSGuillaume Chatelet // StorageType. 7833546f4daSGuillaume Chatelet uint32_t amount_to_shift_left = cpp::countl_zero<StorageType>(mantissa); 7841c92911eSMichael Jones mantissa <<= amount_to_shift_left; 7858298424cSMichael Jones 7863546f4daSGuillaume Chatelet // Keep exp2 representing the exponent of the lowest bit of StorageType. 7871c92911eSMichael Jones exp2 -= amount_to_shift_left; 7888298424cSMichael Jones 789f22a65c1SGuillaume Chatelet // biased_exponent represents the biased exponent of the most significant bit. 790c09e6905SGuillaume Chatelet int32_t biased_exponent = exp2 + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - 1; 7918298424cSMichael Jones 792201cc2d8STue Ly // Handle numbers that're too large and get squashed to inf 7931c92911eSMichael Jones if (biased_exponent >= INF_EXP) { 7948298424cSMichael Jones // This indicates an overflow, so we make the result INF and set errno. 795c09e6905SGuillaume Chatelet output.num = {0, (1 << FPBits::EXP_LEN) - 1}; 796cb3c41c2SMichael Jones output.error = ERANGE; 797cb3c41c2SMichael Jones return output; 798201cc2d8STue Ly } 799201cc2d8STue Ly 800f22a65c1SGuillaume Chatelet uint32_t amount_to_shift_right = 801c09e6905SGuillaume Chatelet FPBits::STORAGE_LEN - FPBits::FRACTION_LEN - 1; 802201cc2d8STue Ly 803201cc2d8STue Ly // Handle subnormals. 8041c92911eSMichael Jones if (biased_exponent <= 0) { 8051c92911eSMichael Jones amount_to_shift_right += 1 - biased_exponent; 8061c92911eSMichael Jones biased_exponent = 0; 807201cc2d8STue Ly 808c09e6905SGuillaume Chatelet if (amount_to_shift_right > FPBits::STORAGE_LEN) { 809201cc2d8STue Ly // Return 0 if the exponent is too small. 810cb3c41c2SMichael Jones output.num = {0, 0}; 811cb3c41c2SMichael Jones output.error = ERANGE; 812cb3c41c2SMichael Jones return output; 813201cc2d8STue Ly } 814201cc2d8STue Ly } 815201cc2d8STue Ly 8163546f4daSGuillaume Chatelet StorageType round_bit_mask = StorageType(1) << (amount_to_shift_right - 1); 8173546f4daSGuillaume Chatelet StorageType sticky_mask = round_bit_mask - 1; 8181557256aSTue Ly bool round_bit = static_cast<bool>(mantissa & round_bit_mask); 8191c92911eSMichael Jones bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated; 820201cc2d8STue Ly 821c09e6905SGuillaume Chatelet if (amount_to_shift_right < FPBits::STORAGE_LEN) { 822201cc2d8STue Ly // Shift the mantissa and clear the implicit bit. 8231c92911eSMichael Jones mantissa >>= amount_to_shift_right; 824c09e6905SGuillaume Chatelet mantissa &= FPBits::FRACTION_MASK; 825201cc2d8STue Ly } else { 8268298424cSMichael Jones mantissa = 0; 827201cc2d8STue Ly } 8283546f4daSGuillaume Chatelet bool least_significant_bit = static_cast<bool>(mantissa & StorageType(1)); 829cb3c41c2SMichael Jones 830cb3c41c2SMichael Jones // TODO: check that this rounding behavior is correct. 831cb3c41c2SMichael Jones 832cb3c41c2SMichael Jones if (round == RoundDirection::Nearest) { 833201cc2d8STue Ly // Perform rounding-to-nearest, tie-to-even. 8341c92911eSMichael Jones if (round_bit && (least_significant_bit || sticky_bit)) { 835201cc2d8STue Ly ++mantissa; 836201cc2d8STue Ly } 837cb3c41c2SMichael Jones } else if (round == RoundDirection::Up) { 838cb3c41c2SMichael Jones if (round_bit || sticky_bit) { 839cb3c41c2SMichael Jones ++mantissa; 840cb3c41c2SMichael Jones } 841cb3c41c2SMichael Jones } else /* (round == RoundDirection::Down)*/ { 842cb3c41c2SMichael Jones if (round_bit && sticky_bit) { 843cb3c41c2SMichael Jones ++mantissa; 844cb3c41c2SMichael Jones } 845cb3c41c2SMichael Jones } 846201cc2d8STue Ly 847c09e6905SGuillaume Chatelet if (mantissa > FPBits::FRACTION_MASK) { 848201cc2d8STue Ly // Rounding causes the exponent to increase. 8491c92911eSMichael Jones ++biased_exponent; 850201cc2d8STue Ly 8511c92911eSMichael Jones if (biased_exponent == INF_EXP) { 852cb3c41c2SMichael Jones output.error = ERANGE; 8538298424cSMichael Jones } 854201cc2d8STue Ly } 855201cc2d8STue Ly 8561c92911eSMichael Jones if (biased_exponent == 0) { 857cb3c41c2SMichael Jones output.error = ERANGE; 858201cc2d8STue Ly } 859201cc2d8STue Ly 860c09e6905SGuillaume Chatelet output.num = {mantissa & FPBits::FRACTION_MASK, biased_exponent}; 861cb3c41c2SMichael Jones return output; 8628298424cSMichael Jones } 8638298424cSMichael Jones 86487c01607SMichael Jones // checks if the next 4 characters of the string pointer are the start of a 86587c01607SMichael Jones // hexadecimal floating point number. Does not advance the string pointer. 86659c809cdSSiva Chandra Reddy LIBC_INLINE bool is_float_hex_start(const char *__restrict src, 86787c01607SMichael Jones const char decimalPoint) { 868cb3c41c2SMichael Jones if (!(src[0] == '0' && tolower(src[1]) == 'x')) { 86987c01607SMichael Jones return false; 87087c01607SMichael Jones } 871cb3c41c2SMichael Jones size_t first_digit = 2; 872cb3c41c2SMichael Jones if (src[2] == decimalPoint) { 873cb3c41c2SMichael Jones ++first_digit; 87487c01607SMichael Jones } 875cb3c41c2SMichael Jones return isalnum(src[first_digit]) && b36_char_to_int(src[first_digit]) < 16; 87687c01607SMichael Jones } 87787c01607SMichael Jones 8788298424cSMichael Jones // Takes the start of a string representing a decimal float, as well as the 8798298424cSMichael Jones // local decimalPoint. It returns if it suceeded in parsing any digits, and if 8808298424cSMichael Jones // the return value is true then the outputs are pointer to the end of the 8818298424cSMichael Jones // number, and the mantissa and exponent for the closest float T representation. 8828298424cSMichael Jones // If the return value is false, then it is assumed that there is no number 8838298424cSMichael Jones // here. 8848298424cSMichael Jones template <class T> 885cb3c41c2SMichael Jones LIBC_INLINE StrToNumResult<ExpandedFloat<T>> 8861c92911eSMichael Jones decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, 887cb3c41c2SMichael Jones RoundDirection round) { 888c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 8893546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 890c703e657SGuillaume Chatelet 8918298424cSMichael Jones constexpr uint32_t BASE = 10; 8928298424cSMichael Jones constexpr char EXPONENT_MARKER = 'e'; 8938298424cSMichael Jones 8948298424cSMichael Jones bool truncated = false; 8951c92911eSMichael Jones bool seen_digit = false; 8961c92911eSMichael Jones bool after_decimal = false; 8973546f4daSGuillaume Chatelet StorageType mantissa = 0; 8988298424cSMichael Jones int32_t exponent = 0; 8998298424cSMichael Jones 900cb3c41c2SMichael Jones size_t index = 0; 901cb3c41c2SMichael Jones 902cb3c41c2SMichael Jones StrToNumResult<ExpandedFloat<T>> output({0, 0}); 903cb3c41c2SMichael Jones 9048298424cSMichael Jones // The goal for the first step of parsing is to convert the number in src to 9058298424cSMichael Jones // the format mantissa * (base ^ exponent) 9068298424cSMichael Jones 907499ca806STue Ly // The loop fills the mantissa with as many digits as it can hold 9083546f4daSGuillaume Chatelet const StorageType bitstype_max_div_by_base = 9093546f4daSGuillaume Chatelet cpp::numeric_limits<StorageType>::max() / BASE; 910499ca806STue Ly while (true) { 911cb3c41c2SMichael Jones if (isdigit(src[index])) { 912*a0c4f854SMichael Jones uint32_t digit = b36_char_to_int(src[index]); 9131c92911eSMichael Jones seen_digit = true; 914499ca806STue Ly 9151c92911eSMichael Jones if (mantissa < bitstype_max_div_by_base) { 916499ca806STue Ly mantissa = (mantissa * BASE) + digit; 9171c92911eSMichael Jones if (after_decimal) { 918499ca806STue Ly --exponent; 919499ca806STue Ly } 920499ca806STue Ly } else { 921499ca806STue Ly if (digit > 0) 922499ca806STue Ly truncated = true; 9231c92911eSMichael Jones if (!after_decimal) 924499ca806STue Ly ++exponent; 925499ca806STue Ly } 926499ca806STue Ly 927cb3c41c2SMichael Jones ++index; 928499ca806STue Ly continue; 929499ca806STue Ly } 930cb3c41c2SMichael Jones if (src[index] == DECIMAL_POINT) { 9311c92911eSMichael Jones if (after_decimal) { 932cb3c41c2SMichael Jones break; // this means that src[index] points to a second decimal point, 933cb3c41c2SMichael Jones // ending the number. 934499ca806STue Ly } 9351c92911eSMichael Jones after_decimal = true; 936cb3c41c2SMichael Jones ++index; 9378298424cSMichael Jones continue; 9388298424cSMichael Jones } 939499ca806STue Ly // The character is neither a digit nor a decimal point. 940499ca806STue Ly break; 9418298424cSMichael Jones } 9428298424cSMichael Jones 9431c92911eSMichael Jones if (!seen_digit) 944cb3c41c2SMichael Jones return output; 9458298424cSMichael Jones 946d34b3c9cSMichael Jones // TODO: When adding max length argument, handle the case of a trailing 947d34b3c9cSMichael Jones // EXPONENT MARKER, see scanf for more details. 948cb3c41c2SMichael Jones if (tolower(src[index]) == EXPONENT_MARKER) { 949ae3b59e6SMichael Jones bool has_sign = false; 950ae3b59e6SMichael Jones if (src[index + 1] == '+' || src[index + 1] == '-') { 951ae3b59e6SMichael Jones has_sign = true; 952ae3b59e6SMichael Jones } 953ae3b59e6SMichael Jones if (isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) { 954cb3c41c2SMichael Jones ++index; 955cb3c41c2SMichael Jones auto result = strtointeger<int32_t>(src + index, 10); 956cb3c41c2SMichael Jones if (result.has_error()) 957cb3c41c2SMichael Jones output.error = result.error; 95874da5e6cSMichael Jones int32_t add_to_exponent = result.value; 959cb3c41c2SMichael Jones index += result.parsed_len; 960cc65ecfdSMichael Jones 961cc65ecfdSMichael Jones // Here we do this operation as int64 to avoid overflow. 962cc65ecfdSMichael Jones int64_t temp_exponent = static_cast<int64_t>(exponent) + 963cc65ecfdSMichael Jones static_cast<int64_t>(add_to_exponent); 964cc65ecfdSMichael Jones 965cc65ecfdSMichael Jones // If the result is in the valid range, then we use it. The valid range is 966cc65ecfdSMichael Jones // also within the int32 range, so this prevents overflow issues. 9673ae5a9b6SGuillaume Chatelet if (temp_exponent > FPBits::MAX_BIASED_EXPONENT) { 9683ae5a9b6SGuillaume Chatelet exponent = FPBits::MAX_BIASED_EXPONENT; 9693ae5a9b6SGuillaume Chatelet } else if (temp_exponent < -FPBits::MAX_BIASED_EXPONENT) { 9703ae5a9b6SGuillaume Chatelet exponent = -FPBits::MAX_BIASED_EXPONENT; 9713d953234SMichael Jones } else { 9723d953234SMichael Jones exponent = static_cast<int32_t>(temp_exponent); 973cc65ecfdSMichael Jones } 9748298424cSMichael Jones } 9758298424cSMichael Jones } 9768298424cSMichael Jones 977cb3c41c2SMichael Jones output.parsed_len = index; 9788298424cSMichael Jones if (mantissa == 0) { // if we have a 0, then also 0 the exponent. 979cb3c41c2SMichael Jones output.value = {0, 0}; 9808298424cSMichael Jones } else { 981cb3c41c2SMichael Jones auto temp = 982d34b3c9cSMichael Jones decimal_exp_to_float<T>({mantissa, exponent}, truncated, round, src); 983cb3c41c2SMichael Jones output.value = temp.num; 984cb3c41c2SMichael Jones output.error = temp.error; 9858298424cSMichael Jones } 986cb3c41c2SMichael Jones return output; 9878298424cSMichael Jones } 9888298424cSMichael Jones 9898298424cSMichael Jones // Takes the start of a string representing a hexadecimal float, as well as the 9908298424cSMichael Jones // local decimal point. It returns if it suceeded in parsing any digits, and if 9918298424cSMichael Jones // the return value is true then the outputs are pointer to the end of the 9928298424cSMichael Jones // number, and the mantissa and exponent for the closest float T representation. 9938298424cSMichael Jones // If the return value is false, then it is assumed that there is no number 9948298424cSMichael Jones // here. 9958298424cSMichael Jones template <class T> 996cb3c41c2SMichael Jones LIBC_INLINE StrToNumResult<ExpandedFloat<T>> 997cb3c41c2SMichael Jones hexadecimal_string_to_float(const char *__restrict src, 998cb3c41c2SMichael Jones const char DECIMAL_POINT, RoundDirection round) { 999c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 10003546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 1001c703e657SGuillaume Chatelet 10028298424cSMichael Jones constexpr uint32_t BASE = 16; 10038298424cSMichael Jones constexpr char EXPONENT_MARKER = 'p'; 10048298424cSMichael Jones 10058298424cSMichael Jones bool truncated = false; 10061c92911eSMichael Jones bool seen_digit = false; 10071c92911eSMichael Jones bool after_decimal = false; 10083546f4daSGuillaume Chatelet StorageType mantissa = 0; 10098298424cSMichael Jones int32_t exponent = 0; 10108298424cSMichael Jones 1011cb3c41c2SMichael Jones size_t index = 0; 1012cb3c41c2SMichael Jones 1013cb3c41c2SMichael Jones StrToNumResult<ExpandedFloat<T>> output({0, 0}); 1014cb3c41c2SMichael Jones 10158298424cSMichael Jones // The goal for the first step of parsing is to convert the number in src to 10168298424cSMichael Jones // the format mantissa * (base ^ exponent) 10178298424cSMichael Jones 1018499ca806STue Ly // The loop fills the mantissa with as many digits as it can hold 10193546f4daSGuillaume Chatelet const StorageType bitstype_max_div_by_base = 10203546f4daSGuillaume Chatelet cpp::numeric_limits<StorageType>::max() / BASE; 1021499ca806STue Ly while (true) { 1022cb3c41c2SMichael Jones if (isalnum(src[index])) { 1023cb3c41c2SMichael Jones uint32_t digit = b36_char_to_int(src[index]); 1024201cc2d8STue Ly if (digit < BASE) 10251c92911eSMichael Jones seen_digit = true; 1026201cc2d8STue Ly else 1027201cc2d8STue Ly break; 1028499ca806STue Ly 10291c92911eSMichael Jones if (mantissa < bitstype_max_div_by_base) { 1030499ca806STue Ly mantissa = (mantissa * BASE) + digit; 10311c92911eSMichael Jones if (after_decimal) 1032499ca806STue Ly --exponent; 1033499ca806STue Ly } else { 1034499ca806STue Ly if (digit > 0) 1035499ca806STue Ly truncated = true; 10361c92911eSMichael Jones if (!after_decimal) 1037499ca806STue Ly ++exponent; 1038499ca806STue Ly } 1039cb3c41c2SMichael Jones ++index; 1040499ca806STue Ly continue; 1041499ca806STue Ly } 1042cb3c41c2SMichael Jones if (src[index] == DECIMAL_POINT) { 10431c92911eSMichael Jones if (after_decimal) { 1044cb3c41c2SMichael Jones break; // this means that src[index] points to a second decimal point, 1045cb3c41c2SMichael Jones // ending the number. 1046499ca806STue Ly } 10471c92911eSMichael Jones after_decimal = true; 1048cb3c41c2SMichael Jones ++index; 10498298424cSMichael Jones continue; 10508298424cSMichael Jones } 1051499ca806STue Ly // The character is neither a hexadecimal digit nor a decimal point. 10528298424cSMichael Jones break; 10538298424cSMichael Jones } 10548298424cSMichael Jones 10551c92911eSMichael Jones if (!seen_digit) 1056cb3c41c2SMichael Jones return output; 10578298424cSMichael Jones 10588298424cSMichael Jones // Convert the exponent from having a base of 16 to having a base of 2. 10598298424cSMichael Jones exponent *= 4; 10608298424cSMichael Jones 1061cb3c41c2SMichael Jones if (tolower(src[index]) == EXPONENT_MARKER) { 1062ae3b59e6SMichael Jones bool has_sign = false; 1063ae3b59e6SMichael Jones if (src[index + 1] == '+' || src[index + 1] == '-') { 1064ae3b59e6SMichael Jones has_sign = true; 1065ae3b59e6SMichael Jones } 1066ae3b59e6SMichael Jones if (isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) { 1067cb3c41c2SMichael Jones ++index; 1068cb3c41c2SMichael Jones auto result = strtointeger<int32_t>(src + index, 10); 1069cb3c41c2SMichael Jones if (result.has_error()) 1070cb3c41c2SMichael Jones output.error = result.error; 1071cb3c41c2SMichael Jones 107274da5e6cSMichael Jones int32_t add_to_exponent = result.value; 1073cb3c41c2SMichael Jones index += result.parsed_len; 1074cc65ecfdSMichael Jones 1075cc65ecfdSMichael Jones // Here we do this operation as int64 to avoid overflow. 1076cc65ecfdSMichael Jones int64_t temp_exponent = static_cast<int64_t>(exponent) + 1077cc65ecfdSMichael Jones static_cast<int64_t>(add_to_exponent); 1078cc65ecfdSMichael Jones 1079cc65ecfdSMichael Jones // If the result is in the valid range, then we use it. The valid range is 1080cc65ecfdSMichael Jones // also within the int32 range, so this prevents overflow issues. 10813ae5a9b6SGuillaume Chatelet if (temp_exponent > FPBits::MAX_BIASED_EXPONENT) { 10823ae5a9b6SGuillaume Chatelet exponent = FPBits::MAX_BIASED_EXPONENT; 10833ae5a9b6SGuillaume Chatelet } else if (temp_exponent < -FPBits::MAX_BIASED_EXPONENT) { 10843ae5a9b6SGuillaume Chatelet exponent = -FPBits::MAX_BIASED_EXPONENT; 1085b5f0a876SMichael Jones } else { 1086b5f0a876SMichael Jones exponent = static_cast<int32_t>(temp_exponent); 1087cc65ecfdSMichael Jones } 10888298424cSMichael Jones } 10898298424cSMichael Jones } 1090cb3c41c2SMichael Jones output.parsed_len = index; 10918298424cSMichael Jones if (mantissa == 0) { // if we have a 0, then also 0 the exponent. 1092cb3c41c2SMichael Jones output.value.exponent = 0; 1093cb3c41c2SMichael Jones output.value.mantissa = 0; 10948298424cSMichael Jones } else { 1095cb3c41c2SMichael Jones auto temp = binary_exp_to_float<T>({mantissa, exponent}, truncated, round); 1096cb3c41c2SMichael Jones output.error = temp.error; 1097cb3c41c2SMichael Jones output.value = temp.num; 10988298424cSMichael Jones } 1099cb3c41c2SMichael Jones return output; 11008298424cSMichael Jones } 11018298424cSMichael Jones 1102b43965adSMichael Flanders template <class T> 1103b43965adSMichael Flanders LIBC_INLINE typename fputil::FPBits<T>::StorageType 11040504e932SNishant Mittal nan_mantissa_from_ncharseq(const cpp::string_view ncharseq) { 1105b43965adSMichael Flanders using FPBits = typename fputil::FPBits<T>; 1106b43965adSMichael Flanders using StorageType = typename FPBits::StorageType; 1107b43965adSMichael Flanders 1108b43965adSMichael Flanders StorageType nan_mantissa = 0; 11090504e932SNishant Mittal 11100504e932SNishant Mittal if (ncharseq.data() != nullptr && isdigit(ncharseq[0])) { 1111b43965adSMichael Flanders StrToNumResult<StorageType> strtoint_result = 1112b43965adSMichael Flanders strtointeger<StorageType>(ncharseq.data(), 0); 11130504e932SNishant Mittal if (!strtoint_result.has_error()) 11140504e932SNishant Mittal nan_mantissa = strtoint_result.value; 11150504e932SNishant Mittal 11160504e932SNishant Mittal if (strtoint_result.parsed_len != static_cast<ptrdiff_t>(ncharseq.size())) 11170504e932SNishant Mittal nan_mantissa = 0; 11180504e932SNishant Mittal } 11190504e932SNishant Mittal 11200504e932SNishant Mittal return nan_mantissa; 11210504e932SNishant Mittal } 11220504e932SNishant Mittal 112387c01607SMichael Jones // Takes a pointer to a string and a pointer to a string pointer. This function 112487c01607SMichael Jones // is used as the backend for all of the string to float functions. 1125d34b3c9cSMichael Jones // TODO: Add src_len member to match strtointeger. 1126d34b3c9cSMichael Jones // TODO: Next, move from char* and length to string_view 112787c01607SMichael Jones template <class T> 1128cb3c41c2SMichael Jones LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) { 1129c703e657SGuillaume Chatelet using FPBits = typename fputil::FPBits<T>; 11303546f4daSGuillaume Chatelet using StorageType = typename FPBits::StorageType; 1131c703e657SGuillaume Chatelet 1132c703e657SGuillaume Chatelet FPBits result = FPBits(); 11331c92911eSMichael Jones bool seen_digit = false; 1134cb3c41c2SMichael Jones char sign = '+'; 113587c01607SMichael Jones 1136cb3c41c2SMichael Jones int error = 0; 1137cb3c41c2SMichael Jones 1138cb3c41c2SMichael Jones ptrdiff_t index = first_non_whitespace(src) - src; 1139cb3c41c2SMichael Jones 1140cb3c41c2SMichael Jones if (src[index] == '+' || src[index] == '-') { 1141cb3c41c2SMichael Jones sign = src[index]; 1142cb3c41c2SMichael Jones ++index; 114387c01607SMichael Jones } 1144cb3c41c2SMichael Jones 1145cb3c41c2SMichael Jones if (sign == '-') { 114611ec512fSGuillaume Chatelet result.set_sign(Sign::NEG); 114787c01607SMichael Jones } 114887c01607SMichael Jones 114987c01607SMichael Jones static constexpr char DECIMAL_POINT = '.'; 11501c92911eSMichael Jones static const char *inf_string = "infinity"; 11511c92911eSMichael Jones static const char *nan_string = "nan"; 115287c01607SMichael Jones 1153cb3c41c2SMichael Jones if (isdigit(src[index]) || src[index] == DECIMAL_POINT) { // regular number 115487c01607SMichael Jones int base = 10; 1155cb3c41c2SMichael Jones if (is_float_hex_start(src + index, DECIMAL_POINT)) { 115687c01607SMichael Jones base = 16; 1157cb3c41c2SMichael Jones index += 2; 11581c92911eSMichael Jones seen_digit = true; 115987c01607SMichael Jones } 116087c01607SMichael Jones 1161cb3c41c2SMichael Jones RoundDirection round_direction = RoundDirection::Nearest; 1162cb3c41c2SMichael Jones 1163a9824312STue Ly switch (fputil::quick_get_round()) { 1164cb3c41c2SMichael Jones case FE_TONEAREST: 1165cb3c41c2SMichael Jones round_direction = RoundDirection::Nearest; 1166cb3c41c2SMichael Jones break; 1167cb3c41c2SMichael Jones case FE_UPWARD: 1168cb3c41c2SMichael Jones if (sign == '+') { 1169cb3c41c2SMichael Jones round_direction = RoundDirection::Up; 1170cb3c41c2SMichael Jones } else { 1171cb3c41c2SMichael Jones round_direction = RoundDirection::Down; 1172cb3c41c2SMichael Jones } 1173cb3c41c2SMichael Jones break; 1174cb3c41c2SMichael Jones case FE_DOWNWARD: 1175cb3c41c2SMichael Jones if (sign == '+') { 1176cb3c41c2SMichael Jones round_direction = RoundDirection::Down; 1177cb3c41c2SMichael Jones } else { 1178cb3c41c2SMichael Jones round_direction = RoundDirection::Up; 1179cb3c41c2SMichael Jones } 1180cb3c41c2SMichael Jones break; 1181cb3c41c2SMichael Jones case FE_TOWARDZERO: 1182cb3c41c2SMichael Jones round_direction = RoundDirection::Down; 1183cb3c41c2SMichael Jones break; 1184cb3c41c2SMichael Jones } 1185cb3c41c2SMichael Jones 1186cb3c41c2SMichael Jones StrToNumResult<ExpandedFloat<T>> parse_result({0, 0}); 11878298424cSMichael Jones if (base == 16) { 1188cb3c41c2SMichael Jones parse_result = hexadecimal_string_to_float<T>(src + index, DECIMAL_POINT, 1189cb3c41c2SMichael Jones round_direction); 11908298424cSMichael Jones } else { // base is 10 1191cb3c41c2SMichael Jones parse_result = decimal_string_to_float<T>(src + index, DECIMAL_POINT, 1192cb3c41c2SMichael Jones round_direction); 11938298424cSMichael Jones } 1194cb3c41c2SMichael Jones seen_digit = parse_result.parsed_len != 0; 1195cb3c41c2SMichael Jones result.set_mantissa(parse_result.value.mantissa); 11967b387d27SGuillaume Chatelet result.set_biased_exponent(parse_result.value.exponent); 1197cb3c41c2SMichael Jones index += parse_result.parsed_len; 1198cb3c41c2SMichael Jones error = parse_result.error; 1199cb3c41c2SMichael Jones } else if (tolower(src[index]) == 'n') { // NaN 1200cb3c41c2SMichael Jones if (tolower(src[index + 1]) == nan_string[1] && 1201cb3c41c2SMichael Jones tolower(src[index + 2]) == nan_string[2]) { 12021c92911eSMichael Jones seen_digit = true; 1203cb3c41c2SMichael Jones index += 3; 12043546f4daSGuillaume Chatelet StorageType nan_mantissa = 0; 120547d0c83eSMichael Jones // this handles the case of `NaN(n-character-sequence)`, where the 12061cbd25f8SMark de Wever // n-character-sequence is made of 0 or more letters, numbers, or 12071cbd25f8SMark de Wever // underscore characters in any order. 1208cb3c41c2SMichael Jones if (src[index] == '(') { 1209cb3c41c2SMichael Jones size_t left_paren = index; 1210cb3c41c2SMichael Jones ++index; 1211ad844632SMichael Jones while (isalnum(src[index]) || src[index] == '_') 1212cb3c41c2SMichael Jones ++index; 1213cb3c41c2SMichael Jones if (src[index] == ')') { 1214cb3c41c2SMichael Jones ++index; 1215b43965adSMichael Flanders nan_mantissa = nan_mantissa_from_ncharseq<T>( 12160504e932SNishant Mittal cpp::string_view(src + (left_paren + 1), index - left_paren - 2)); 1217cb3c41c2SMichael Jones } else { 1218cb3c41c2SMichael Jones index = left_paren; 1219cb3c41c2SMichael Jones } 122087c01607SMichael Jones } 1221ace383dfSGuillaume Chatelet result = FPBits(result.quiet_nan(result.sign(), nan_mantissa)); 122287c01607SMichael Jones } 1223cb3c41c2SMichael Jones } else if (tolower(src[index]) == 'i') { // INF 1224cb3c41c2SMichael Jones if (tolower(src[index + 1]) == inf_string[1] && 1225cb3c41c2SMichael Jones tolower(src[index + 2]) == inf_string[2]) { 12261c92911eSMichael Jones seen_digit = true; 122711ec512fSGuillaume Chatelet result = FPBits(result.inf(result.sign())); 1228cb3c41c2SMichael Jones if (tolower(src[index + 3]) == inf_string[3] && 1229cb3c41c2SMichael Jones tolower(src[index + 4]) == inf_string[4] && 1230cb3c41c2SMichael Jones tolower(src[index + 5]) == inf_string[5] && 1231cb3c41c2SMichael Jones tolower(src[index + 6]) == inf_string[6] && 1232cb3c41c2SMichael Jones tolower(src[index + 7]) == inf_string[7]) { 1233cb3c41c2SMichael Jones // if the string is "INFINITY" then consume 8 characters. 1234cb3c41c2SMichael Jones index += 8; 123587c01607SMichael Jones } else { 1236cb3c41c2SMichael Jones index += 3; 123787c01607SMichael Jones } 123887c01607SMichael Jones } 123987c01607SMichael Jones } 12401c92911eSMichael Jones if (!seen_digit) { // If there is nothing to actually parse, then return 0. 1241cb3c41c2SMichael Jones return {T(0), 0, error}; 124287c01607SMichael Jones } 124387c01607SMichael Jones 1244aa1902f9SMichael Jones // This function only does something if T is long double and the platform uses 1245aa1902f9SMichael Jones // special 80 bit long doubles. Otherwise it should be inlined out. 1246aa1902f9SMichael Jones set_implicit_bit<T>(result); 1247aa1902f9SMichael Jones 12482856db0dSGuillaume Chatelet return {result.get_val(), index, error}; 124987c01607SMichael Jones } 125087c01607SMichael Jones 12510504e932SNishant Mittal template <class T> LIBC_INLINE StrToNumResult<T> strtonan(const char *arg) { 12520504e932SNishant Mittal using FPBits = typename fputil::FPBits<T>; 12530504e932SNishant Mittal using StorageType = typename FPBits::StorageType; 12540504e932SNishant Mittal 12551896ee38Slntue LIBC_CRASH_ON_NULLPTR(arg); 12561896ee38Slntue 12570504e932SNishant Mittal FPBits result; 12580504e932SNishant Mittal int error = 0; 12590504e932SNishant Mittal StorageType nan_mantissa = 0; 12600504e932SNishant Mittal 12610504e932SNishant Mittal ptrdiff_t index = 0; 12620504e932SNishant Mittal while (isalnum(arg[index]) || arg[index] == '_') 12630504e932SNishant Mittal ++index; 12640504e932SNishant Mittal 1265b43965adSMichael Flanders if (arg[index] == '\0') 1266b43965adSMichael Flanders nan_mantissa = nan_mantissa_from_ncharseq<T>(cpp::string_view(arg, index)); 12670504e932SNishant Mittal 12682137894aSGuillaume Chatelet result = FPBits::quiet_nan(Sign::POS, nan_mantissa); 12692856db0dSGuillaume Chatelet return {result.get_val(), 0, error}; 12700504e932SNishant Mittal } 12710504e932SNishant Mittal 127287c01607SMichael Jones } // namespace internal 12735ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 127487c01607SMichael Jones 1275270547f3SGuillaume Chatelet #endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H 1276