1 //===-- Utilities to convert floating point values to string ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H 10 #define LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H 11 12 #include <stdint.h> 13 14 #include "src/__support/CPP/limits.h" 15 #include "src/__support/CPP/type_traits.h" 16 #include "src/__support/FPUtil/FPBits.h" 17 #include "src/__support/FPUtil/dyadic_float.h" 18 #include "src/__support/big_int.h" 19 #include "src/__support/common.h" 20 #include "src/__support/libc_assert.h" 21 #include "src/__support/macros/attributes.h" 22 #include "src/__support/macros/config.h" 23 #include "src/__support/sign.h" 24 25 // This file has 5 compile-time flags to allow the user to configure the float 26 // to string behavior. These were used to explore tradeoffs during the design 27 // phase, and can still be used to gain specific properties. Unless you 28 // specifically know what you're doing, you should leave all these flags off. 29 30 // LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD 31 // This flag disables the separate long double conversion implementation. It is 32 // not based on the Ryu algorithm, instead generating the digits by 33 // multiplying/dividing the written-out number by 10^9 to get blocks. It's 34 // significantly faster than INT_CALC, only about 10x slower than MEGA_TABLE, 35 // and is small in binary size. Its downside is that it always calculates all 36 // of the digits above the decimal point, making it inefficient for %e calls 37 // with large exponents. This specialization overrides other flags, so this 38 // flag must be set for other flags to effect the long double behavior. 39 40 // LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE 41 // The Mega Table is ~5 megabytes when compiled. It lists the constants needed 42 // to perform the Ryu Printf algorithm (described below) for all long double 43 // values. This makes it extremely fast for both doubles and long doubles, in 44 // exchange for large binary size. 45 46 // LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT 47 // Dyadic floats are software floating point numbers, and their accuracy can be 48 // as high as necessary. This option uses 256 bit dyadic floats to calculate 49 // the table values that Ryu Printf needs. This is reasonably fast and very 50 // small compared to the Mega Table, but the 256 bit floats only give accurate 51 // results for the first ~50 digits of the output. In practice this shouldn't 52 // be a problem since long doubles are only accurate for ~35 digits, but the 53 // trailing values all being 0s may cause brittle tests to fail. 54 55 // LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC 56 // Integer Calculation uses wide integers to do the calculations for the Ryu 57 // Printf table, which is just as accurate as the Mega Table without requiring 58 // as much code size. These integers can be very large (~32KB at max, though 59 // always on the stack) to handle the edges of the long double range. They are 60 // also very slow, taking multiple seconds on a powerful CPU to calculate the 61 // values at the end of the range. If no flag is set, this is used for long 62 // doubles, the flag only changes the double behavior. 63 64 // LIBC_COPT_FLOAT_TO_STR_NO_TABLE 65 // This flag doesn't change the actual calculation method, instead it is used 66 // to disable the normal Ryu Printf table for configurations that don't use any 67 // table at all. 68 69 // Default Config: 70 // If no flags are set, doubles use the normal (and much more reasonably sized) 71 // Ryu Printf table and long doubles use their specialized implementation. This 72 // provides good performance and binary size. 73 74 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE 75 #include "src/__support/ryu_long_double_constants.h" 76 #elif !defined(LIBC_COPT_FLOAT_TO_STR_NO_TABLE) 77 #include "src/__support/ryu_constants.h" 78 #else 79 constexpr size_t IDX_SIZE = 1; 80 constexpr size_t MID_INT_SIZE = 192; 81 #endif 82 83 // This implementation is based on the Ryu Printf algorithm by Ulf Adams: 84 // Ulf Adams. 2019. Ryū revisited: printf floating point conversion. 85 // Proc. ACM Program. Lang. 3, OOPSLA, Article 169 (October 2019), 23 pages. 86 // https://doi.org/10.1145/3360595 87 88 // This version is modified to require significantly less memory (it doesn't use 89 // a large buffer to store the result). 90 91 // The general concept of this algorithm is as follows: 92 // We want to calculate a 9 digit segment of a floating point number using this 93 // formula: floor((mantissa * 2^exponent)/10^i) % 10^9. 94 // To do so normally would involve large integers (~1000 bits for doubles), so 95 // we use a shortcut. We can avoid calculating 2^exponent / 10^i by using a 96 // lookup table. The resulting intermediate value needs to be about 192 bits to 97 // store the result with enough precision. Since this is all being done with 98 // integers for appropriate precision, we would run into a problem if 99 // i > exponent since then 2^exponent / 10^i would be less than 1. To correct 100 // for this, the actual calculation done is 2^(exponent + c) / 10^i, and then 101 // when multiplying by the mantissa we reverse this by dividing by 2^c, like so: 102 // floor((mantissa * table[exponent][i])/(2^c)) % 10^9. 103 // This gives a 9 digit value, which is small enough to fit in a 32 bit integer, 104 // and that integer is converted into a string as normal, and called a block. In 105 // this implementation, the most recent block is buffered, so that if rounding 106 // is necessary the block can be adjusted before being written to the output. 107 // Any block that is all 9s adds one to the max block counter and doesn't clear 108 // the buffer because they can cause the block above them to be rounded up. 109 110 namespace LIBC_NAMESPACE_DECL { 111 112 using BlockInt = uint32_t; 113 constexpr uint32_t BLOCK_SIZE = 9; 114 constexpr uint64_t EXP5_9 = 1953125; 115 constexpr uint64_t EXP10_9 = 1000000000; 116 117 using FPBits = fputil::FPBits<long double>; 118 119 // Larger numbers prefer a slightly larger constant than is used for the smaller 120 // numbers. 121 constexpr size_t CALC_SHIFT_CONST = 128; 122 123 namespace internal { 124 125 // Returns floor(log_10(2^e)); requires 0 <= e <= 42039. 126 LIBC_INLINE constexpr uint32_t log10_pow2(uint64_t e) { 127 LIBC_ASSERT(e <= 42039 && 128 "Incorrect exponent to perform log10_pow2 approximation."); 129 // This approximation is based on the float value for log_10(2). It first 130 // gives an incorrect result for our purposes at 42039 (well beyond the 16383 131 // maximum for long doubles). 132 133 // To get these constants I first evaluated log_10(2) to get an approximation 134 // of 0.301029996. Next I passed that value through a string to double 135 // conversion to get an explicit mantissa of 0x13441350fbd738 and an exponent 136 // of -2 (which becomes -54 when we shift the mantissa to be a non-fractional 137 // number). Next I shifted the mantissa right 12 bits to create more space for 138 // the multiplication result, adding 12 to the exponent to compensate. To 139 // check that this approximation works for our purposes I used the following 140 // python code: 141 // for i in range(16384): 142 // if(len(str(2**i)) != (((i*0x13441350fbd)>>42)+1)): 143 // print(i) 144 // The reason we add 1 is because this evaluation truncates the result, giving 145 // us the floor, whereas counting the digits of the power of 2 gives us the 146 // ceiling. With a similar loop I checked the maximum valid value and found 147 // 42039. 148 return static_cast<uint32_t>((e * 0x13441350fbdll) >> 42); 149 } 150 151 // Same as above, but with different constants. 152 LIBC_INLINE constexpr uint32_t log2_pow5(uint64_t e) { 153 return static_cast<uint32_t>((e * 0x12934f0979bll) >> 39); 154 } 155 156 // Returns 1 + floor(log_10(2^e). This could technically be off by 1 if any 157 // power of 2 was also a power of 10, but since that doesn't exist this is 158 // always accurate. This is used to calculate the maximum number of base-10 159 // digits a given e-bit number could have. 160 LIBC_INLINE constexpr uint32_t ceil_log10_pow2(uint32_t e) { 161 return log10_pow2(e) + 1; 162 } 163 164 LIBC_INLINE constexpr uint32_t div_ceil(uint32_t num, uint32_t denom) { 165 return (num + (denom - 1)) / denom; 166 } 167 168 // Returns the maximum number of 9 digit blocks a number described by the given 169 // index (which is ceil(exponent/16)) and mantissa width could need. 170 LIBC_INLINE constexpr uint32_t length_for_num(uint32_t idx, 171 uint32_t mantissa_width) { 172 return div_ceil(ceil_log10_pow2(idx) + ceil_log10_pow2(mantissa_width + 1), 173 BLOCK_SIZE); 174 } 175 176 // The formula for the table when i is positive (or zero) is as follows: 177 // floor(10^(-9i) * 2^(e + c_1) + 1) % (10^9 * 2^c_1) 178 // Rewritten slightly we get: 179 // floor(5^(-9i) * 2^(e + c_1 - 9i) + 1) % (10^9 * 2^c_1) 180 181 // TODO: Fix long doubles (needs bigger table or alternate algorithm.) 182 // Currently the table values are generated, which is very slow. 183 template <size_t INT_SIZE> 184 LIBC_INLINE constexpr UInt<MID_INT_SIZE> get_table_positive(int exponent, 185 size_t i) { 186 // INT_SIZE is the size of int that is used for the internal calculations of 187 // this function. It should be large enough to hold 2^(exponent+constant), so 188 // ~1000 for double and ~16000 for long double. Be warned that the time 189 // complexity of exponentiation is O(n^2 * log_2(m)) where n is the number of 190 // bits in the number being exponentiated and m is the exponent. 191 const int shift_amount = 192 static_cast<int>(exponent + CALC_SHIFT_CONST - (BLOCK_SIZE * i)); 193 if (shift_amount < 0) { 194 return 1; 195 } 196 UInt<INT_SIZE> num(0); 197 // MOD_SIZE is one of the limiting factors for how big the constant argument 198 // can get, since it needs to be small enough to fit in the result UInt, 199 // otherwise we'll get truncation on return. 200 constexpr UInt<INT_SIZE> MOD_SIZE = 201 (UInt<INT_SIZE>(EXP10_9) 202 << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); 203 204 num = UInt<INT_SIZE>(1) << (shift_amount); 205 if (i > 0) { 206 UInt<INT_SIZE> fives(EXP5_9); 207 fives.pow_n(i); 208 num = num / fives; 209 } 210 211 num = num + 1; 212 if (num > MOD_SIZE) { 213 auto rem = num.div_uint_half_times_pow_2( 214 EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) 215 .value(); 216 num = rem; 217 } 218 return num; 219 } 220 221 template <size_t INT_SIZE> 222 LIBC_INLINE UInt<MID_INT_SIZE> get_table_positive_df(int exponent, size_t i) { 223 static_assert(INT_SIZE == 256, 224 "Only 256 is supported as an int size right now."); 225 // This version uses dyadic floats with 256 bit mantissas to perform the same 226 // calculation as above. Due to floating point imprecision it is only accurate 227 // for the first 50 digits, but it's much faster. Since even 128 bit long 228 // doubles are only accurate to ~35 digits, the 50 digits of accuracy are 229 // enough for these floats to be converted back and forth safely. This is 230 // ideal for avoiding the size of the long double table. 231 const int shift_amount = 232 static_cast<int>(exponent + CALC_SHIFT_CONST - (9 * i)); 233 if (shift_amount < 0) { 234 return 1; 235 } 236 fputil::DyadicFloat<INT_SIZE> num(Sign::POS, 0, 1); 237 constexpr UInt<INT_SIZE> MOD_SIZE = 238 (UInt<INT_SIZE>(EXP10_9) 239 << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); 240 241 constexpr UInt<INT_SIZE> FIVE_EXP_MINUS_NINE_MANT{ 242 {0xf387295d242602a7, 0xfdd7645e011abac9, 0x31680a88f8953030, 243 0x89705f4136b4a597}}; 244 245 static const fputil::DyadicFloat<INT_SIZE> FIVE_EXP_MINUS_NINE( 246 Sign::POS, -276, FIVE_EXP_MINUS_NINE_MANT); 247 248 if (i > 0) { 249 fputil::DyadicFloat<INT_SIZE> fives = 250 fputil::pow_n(FIVE_EXP_MINUS_NINE, static_cast<uint32_t>(i)); 251 num = fives; 252 } 253 num = mul_pow_2(num, shift_amount); 254 255 // Adding one is part of the formula. 256 UInt<INT_SIZE> int_num = num.as_mantissa_type() + 1; 257 if (int_num > MOD_SIZE) { 258 auto rem = 259 int_num 260 .div_uint_half_times_pow_2( 261 EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) 262 .value(); 263 int_num = rem; 264 } 265 266 UInt<MID_INT_SIZE> result = int_num; 267 268 return result; 269 } 270 271 // The formula for the table when i is negative (or zero) is as follows: 272 // floor(10^(-9i) * 2^(c_0 - e)) % (10^9 * 2^c_0) 273 // Since we know i is always negative, we just take it as unsigned and treat it 274 // as negative. We do the same with exponent, while they're both always negative 275 // in theory, in practice they're converted to positive for simpler 276 // calculations. 277 // The formula being used looks more like this: 278 // floor(10^(9*(-i)) * 2^(c_0 + (-e))) % (10^9 * 2^c_0) 279 template <size_t INT_SIZE> 280 LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative(int exponent, size_t i) { 281 int shift_amount = CALC_SHIFT_CONST - exponent; 282 UInt<INT_SIZE> num(1); 283 constexpr UInt<INT_SIZE> MOD_SIZE = 284 (UInt<INT_SIZE>(EXP10_9) 285 << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); 286 287 size_t ten_blocks = i; 288 size_t five_blocks = 0; 289 if (shift_amount < 0) { 290 int block_shifts = (-shift_amount) / BLOCK_SIZE; 291 if (block_shifts < static_cast<int>(ten_blocks)) { 292 ten_blocks = ten_blocks - block_shifts; 293 five_blocks = block_shifts; 294 shift_amount = shift_amount + (block_shifts * BLOCK_SIZE); 295 } else { 296 ten_blocks = 0; 297 five_blocks = i; 298 shift_amount = shift_amount + (static_cast<int>(i) * BLOCK_SIZE); 299 } 300 } 301 302 if (five_blocks > 0) { 303 UInt<INT_SIZE> fives(EXP5_9); 304 fives.pow_n(five_blocks); 305 num = fives; 306 } 307 if (ten_blocks > 0) { 308 UInt<INT_SIZE> tens(EXP10_9); 309 tens.pow_n(ten_blocks); 310 if (five_blocks <= 0) { 311 num = tens; 312 } else { 313 num *= tens; 314 } 315 } 316 317 if (shift_amount > 0) { 318 num = num << shift_amount; 319 } else { 320 num = num >> (-shift_amount); 321 } 322 if (num > MOD_SIZE) { 323 auto rem = num.div_uint_half_times_pow_2( 324 EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) 325 .value(); 326 num = rem; 327 } 328 return num; 329 } 330 331 template <size_t INT_SIZE> 332 LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative_df(int exponent, size_t i) { 333 static_assert(INT_SIZE == 256, 334 "Only 256 is supported as an int size right now."); 335 // This version uses dyadic floats with 256 bit mantissas to perform the same 336 // calculation as above. Due to floating point imprecision it is only accurate 337 // for the first 50 digits, but it's much faster. Since even 128 bit long 338 // doubles are only accurate to ~35 digits, the 50 digits of accuracy are 339 // enough for these floats to be converted back and forth safely. This is 340 // ideal for avoiding the size of the long double table. 341 342 int shift_amount = CALC_SHIFT_CONST - exponent; 343 344 fputil::DyadicFloat<INT_SIZE> num(Sign::POS, 0, 1); 345 constexpr UInt<INT_SIZE> MOD_SIZE = 346 (UInt<INT_SIZE>(EXP10_9) 347 << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); 348 349 constexpr UInt<INT_SIZE> TEN_EXP_NINE_MANT(EXP10_9); 350 351 static const fputil::DyadicFloat<INT_SIZE> TEN_EXP_NINE(Sign::POS, 0, 352 TEN_EXP_NINE_MANT); 353 354 if (i > 0) { 355 fputil::DyadicFloat<INT_SIZE> tens = 356 fputil::pow_n(TEN_EXP_NINE, static_cast<uint32_t>(i)); 357 num = tens; 358 } 359 num = mul_pow_2(num, shift_amount); 360 361 UInt<INT_SIZE> int_num = num.as_mantissa_type(); 362 if (int_num > MOD_SIZE) { 363 auto rem = 364 int_num 365 .div_uint_half_times_pow_2( 366 EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) 367 .value(); 368 int_num = rem; 369 } 370 371 UInt<MID_INT_SIZE> result = int_num; 372 373 return result; 374 } 375 376 LIBC_INLINE uint32_t mul_shift_mod_1e9(const FPBits::StorageType mantissa, 377 const UInt<MID_INT_SIZE> &large, 378 const int32_t shift_amount) { 379 // make sure the number of bits is always divisible by 64 380 UInt<internal::div_ceil(MID_INT_SIZE + FPBits::STORAGE_LEN, 64) * 64> val( 381 large); 382 val = (val * mantissa) >> shift_amount; 383 return static_cast<uint32_t>( 384 val.div_uint_half_times_pow_2(static_cast<uint32_t>(EXP10_9), 0).value()); 385 } 386 387 } // namespace internal 388 389 // Convert floating point values to their string representation. 390 // Because the result may not fit in a reasonably sized array, the caller must 391 // request blocks of digits and convert them from integers to strings themself. 392 // Blocks contain the most digits that can be stored in an BlockInt. This is 9 393 // digits for a 32 bit int and 18 digits for a 64 bit int. 394 // The intended use pattern is to create a FloatToString object of the 395 // appropriate type, then call get_positive_blocks to get an approximate number 396 // of blocks there are before the decimal point. Now the client code can start 397 // calling get_positive_block in a loop from the number of positive blocks to 398 // zero. This will give all digits before the decimal point. Then the user can 399 // start calling get_negative_block in a loop from 0 until the number of digits 400 // they need is reached. As an optimization, the client can use 401 // zero_blocks_after_point to find the number of blocks that are guaranteed to 402 // be zero after the decimal point and before the non-zero digits. Additionally, 403 // is_lowest_block will return if the current block is the lowest non-zero 404 // block. 405 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0> 406 class FloatToString { 407 fputil::FPBits<T> float_bits; 408 int exponent; 409 FPBits::StorageType mantissa; 410 411 static constexpr int FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN; 412 static constexpr int EXP_BIAS = fputil::FPBits<T>::EXP_BIAS; 413 414 public: 415 LIBC_INLINE constexpr FloatToString(T init_float) : float_bits(init_float) { 416 exponent = float_bits.get_explicit_exponent(); 417 mantissa = float_bits.get_explicit_mantissa(); 418 419 // Adjust for the width of the mantissa. 420 exponent -= FRACTION_LEN; 421 } 422 423 LIBC_INLINE constexpr bool is_nan() { return float_bits.is_nan(); } 424 LIBC_INLINE constexpr bool is_inf() { return float_bits.is_inf(); } 425 LIBC_INLINE constexpr bool is_inf_or_nan() { 426 return float_bits.is_inf_or_nan(); 427 } 428 429 // get_block returns an integer that represents the digits in the requested 430 // block. 431 LIBC_INLINE constexpr BlockInt get_positive_block(int block_index) { 432 if (exponent >= -FRACTION_LEN) { 433 // idx is ceil(exponent/16) or 0 if exponent is negative. This is used to 434 // find the coarse section of the POW10_SPLIT table that will be used to 435 // calculate the 9 digit window, as well as some other related values. 436 const uint32_t idx = 437 exponent < 0 438 ? 0 439 : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE; 440 441 // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) - 442 // exponent 443 444 const uint32_t pos_exp = idx * IDX_SIZE; 445 446 UInt<MID_INT_SIZE> val; 447 448 #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) 449 // ----------------------- DYADIC FLOAT CALC MODE ------------------------ 450 const int32_t SHIFT_CONST = CALC_SHIFT_CONST; 451 val = internal::get_table_positive_df<256>(IDX_SIZE * idx, block_index); 452 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC) 453 454 // ---------------------------- INT CALC MODE ---------------------------- 455 const int32_t SHIFT_CONST = CALC_SHIFT_CONST; 456 const uint64_t MAX_POW_2_SIZE = 457 pos_exp + CALC_SHIFT_CONST - (BLOCK_SIZE * block_index); 458 const uint64_t MAX_POW_5_SIZE = 459 internal::log2_pow5(BLOCK_SIZE * block_index); 460 const uint64_t MAX_INT_SIZE = 461 (MAX_POW_2_SIZE > MAX_POW_5_SIZE) ? MAX_POW_2_SIZE : MAX_POW_5_SIZE; 462 463 if (MAX_INT_SIZE < 1024) { 464 val = internal::get_table_positive<1024>(pos_exp, block_index); 465 } else if (MAX_INT_SIZE < 2048) { 466 val = internal::get_table_positive<2048>(pos_exp, block_index); 467 } else if (MAX_INT_SIZE < 4096) { 468 val = internal::get_table_positive<4096>(pos_exp, block_index); 469 } else if (MAX_INT_SIZE < 8192) { 470 val = internal::get_table_positive<8192>(pos_exp, block_index); 471 } else if (MAX_INT_SIZE < 16384) { 472 val = internal::get_table_positive<16384>(pos_exp, block_index); 473 } else { 474 val = internal::get_table_positive<16384 + 128>(pos_exp, block_index); 475 } 476 #else 477 // ----------------------------- TABLE MODE ------------------------------ 478 const int32_t SHIFT_CONST = TABLE_SHIFT_CONST; 479 480 val = POW10_SPLIT[POW10_OFFSET[idx] + block_index]; 481 #endif 482 const uint32_t shift_amount = SHIFT_CONST + pos_exp - exponent; 483 484 const BlockInt digits = 485 internal::mul_shift_mod_1e9(mantissa, val, (int32_t)(shift_amount)); 486 return digits; 487 } else { 488 return 0; 489 } 490 } 491 492 LIBC_INLINE constexpr BlockInt get_negative_block(int block_index) { 493 if (exponent < 0) { 494 const int32_t idx = -exponent / IDX_SIZE; 495 496 UInt<MID_INT_SIZE> val; 497 498 const uint32_t pos_exp = static_cast<uint32_t>(idx * IDX_SIZE); 499 500 #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) 501 // ----------------------- DYADIC FLOAT CALC MODE ------------------------ 502 const int32_t SHIFT_CONST = CALC_SHIFT_CONST; 503 val = internal::get_table_negative_df<256>(pos_exp, block_index + 1); 504 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC) 505 // ---------------------------- INT CALC MODE ---------------------------- 506 const int32_t SHIFT_CONST = CALC_SHIFT_CONST; 507 508 const uint64_t NUM_FIVES = (block_index + 1) * BLOCK_SIZE; 509 // Round MAX_INT_SIZE up to the nearest 64 (adding 1 because log2_pow5 510 // implicitly rounds down). 511 const uint64_t MAX_INT_SIZE = 512 ((internal::log2_pow5(NUM_FIVES) / 64) + 1) * 64; 513 514 if (MAX_INT_SIZE < 1024) { 515 val = internal::get_table_negative<1024>(pos_exp, block_index + 1); 516 } else if (MAX_INT_SIZE < 2048) { 517 val = internal::get_table_negative<2048>(pos_exp, block_index + 1); 518 } else if (MAX_INT_SIZE < 4096) { 519 val = internal::get_table_negative<4096>(pos_exp, block_index + 1); 520 } else if (MAX_INT_SIZE < 8192) { 521 val = internal::get_table_negative<8192>(pos_exp, block_index + 1); 522 } else if (MAX_INT_SIZE < 16384) { 523 val = internal::get_table_negative<16384>(pos_exp, block_index + 1); 524 } else { 525 val = internal::get_table_negative<16384 + 8192>(pos_exp, 526 block_index + 1); 527 } 528 #else 529 // ----------------------------- TABLE MODE ------------------------------ 530 // if the requested block is zero 531 const int32_t SHIFT_CONST = TABLE_SHIFT_CONST; 532 if (block_index < MIN_BLOCK_2[idx]) { 533 return 0; 534 } 535 const uint32_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx]; 536 // If every digit after the requested block is zero. 537 if (p >= POW10_OFFSET_2[idx + 1]) { 538 return 0; 539 } 540 541 val = POW10_SPLIT_2[p]; 542 #endif 543 const int32_t shift_amount = 544 SHIFT_CONST + (-exponent - static_cast<int32_t>(pos_exp)); 545 BlockInt digits = 546 internal::mul_shift_mod_1e9(mantissa, val, shift_amount); 547 return digits; 548 } else { 549 return 0; 550 } 551 } 552 553 LIBC_INLINE constexpr BlockInt get_block(int block_index) { 554 if (block_index >= 0) { 555 return get_positive_block(block_index); 556 } else { 557 return get_negative_block(-1 - block_index); 558 } 559 } 560 561 LIBC_INLINE constexpr size_t get_positive_blocks() { 562 if (exponent < -FRACTION_LEN) 563 return 0; 564 const uint32_t idx = 565 exponent < 0 566 ? 0 567 : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE; 568 return internal::length_for_num(idx * IDX_SIZE, FRACTION_LEN); 569 } 570 571 // This takes the index of a block after the decimal point (a negative block) 572 // and return if it's sure that all of the digits after it are zero. 573 LIBC_INLINE constexpr bool is_lowest_block(size_t negative_block_index) { 574 #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE 575 // The decimal representation of 2**(-i) will have exactly i digits after 576 // the decimal point. 577 int num_requested_digits = 578 static_cast<int>((negative_block_index + 1) * BLOCK_SIZE); 579 580 return num_requested_digits > -exponent; 581 #else 582 const int32_t idx = -exponent / IDX_SIZE; 583 const size_t p = 584 POW10_OFFSET_2[idx] + negative_block_index - MIN_BLOCK_2[idx]; 585 // If the remaining digits are all 0, then this is the lowest block. 586 return p >= POW10_OFFSET_2[idx + 1]; 587 #endif 588 } 589 590 LIBC_INLINE constexpr size_t zero_blocks_after_point() { 591 #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE 592 if (exponent < -FRACTION_LEN) { 593 const int pos_exp = -exponent - 1; 594 const uint32_t pos_idx = 595 static_cast<uint32_t>(pos_exp + (IDX_SIZE - 1)) / IDX_SIZE; 596 const int32_t pos_len = ((internal::ceil_log10_pow2(pos_idx * IDX_SIZE) - 597 internal::ceil_log10_pow2(FRACTION_LEN + 1)) / 598 BLOCK_SIZE) - 599 1; 600 return static_cast<uint32_t>(pos_len > 0 ? pos_len : 0); 601 } 602 return 0; 603 #else 604 return MIN_BLOCK_2[-exponent / IDX_SIZE]; 605 #endif 606 } 607 }; 608 609 #if !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) && \ 610 !defined(LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD) 611 // --------------------------- LONG DOUBLE FUNCTIONS --------------------------- 612 613 // this algorithm will work exactly the same for 80 bit and 128 bit long 614 // doubles. They have the same max exponent, but even if they didn't the 615 // constants should be calculated to be correct for any provided floating point 616 // type. 617 618 template <> class FloatToString<long double> { 619 fputil::FPBits<long double> float_bits; 620 bool is_negative = 0; 621 int exponent = 0; 622 FPBits::StorageType mantissa = 0; 623 624 static constexpr int FRACTION_LEN = fputil::FPBits<long double>::FRACTION_LEN; 625 static constexpr int EXP_BIAS = fputil::FPBits<long double>::EXP_BIAS; 626 static constexpr size_t UINT_WORD_SIZE = 64; 627 628 static constexpr size_t FLOAT_AS_INT_WIDTH = 629 internal::div_ceil(fputil::FPBits<long double>::MAX_BIASED_EXPONENT - 630 FPBits::EXP_BIAS, 631 UINT_WORD_SIZE) * 632 UINT_WORD_SIZE; 633 static constexpr size_t EXTRA_INT_WIDTH = 634 internal::div_ceil(sizeof(long double) * CHAR_BIT, UINT_WORD_SIZE) * 635 UINT_WORD_SIZE; 636 637 using wide_int = UInt<FLOAT_AS_INT_WIDTH + EXTRA_INT_WIDTH>; 638 639 // float_as_fixed represents the floating point number as a fixed point number 640 // with the point EXTRA_INT_WIDTH bits from the left of the number. This can 641 // store any number with a negative exponent. 642 wide_int float_as_fixed = 0; 643 int int_block_index = 0; 644 645 static constexpr size_t BLOCK_BUFFER_LEN = 646 internal::div_ceil(internal::log10_pow2(FLOAT_AS_INT_WIDTH), BLOCK_SIZE) + 647 1; 648 BlockInt block_buffer[BLOCK_BUFFER_LEN] = {0}; 649 size_t block_buffer_valid = 0; 650 651 template <size_t Bits> 652 LIBC_INLINE static constexpr BlockInt grab_digits(UInt<Bits> &int_num) { 653 auto wide_result = int_num.div_uint_half_times_pow_2(EXP5_9, 9); 654 // the optional only comes into effect when dividing by 0, which will 655 // never happen here. Thus, we just assert that it has value. 656 LIBC_ASSERT(wide_result.has_value()); 657 return static_cast<BlockInt>(wide_result.value()); 658 } 659 660 LIBC_INLINE static constexpr void zero_leading_digits(wide_int &int_num) { 661 // WORD_SIZE is the width of the numbers used to internally represent the 662 // UInt 663 for (size_t i = 0; i < EXTRA_INT_WIDTH / wide_int::WORD_SIZE; ++i) 664 int_num[i + (FLOAT_AS_INT_WIDTH / wide_int::WORD_SIZE)] = 0; 665 } 666 667 // init_convert initializes float_as_int, cur_block, and block_buffer based on 668 // the mantissa and exponent of the initial number. Calling it will always 669 // return the class to the starting state. 670 LIBC_INLINE constexpr void init_convert() { 671 // No calculation necessary for the 0 case. 672 if (mantissa == 0 && exponent == 0) 673 return; 674 675 if (exponent > 0) { 676 // if the exponent is positive, then the number is fully above the decimal 677 // point. In this case we represent the float as an integer, then divide 678 // by 10^BLOCK_SIZE and take the remainder as our next block. This 679 // generates the digits from right to left, but the digits will be written 680 // from left to right, so it caches the results so they can be read in 681 // reverse order. 682 683 wide_int float_as_int = mantissa; 684 685 float_as_int <<= exponent; 686 int_block_index = 0; 687 688 while (float_as_int > 0) { 689 LIBC_ASSERT(int_block_index < static_cast<int>(BLOCK_BUFFER_LEN)); 690 block_buffer[int_block_index] = 691 grab_digits<FLOAT_AS_INT_WIDTH + EXTRA_INT_WIDTH>(float_as_int); 692 ++int_block_index; 693 } 694 block_buffer_valid = int_block_index; 695 696 } else { 697 // if the exponent is not positive, then the number is at least partially 698 // below the decimal point. In this case we represent the float as a fixed 699 // point number with the decimal point after the top EXTRA_INT_WIDTH bits. 700 float_as_fixed = mantissa; 701 702 const int SHIFT_AMOUNT = FLOAT_AS_INT_WIDTH + exponent; 703 static_assert(EXTRA_INT_WIDTH >= sizeof(long double) * 8); 704 float_as_fixed <<= SHIFT_AMOUNT; 705 706 // If there are still digits above the decimal point, handle those. 707 if (cpp::countl_zero(float_as_fixed) < 708 static_cast<int>(EXTRA_INT_WIDTH)) { 709 UInt<EXTRA_INT_WIDTH> above_decimal_point = 710 float_as_fixed >> FLOAT_AS_INT_WIDTH; 711 712 size_t positive_int_block_index = 0; 713 while (above_decimal_point > 0) { 714 block_buffer[positive_int_block_index] = 715 grab_digits<EXTRA_INT_WIDTH>(above_decimal_point); 716 ++positive_int_block_index; 717 } 718 block_buffer_valid = positive_int_block_index; 719 720 // Zero all digits above the decimal point. 721 zero_leading_digits(float_as_fixed); 722 int_block_index = 0; 723 } 724 } 725 } 726 727 public: 728 LIBC_INLINE constexpr FloatToString(long double init_float) 729 : float_bits(init_float) { 730 is_negative = float_bits.is_neg(); 731 exponent = float_bits.get_explicit_exponent(); 732 mantissa = float_bits.get_explicit_mantissa(); 733 734 // Adjust for the width of the mantissa. 735 exponent -= FRACTION_LEN; 736 737 this->init_convert(); 738 } 739 740 LIBC_INLINE constexpr size_t get_positive_blocks() { 741 if (exponent < -FRACTION_LEN) 742 return 0; 743 744 const uint32_t idx = 745 exponent < 0 746 ? 0 747 : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE; 748 return internal::length_for_num(idx * IDX_SIZE, FRACTION_LEN); 749 } 750 751 LIBC_INLINE constexpr size_t zero_blocks_after_point() { 752 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE 753 return MIN_BLOCK_2[-exponent / IDX_SIZE]; 754 #else 755 if (exponent >= -FRACTION_LEN) 756 return 0; 757 758 const int pos_exp = -exponent - 1; 759 const uint32_t pos_idx = 760 static_cast<uint32_t>(pos_exp + (IDX_SIZE - 1)) / IDX_SIZE; 761 const int32_t pos_len = ((internal::ceil_log10_pow2(pos_idx * IDX_SIZE) - 762 internal::ceil_log10_pow2(FRACTION_LEN + 1)) / 763 BLOCK_SIZE) - 764 1; 765 return static_cast<uint32_t>(pos_len > 0 ? pos_len : 0); 766 #endif 767 } 768 769 LIBC_INLINE constexpr bool is_lowest_block(size_t negative_block_index) { 770 // The decimal representation of 2**(-i) will have exactly i digits after 771 // the decimal point. 772 const int num_requested_digits = 773 static_cast<int>((negative_block_index + 1) * BLOCK_SIZE); 774 775 return num_requested_digits > -exponent; 776 } 777 778 LIBC_INLINE constexpr BlockInt get_positive_block(int block_index) { 779 if (exponent < -FRACTION_LEN) 780 return 0; 781 if (block_index > static_cast<int>(block_buffer_valid) || block_index < 0) 782 return 0; 783 784 LIBC_ASSERT(block_index < static_cast<int>(BLOCK_BUFFER_LEN)); 785 786 return block_buffer[block_index]; 787 } 788 789 LIBC_INLINE constexpr BlockInt get_negative_block(int negative_block_index) { 790 if (exponent >= 0) 791 return 0; 792 793 // negative_block_index starts at 0 with the first block after the decimal 794 // point, and 1 with the second and so on. This converts to the same 795 // block_index used everywhere else. 796 797 const int block_index = -1 - negative_block_index; 798 799 // If we're currently after the requested block (remember these are 800 // negative indices) we reset the number to the start. This is only 801 // likely to happen in %g calls. This will also reset int_block_index. 802 // if (block_index > int_block_index) { 803 // init_convert(); 804 // } 805 806 // Printf is the only existing user of this code and it will only ever move 807 // downwards, except for %g but that currently creates a second 808 // float_to_string object so this assertion still holds. If a new user needs 809 // the ability to step backwards, uncomment the code above. 810 LIBC_ASSERT(block_index <= int_block_index); 811 812 // If we are currently before the requested block. Step until we reach the 813 // requested block. This is likely to only be one step. 814 while (block_index < int_block_index) { 815 zero_leading_digits(float_as_fixed); 816 float_as_fixed.mul(EXP10_9); 817 --int_block_index; 818 } 819 820 // We're now on the requested block, return the current block. 821 return static_cast<BlockInt>(float_as_fixed >> FLOAT_AS_INT_WIDTH); 822 } 823 824 LIBC_INLINE constexpr BlockInt get_block(int block_index) { 825 if (block_index >= 0) 826 return get_positive_block(block_index); 827 828 return get_negative_block(-1 - block_index); 829 } 830 }; 831 832 #endif // !LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64 && 833 // !LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD 834 835 } // namespace LIBC_NAMESPACE_DECL 836 837 #endif // LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H 838