1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "big-radix-floating-point.h" 10 #include "flang/Common/bit-population-count.h" 11 #include "flang/Common/leading-zero-bit-count.h" 12 #include "flang/Decimal/binary-floating-point.h" 13 #include "flang/Decimal/decimal.h" 14 #include <cinttypes> 15 #include <cstring> 16 #include <ctype.h> 17 18 namespace Fortran::decimal { 19 20 template <int PREC, int LOG10RADIX> 21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber( 22 const char *&p, bool &inexact, const char *end) { 23 SetToZero(); 24 if (end && p >= end) { 25 return false; 26 } 27 // Skip leading spaces 28 for (; p != end && *p == ' '; ++p) { 29 } 30 if (p == end) { 31 return false; 32 } 33 const char *q{p}; 34 isNegative_ = *q == '-'; 35 if (*q == '-' || *q == '+') { 36 ++q; 37 } 38 const char *start{q}; 39 for (; q != end && *q == '0'; ++q) { 40 } 41 const char *firstDigit{q}; 42 for (; q != end && *q >= '0' && *q <= '9'; ++q) { 43 } 44 const char *point{nullptr}; 45 if (q != end && *q == '.') { 46 point = q; 47 for (++q; q != end && *q >= '0' && *q <= '9'; ++q) { 48 } 49 } 50 if (q == start || (q == start + 1 && start == point)) { 51 return false; // require at least one digit 52 } 53 // There's a valid number here; set the reference argument to point to 54 // the first character afterward, which might be an exponent part. 55 p = q; 56 // Strip off trailing zeroes 57 if (point) { 58 while (q[-1] == '0') { 59 --q; 60 } 61 if (q[-1] == '.') { 62 point = nullptr; 63 --q; 64 } 65 } 66 if (!point) { 67 while (q > firstDigit && q[-1] == '0') { 68 --q; 69 ++exponent_; 70 } 71 } 72 // Trim any excess digits 73 const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)}; 74 if (q > limit) { 75 inexact = true; 76 if (point >= limit) { 77 q = point; 78 point = nullptr; 79 } 80 if (!point) { 81 exponent_ += q - limit; 82 } 83 q = limit; 84 } 85 if (point) { 86 exponent_ -= static_cast<int>(q - point - 1); 87 } 88 if (q == firstDigit) { 89 exponent_ = 0; // all zeros 90 } 91 // Rack the decimal digits up into big Digits. 92 for (auto times{radix}; q-- > firstDigit;) { 93 if (*q != '.') { 94 if (times == radix) { 95 digit_[digits_++] = *q - '0'; 96 times = 10; 97 } else { 98 digit_[digits_ - 1] += times * (*q - '0'); 99 times *= 10; 100 } 101 } 102 } 103 // Look for an optional exponent field. 104 if (p == end) { 105 return true; 106 } 107 q = p; 108 switch (*q) { 109 case 'e': 110 case 'E': 111 case 'd': 112 case 'D': 113 case 'q': 114 case 'Q': { 115 if (++q == end) { 116 break; 117 } 118 bool negExpo{*q == '-'}; 119 if (*q == '-' || *q == '+') { 120 ++q; 121 } 122 if (q != end && *q >= '0' && *q <= '9') { 123 int expo{0}; 124 for (; q != end && *q == '0'; ++q) { 125 } 126 const char *expDig{q}; 127 for (; q != end && *q >= '0' && *q <= '9'; ++q) { 128 expo = 10 * expo + *q - '0'; 129 } 130 if (q >= expDig + 8) { 131 // There's a ridiculous number of nonzero exponent digits. 132 // The decimal->binary conversion routine will cope with 133 // returning 0 or Inf, but we must ensure that "expo" didn't 134 // overflow back around to something legal. 135 expo = 10 * Real::decimalRange; 136 exponent_ = 0; 137 } 138 p = q; // exponent is valid; advance the termination pointer 139 if (negExpo) { 140 exponent_ -= expo; 141 } else { 142 exponent_ += expo; 143 } 144 } 145 } break; 146 default: 147 break; 148 } 149 return true; 150 } 151 152 template <int PREC, int LOG10RADIX> 153 void BigRadixFloatingPointNumber<PREC, 154 LOG10RADIX>::LoseLeastSignificantDigit() { 155 Digit LSD{digit_[0]}; 156 for (int j{0}; j < digits_ - 1; ++j) { 157 digit_[j] = digit_[j + 1]; 158 } 159 digit_[digits_ - 1] = 0; 160 bool incr{false}; 161 switch (rounding_) { 162 case RoundNearest: 163 incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); 164 break; 165 case RoundUp: 166 incr = LSD > 0 && !isNegative_; 167 break; 168 case RoundDown: 169 incr = LSD > 0 && isNegative_; 170 break; 171 case RoundToZero: 172 break; 173 case RoundCompatible: 174 incr = LSD >= radix / 2; 175 break; 176 } 177 for (int j{0}; (digit_[j] += incr) == radix; ++j) { 178 digit_[j] = 0; 179 } 180 } 181 182 // This local utility class represents an unrounded nonnegative 183 // binary floating-point value with an unbiased (i.e., signed) 184 // binary exponent, an integer value (not a fraction) with an implied 185 // binary point to its *right*, and some guard bits for rounding. 186 template <int PREC> class IntermediateFloat { 187 public: 188 static constexpr int precision{PREC}; 189 using IntType = common::HostUnsignedIntType<precision>; 190 static constexpr IntType topBit{IntType{1} << (precision - 1)}; 191 static constexpr IntType mask{topBit + (topBit - 1)}; 192 193 IntermediateFloat() {} 194 IntermediateFloat(const IntermediateFloat &) = default; 195 196 // Assumes that exponent_ is valid on entry, and may increment it. 197 // Returns the number of guard_ bits that have been determined. 198 template <typename UINT> bool SetTo(UINT n) { 199 static constexpr int nBits{CHAR_BIT * sizeof n}; 200 if constexpr (precision >= nBits) { 201 value_ = n; 202 guard_ = 0; 203 return 0; 204 } else { 205 int shift{common::BitsNeededFor(n) - precision}; 206 if (shift <= 0) { 207 value_ = n; 208 guard_ = 0; 209 return 0; 210 } else { 211 value_ = n >> shift; 212 exponent_ += shift; 213 n <<= nBits - shift; 214 guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); 215 return shift; 216 } 217 } 218 } 219 220 void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } 221 bool IsFull() const { return value_ >= topBit; } 222 void AdjustExponent(int by) { exponent_ += by; } 223 void SetGuard(int g) { 224 guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1); 225 } 226 227 ConversionToBinaryResult<PREC> ToBinary( 228 bool isNegative, FortranRounding) const; 229 230 private: 231 static constexpr int guardBits{3}; // guard, round, sticky 232 using GuardType = int; 233 static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; 234 235 IntType value_{0}; 236 GuardType guard_{0}; 237 int exponent_{0}; 238 }; 239 240 // The standard says that these overflow cases round to "representable" 241 // numbers, and some popular compilers interpret that to mean +/-HUGE() 242 // rather than +/-Inf. 243 static inline constexpr bool RoundOverflowToHuge( 244 enum FortranRounding rounding, bool isNegative) { 245 return rounding == RoundToZero || (!isNegative && rounding == RoundDown) || 246 (isNegative && rounding == RoundUp); 247 } 248 249 template <int PREC> 250 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary( 251 bool isNegative, FortranRounding rounding) const { 252 using Binary = BinaryFloatingPointNumber<PREC>; 253 // Create a fraction with a binary point to the left of the integer 254 // value_, and bias the exponent. 255 IntType fraction{value_}; 256 GuardType guard{guard_}; 257 int expo{exponent_ + Binary::exponentBias + (precision - 1)}; 258 while (expo < 1 && (fraction > 0 || guard > oneHalf)) { 259 guard = (guard & 1) | (guard >> 1) | 260 ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1)); 261 fraction >>= 1; 262 ++expo; 263 } 264 int flags{Exact}; 265 if (guard != 0) { 266 flags |= Inexact; 267 } 268 if (fraction == 0) { 269 if (guard <= oneHalf) { 270 if ((!isNegative && rounding == RoundUp) || 271 (isNegative && rounding == RoundDown)) { 272 // round to least nonzero value 273 expo = 0; 274 } else { // round to zero 275 if (guard != 0) { 276 flags |= Underflow; 277 } 278 return {Binary{}, static_cast<enum ConversionResultFlags>(flags)}; 279 } 280 } 281 } else { 282 // The value is nonzero; normalize it. 283 while (fraction < topBit && expo > 1) { 284 --expo; 285 fraction = fraction * 2 + (guard >> (guardBits - 2)); 286 guard = 287 (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); 288 } 289 } 290 // Apply rounding 291 bool incr{false}; 292 switch (rounding) { 293 case RoundNearest: 294 incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); 295 break; 296 case RoundUp: 297 incr = guard != 0 && !isNegative; 298 break; 299 case RoundDown: 300 incr = guard != 0 && isNegative; 301 break; 302 case RoundToZero: 303 break; 304 case RoundCompatible: 305 incr = guard >= oneHalf; 306 break; 307 } 308 if (incr) { 309 if (fraction == mask) { 310 // rounding causes a carry 311 ++expo; 312 fraction = topBit; 313 } else { 314 ++fraction; 315 } 316 } 317 if (expo == 1 && fraction < topBit) { 318 expo = 0; // subnormal 319 flags |= Underflow; 320 } else if (expo == 0) { 321 flags |= Underflow; 322 } else if (expo >= Binary::maxExponent) { 323 if (RoundOverflowToHuge(rounding, isNegative)) { 324 expo = Binary::maxExponent - 1; 325 fraction = mask; 326 } else { // Inf 327 expo = Binary::maxExponent; 328 flags |= Overflow; 329 if constexpr (Binary::bits == 80) { // x87 330 fraction = IntType{1} << 63; 331 } else { 332 fraction = 0; 333 } 334 } 335 } 336 using Raw = typename Binary::RawType; 337 Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1); 338 raw |= static_cast<Raw>(expo) << Binary::significandBits; 339 if constexpr (Binary::isImplicitMSB) { 340 fraction &= ~topBit; 341 } 342 raw |= fraction; 343 return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)}; 344 } 345 346 template <int PREC, int LOG10RADIX> 347 ConversionToBinaryResult<PREC> 348 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() { 349 // On entry, *this holds a multi-precision integer value in a radix of a 350 // large power of ten. Its radix point is defined to be to the right of its 351 // digits, and "exponent_" is the power of ten by which it is to be scaled. 352 Normalize(); 353 if (digits_ == 0) { // zero value 354 return {Real{SignBit()}}; 355 } 356 // The value is not zero: x = D. * 10.**E 357 // Shift our perspective on the radix (& decimal) point so that 358 // it sits to the *left* of the digits: i.e., x = .D * 10.**E 359 exponent_ += digits_ * log10Radix; 360 // Sanity checks for ridiculous exponents 361 static constexpr int crazy{2 * Real::decimalRange + log10Radix}; 362 if (exponent_ < -crazy) { 363 enum ConversionResultFlags flags { 364 static_cast<enum ConversionResultFlags>(Inexact | Underflow) 365 }; 366 if ((!isNegative_ && rounding_ == RoundUp) || 367 (isNegative_ && rounding_ == RoundDown)) { 368 // return least nonzero value 369 return {Real{Raw{1} | SignBit()}, flags}; 370 } else { // underflow to +/-0. 371 return {Real{SignBit()}, flags}; 372 } 373 } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf 374 if (RoundOverflowToHuge(rounding_, isNegative_)) { 375 return {Real{HUGE()}}; 376 } else { 377 return {Real{Infinity()}, Overflow}; 378 } 379 } 380 // Apply any negative decimal exponent by multiplication 381 // by a power of two, adjusting the binary exponent to compensate. 382 IntermediateFloat<PREC> f; 383 while (exponent_ < log10Radix) { 384 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) 385 f.AdjustExponent(-9); 386 digitLimit_ = digits_; 387 if (int carry{MultiplyWithoutNormalization<512>()}) { 388 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 389 PushCarry(carry); 390 exponent_ += log10Radix; 391 } 392 } 393 // Apply any positive decimal exponent greater than 394 // is needed to treat the topmost digit as an integer 395 // part by multiplying by 10 or 10000 repeatedly. 396 while (exponent_ > log10Radix) { 397 digitLimit_ = digits_; 398 int carry; 399 if (exponent_ >= log10Radix + 4) { 400 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) 401 exponent_ -= 4; 402 carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); 403 f.AdjustExponent(4); 404 } else { 405 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) 406 --exponent_; 407 carry = MultiplyWithoutNormalization<5>(); 408 f.AdjustExponent(1); 409 } 410 if (carry != 0) { 411 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 412 PushCarry(carry); 413 exponent_ += log10Radix; 414 } 415 } 416 // So exponent_ is now log10Radix, meaning that the 417 // MSD can be taken as an integer part and transferred 418 // to the binary result. 419 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) 420 int guardShift{f.SetTo(digit_[--digits_])}; 421 // Transfer additional bits until the result is normal. 422 digitLimit_ = digits_; 423 while (!f.IsFull()) { 424 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) 425 f.AdjustExponent(-1); 426 std::uint32_t carry = MultiplyWithoutNormalization<2>(); 427 f.ShiftIn(carry); 428 } 429 // Get the next few bits for rounding. Allow for some guard bits 430 // that may have already been set in f.SetTo() above. 431 int guard{0}; 432 if (guardShift == 0) { 433 guard = MultiplyWithoutNormalization<4>(); 434 } else if (guardShift == 1) { 435 guard = MultiplyWithoutNormalization<2>(); 436 } 437 guard = guard + guard + !IsZero(); 438 f.SetGuard(guard); 439 return f.ToBinary(isNegative_, rounding_); 440 } 441 442 template <int PREC, int LOG10RADIX> 443 ConversionToBinaryResult<PREC> 444 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary( 445 const char *&p, const char *limit) { 446 bool inexact{false}; 447 if (ParseNumber(p, inexact, limit)) { 448 auto result{ConvertToBinary()}; 449 if (inexact) { 450 result.flags = 451 static_cast<enum ConversionResultFlags>(result.flags | Inexact); 452 } 453 return result; 454 } else { 455 // Could not parse a decimal floating-point number. p has been 456 // advanced over any leading spaces. Most Fortran compilers set 457 // the sign bit for -NaN. 458 const char *q{p}; 459 if (!limit || q < limit) { 460 isNegative_ = *q == '-'; 461 if (isNegative_ || *q == '+') { 462 ++q; 463 } 464 } 465 if ((!limit || limit >= q + 3) && toupper(q[0]) == 'N' && 466 toupper(q[1]) == 'A' && toupper(q[2]) == 'N') { 467 // NaN 468 p = q + 3; 469 bool isQuiet{true}; 470 if ((!limit || p < limit) && *p == '(') { 471 int depth{1}; 472 do { 473 ++p; 474 if (limit && p >= limit) { 475 // Invalid input 476 return {Real{NaN(false)}, Invalid}; 477 } else if (*p == '(') { 478 ++depth; 479 } else if (*p == ')') { 480 --depth; 481 } else if (*p != ' ') { 482 // Implementation dependent, but other compilers 483 // all return quiet NaNs. 484 } 485 } while (depth > 0); 486 ++p; 487 } 488 return {Real{NaN(isQuiet)}}; 489 } else { // Inf? 490 if ((!limit || limit >= q + 3) && toupper(q[0]) == 'I' && 491 toupper(q[1]) == 'N' && toupper(q[2]) == 'F') { 492 if ((!limit || limit >= q + 8) && toupper(q[3]) == 'I' && 493 toupper(q[4]) == 'N' && toupper(q[5]) == 'I' && 494 toupper(q[6]) == 'T' && toupper(q[7]) == 'Y') { 495 p = q + 8; 496 } else { 497 p = q + 3; 498 } 499 return {Real{Infinity()}}; 500 } else { 501 // Invalid input 502 return {Real{NaN()}, Invalid}; 503 } 504 } 505 } 506 } 507 508 template <int PREC> 509 ConversionToBinaryResult<PREC> ConvertToBinary( 510 const char *&p, enum FortranRounding rounding, const char *end) { 511 return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end); 512 } 513 514 template ConversionToBinaryResult<8> ConvertToBinary<8>( 515 const char *&, enum FortranRounding, const char *end); 516 template ConversionToBinaryResult<11> ConvertToBinary<11>( 517 const char *&, enum FortranRounding, const char *end); 518 template ConversionToBinaryResult<24> ConvertToBinary<24>( 519 const char *&, enum FortranRounding, const char *end); 520 template ConversionToBinaryResult<53> ConvertToBinary<53>( 521 const char *&, enum FortranRounding, const char *end); 522 template ConversionToBinaryResult<64> ConvertToBinary<64>( 523 const char *&, enum FortranRounding, const char *end); 524 template ConversionToBinaryResult<113> ConvertToBinary<113>( 525 const char *&, enum FortranRounding, const char *end); 526 527 extern "C" { 528 enum ConversionResultFlags ConvertDecimalToFloat( 529 const char **p, float *f, enum FortranRounding rounding) { 530 auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; 531 std::memcpy(reinterpret_cast<void *>(f), 532 reinterpret_cast<const void *>(&result.binary), sizeof *f); 533 return result.flags; 534 } 535 enum ConversionResultFlags ConvertDecimalToDouble( 536 const char **p, double *d, enum FortranRounding rounding) { 537 auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; 538 std::memcpy(reinterpret_cast<void *>(d), 539 reinterpret_cast<const void *>(&result.binary), sizeof *d); 540 return result.flags; 541 } 542 enum ConversionResultFlags ConvertDecimalToLongDouble( 543 const char **p, long double *ld, enum FortranRounding rounding) { 544 auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; 545 std::memcpy(reinterpret_cast<void *>(ld), 546 reinterpret_cast<const void *>(&result.binary), sizeof *ld); 547 return result.flags; 548 } 549 } 550 } // namespace Fortran::decimal 551