1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "big-radix-floating-point.h" 10 #include "flang/Common/bit-population-count.h" 11 #include "flang/Common/leading-zero-bit-count.h" 12 #include "flang/Decimal/binary-floating-point.h" 13 #include "flang/Decimal/decimal.h" 14 #include <cinttypes> 15 #include <cstring> 16 #include <ctype.h> 17 18 namespace Fortran::decimal { 19 20 template <int PREC, int LOG10RADIX> 21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber( 22 const char *&p, bool &inexact, const char *end) { 23 SetToZero(); 24 if (end && p >= end) { 25 return false; 26 } 27 // Skip leading spaces 28 for (; p != end && *p == ' '; ++p) { 29 } 30 if (p == end) { 31 return false; 32 } 33 const char *q{p}; 34 isNegative_ = *q == '-'; 35 if (*q == '-' || *q == '+') { 36 ++q; 37 } 38 const char *start{q}; 39 for (; q != end && *q == '0'; ++q) { 40 } 41 const char *firstDigit{q}; 42 for (; q != end && *q >= '0' && *q <= '9'; ++q) { 43 } 44 const char *point{nullptr}; 45 if (q != end && *q == '.') { 46 point = q; 47 for (++q; q != end && *q >= '0' && *q <= '9'; ++q) { 48 } 49 } 50 if (q == start || (q == start + 1 && start == point)) { 51 return false; // require at least one digit 52 } 53 // There's a valid number here; set the reference argument to point to 54 // the first character afterward, which might be an exponent part. 55 p = q; 56 // Strip off trailing zeroes 57 if (point) { 58 while (q[-1] == '0') { 59 --q; 60 } 61 if (q[-1] == '.') { 62 point = nullptr; 63 --q; 64 } 65 } 66 if (!point) { 67 while (q > firstDigit && q[-1] == '0') { 68 --q; 69 ++exponent_; 70 } 71 } 72 // Trim any excess digits 73 const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)}; 74 if (q > limit) { 75 inexact = true; 76 if (point >= limit) { 77 q = point; 78 point = nullptr; 79 } 80 if (!point) { 81 exponent_ += q - limit; 82 } 83 q = limit; 84 } 85 if (point) { 86 exponent_ -= static_cast<int>(q - point - 1); 87 } 88 if (q == firstDigit) { 89 exponent_ = 0; // all zeros 90 } 91 // Rack the decimal digits up into big Digits. 92 for (auto times{radix}; q-- > firstDigit;) { 93 if (*q != '.') { 94 if (times == radix) { 95 digit_[digits_++] = *q - '0'; 96 times = 10; 97 } else { 98 digit_[digits_ - 1] += times * (*q - '0'); 99 times *= 10; 100 } 101 } 102 } 103 // Look for an optional exponent field. 104 if (p == end) { 105 return true; 106 } 107 q = p; 108 switch (*q) { 109 case 'e': 110 case 'E': 111 case 'd': 112 case 'D': 113 case 'q': 114 case 'Q': { 115 if (++q == end) { 116 break; 117 } 118 bool negExpo{*q == '-'}; 119 if (*q == '-' || *q == '+') { 120 ++q; 121 } 122 if (q != end && *q >= '0' && *q <= '9') { 123 int expo{0}; 124 for (; q != end && *q == '0'; ++q) { 125 } 126 const char *expDig{q}; 127 for (; q != end && *q >= '0' && *q <= '9'; ++q) { 128 expo = 10 * expo + *q - '0'; 129 } 130 if (q >= expDig + 8) { 131 // There's a ridiculous number of nonzero exponent digits. 132 // The decimal->binary conversion routine will cope with 133 // returning 0 or Inf, but we must ensure that "expo" didn't 134 // overflow back around to something legal. 135 expo = 10 * Real::decimalRange; 136 exponent_ = 0; 137 } 138 p = q; // exponent is valid; advance the termination pointer 139 if (negExpo) { 140 exponent_ -= expo; 141 } else { 142 exponent_ += expo; 143 } 144 } 145 } break; 146 default: 147 break; 148 } 149 return true; 150 } 151 152 template <int PREC, int LOG10RADIX> 153 void BigRadixFloatingPointNumber<PREC, 154 LOG10RADIX>::LoseLeastSignificantDigit() { 155 Digit LSD{digit_[0]}; 156 for (int j{0}; j < digits_ - 1; ++j) { 157 digit_[j] = digit_[j + 1]; 158 } 159 digit_[digits_ - 1] = 0; 160 bool incr{false}; 161 switch (rounding_) { 162 case RoundNearest: 163 incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); 164 break; 165 case RoundUp: 166 incr = LSD > 0 && !isNegative_; 167 break; 168 case RoundDown: 169 incr = LSD > 0 && isNegative_; 170 break; 171 case RoundToZero: 172 break; 173 case RoundCompatible: 174 incr = LSD >= radix / 2; 175 break; 176 } 177 for (int j{0}; (digit_[j] += incr) == radix; ++j) { 178 digit_[j] = 0; 179 } 180 } 181 182 // This local utility class represents an unrounded nonnegative 183 // binary floating-point value with an unbiased (i.e., signed) 184 // binary exponent, an integer value (not a fraction) with an implied 185 // binary point to its *right*, and some guard bits for rounding. 186 template <int PREC> class IntermediateFloat { 187 public: 188 static constexpr int precision{PREC}; 189 using IntType = common::HostUnsignedIntType<precision>; 190 static constexpr IntType topBit{IntType{1} << (precision - 1)}; 191 static constexpr IntType mask{topBit + (topBit - 1)}; 192 193 IntermediateFloat() {} 194 IntermediateFloat(const IntermediateFloat &) = default; 195 196 // Assumes that exponent_ is valid on entry, and may increment it. 197 // Returns the number of guard_ bits that have been determined. 198 template <typename UINT> bool SetTo(UINT n) { 199 static constexpr int nBits{CHAR_BIT * sizeof n}; 200 if constexpr (precision >= nBits) { 201 value_ = n; 202 guard_ = 0; 203 return 0; 204 } else { 205 int shift{common::BitsNeededFor(n) - precision}; 206 if (shift <= 0) { 207 value_ = n; 208 guard_ = 0; 209 return 0; 210 } else { 211 value_ = n >> shift; 212 exponent_ += shift; 213 n <<= nBits - shift; 214 guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); 215 return shift; 216 } 217 } 218 } 219 220 void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } 221 bool IsFull() const { return value_ >= topBit; } 222 void AdjustExponent(int by) { exponent_ += by; } 223 void SetGuard(int g) { 224 guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1); 225 } 226 227 ConversionToBinaryResult<PREC> ToBinary( 228 bool isNegative, FortranRounding) const; 229 230 private: 231 static constexpr int guardBits{3}; // guard, round, sticky 232 using GuardType = int; 233 static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; 234 235 IntType value_{0}; 236 GuardType guard_{0}; 237 int exponent_{0}; 238 }; 239 240 template <int PREC> 241 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary( 242 bool isNegative, FortranRounding rounding) const { 243 using Binary = BinaryFloatingPointNumber<PREC>; 244 // Create a fraction with a binary point to the left of the integer 245 // value_, and bias the exponent. 246 IntType fraction{value_}; 247 GuardType guard{guard_}; 248 int expo{exponent_ + Binary::exponentBias + (precision - 1)}; 249 while (expo < 1 && (fraction > 0 || guard > oneHalf)) { 250 guard = (guard & 1) | (guard >> 1) | 251 ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1)); 252 fraction >>= 1; 253 ++expo; 254 } 255 int flags{Exact}; 256 if (guard != 0) { 257 flags |= Inexact; 258 } 259 if (fraction == 0) { 260 if (guard <= oneHalf) { 261 if ((!isNegative && rounding == RoundUp) || 262 (isNegative && rounding == RoundDown)) { 263 // round to minimum nonzero value 264 } else { // round to zero 265 if (guard != 0) { 266 flags |= Underflow; 267 } 268 return {Binary{}, static_cast<enum ConversionResultFlags>(flags)}; 269 } 270 } 271 } else { 272 // The value is nonzero; normalize it. 273 while (fraction < topBit && expo > 1) { 274 --expo; 275 fraction = fraction * 2 + (guard >> (guardBits - 2)); 276 guard = 277 (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); 278 } 279 } 280 // Apply rounding 281 bool incr{false}; 282 switch (rounding) { 283 case RoundNearest: 284 incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); 285 break; 286 case RoundUp: 287 incr = guard != 0 && !isNegative; 288 break; 289 case RoundDown: 290 incr = guard != 0 && isNegative; 291 break; 292 case RoundToZero: 293 break; 294 case RoundCompatible: 295 incr = guard >= oneHalf; 296 break; 297 } 298 if (incr) { 299 if (fraction == mask) { 300 // rounding causes a carry 301 ++expo; 302 fraction = topBit; 303 } else { 304 ++fraction; 305 } 306 } 307 if (expo == 1 && fraction < topBit) { 308 expo = 0; // subnormal 309 flags |= Underflow; 310 } else if (expo == 0) { 311 flags |= Underflow; 312 } else if (expo >= Binary::maxExponent) { 313 expo = Binary::maxExponent; // Inf 314 flags |= Overflow; 315 if constexpr (Binary::bits == 80) { // x87 316 fraction = IntType{1} << 63; 317 } else { 318 fraction = 0; 319 } 320 } 321 using Raw = typename Binary::RawType; 322 Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1); 323 raw |= static_cast<Raw>(expo) << Binary::significandBits; 324 if constexpr (Binary::isImplicitMSB) { 325 fraction &= ~topBit; 326 } 327 raw |= fraction; 328 return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)}; 329 } 330 331 template <int PREC, int LOG10RADIX> 332 ConversionToBinaryResult<PREC> 333 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() { 334 // On entry, *this holds a multi-precision integer value in a radix of a 335 // large power of ten. Its radix point is defined to be to the right of its 336 // digits, and "exponent_" is the power of ten by which it is to be scaled. 337 Normalize(); 338 if (digits_ == 0) { // zero value 339 return {Real{SignBit()}}; 340 } 341 // The value is not zero: x = D. * 10.**E 342 // Shift our perspective on the radix (& decimal) point so that 343 // it sits to the *left* of the digits: i.e., x = .D * 10.**E 344 exponent_ += digits_ * log10Radix; 345 // Sanity checks for ridiculous exponents 346 static constexpr int crazy{2 * Real::decimalRange + log10Radix}; 347 if (exponent_ < -crazy) { 348 enum ConversionResultFlags flags { 349 static_cast<enum ConversionResultFlags>(Inexact | Underflow) 350 }; 351 if ((!isNegative_ && rounding_ == RoundUp) || 352 (isNegative_ && rounding_ == RoundDown)) { 353 // return least nonzero value 354 return {Real{Raw{1} | SignBit()}, flags}; 355 } else { // underflow to +/-0. 356 return {Real{SignBit()}, flags}; 357 } 358 } else if (exponent_ > crazy) { // overflow to +/-Inf. 359 return {Real{Infinity()}, Overflow}; 360 } 361 // Apply any negative decimal exponent by multiplication 362 // by a power of two, adjusting the binary exponent to compensate. 363 IntermediateFloat<PREC> f; 364 while (exponent_ < log10Radix) { 365 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) 366 f.AdjustExponent(-9); 367 digitLimit_ = digits_; 368 if (int carry{MultiplyWithoutNormalization<512>()}) { 369 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 370 PushCarry(carry); 371 exponent_ += log10Radix; 372 } 373 } 374 // Apply any positive decimal exponent greater than 375 // is needed to treat the topmost digit as an integer 376 // part by multiplying by 10 or 10000 repeatedly. 377 while (exponent_ > log10Radix) { 378 digitLimit_ = digits_; 379 int carry; 380 if (exponent_ >= log10Radix + 4) { 381 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) 382 exponent_ -= 4; 383 carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); 384 f.AdjustExponent(4); 385 } else { 386 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) 387 --exponent_; 388 carry = MultiplyWithoutNormalization<5>(); 389 f.AdjustExponent(1); 390 } 391 if (carry != 0) { 392 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 393 PushCarry(carry); 394 exponent_ += log10Radix; 395 } 396 } 397 // So exponent_ is now log10Radix, meaning that the 398 // MSD can be taken as an integer part and transferred 399 // to the binary result. 400 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) 401 int guardShift{f.SetTo(digit_[--digits_])}; 402 // Transfer additional bits until the result is normal. 403 digitLimit_ = digits_; 404 while (!f.IsFull()) { 405 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) 406 f.AdjustExponent(-1); 407 std::uint32_t carry = MultiplyWithoutNormalization<2>(); 408 f.ShiftIn(carry); 409 } 410 // Get the next few bits for rounding. Allow for some guard bits 411 // that may have already been set in f.SetTo() above. 412 int guard{0}; 413 if (guardShift == 0) { 414 guard = MultiplyWithoutNormalization<4>(); 415 } else if (guardShift == 1) { 416 guard = MultiplyWithoutNormalization<2>(); 417 } 418 guard = guard + guard + !IsZero(); 419 f.SetGuard(guard); 420 return f.ToBinary(isNegative_, rounding_); 421 } 422 423 template <int PREC, int LOG10RADIX> 424 ConversionToBinaryResult<PREC> 425 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary( 426 const char *&p, const char *limit) { 427 bool inexact{false}; 428 if (ParseNumber(p, inexact, limit)) { 429 auto result{ConvertToBinary()}; 430 if (inexact) { 431 result.flags = 432 static_cast<enum ConversionResultFlags>(result.flags | Inexact); 433 } 434 return result; 435 } else { 436 // Could not parse a decimal floating-point number. p has been 437 // advanced over any leading spaces. Most Fortran compilers set 438 // the sign bit for -NaN. 439 const char *q{p}; 440 if (!limit || q < limit) { 441 isNegative_ = *q == '-'; 442 if (isNegative_ || *q == '+') { 443 ++q; 444 } 445 } 446 if ((!limit || limit >= q + 3) && toupper(q[0]) == 'N' && 447 toupper(q[1]) == 'A' && toupper(q[2]) == 'N') { 448 // NaN 449 p = q + 3; 450 bool isQuiet{true}; 451 if ((!limit || p < limit) && *p == '(') { 452 int depth{1}; 453 do { 454 ++p; 455 if (limit && p >= limit) { 456 // Invalid input 457 return {Real{NaN(false)}, Invalid}; 458 } else if (*p == '(') { 459 ++depth; 460 } else if (*p == ')') { 461 --depth; 462 } else if (*p != ' ') { 463 // Implementation dependent, but other compilers 464 // all return quiet NaNs. 465 } 466 } while (depth > 0); 467 ++p; 468 } 469 return {Real{NaN(isQuiet)}}; 470 } else { // Inf? 471 if ((!limit || limit >= q + 3) && toupper(q[0]) == 'I' && 472 toupper(q[1]) == 'N' && toupper(q[2]) == 'F') { 473 if ((!limit || limit >= q + 8) && toupper(q[3]) == 'I' && 474 toupper(q[4]) == 'N' && toupper(q[5]) == 'I' && 475 toupper(q[6]) == 'T' && toupper(q[7]) == 'Y') { 476 p = q + 8; 477 } else { 478 p = q + 3; 479 } 480 return {Real{Infinity()}}; 481 } else { 482 // Invalid input 483 return {Real{NaN()}, Invalid}; 484 } 485 } 486 } 487 } 488 489 template <int PREC> 490 ConversionToBinaryResult<PREC> ConvertToBinary( 491 const char *&p, enum FortranRounding rounding, const char *end) { 492 return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end); 493 } 494 495 template ConversionToBinaryResult<8> ConvertToBinary<8>( 496 const char *&, enum FortranRounding, const char *end); 497 template ConversionToBinaryResult<11> ConvertToBinary<11>( 498 const char *&, enum FortranRounding, const char *end); 499 template ConversionToBinaryResult<24> ConvertToBinary<24>( 500 const char *&, enum FortranRounding, const char *end); 501 template ConversionToBinaryResult<53> ConvertToBinary<53>( 502 const char *&, enum FortranRounding, const char *end); 503 template ConversionToBinaryResult<64> ConvertToBinary<64>( 504 const char *&, enum FortranRounding, const char *end); 505 template ConversionToBinaryResult<113> ConvertToBinary<113>( 506 const char *&, enum FortranRounding, const char *end); 507 508 extern "C" { 509 enum ConversionResultFlags ConvertDecimalToFloat( 510 const char **p, float *f, enum FortranRounding rounding) { 511 auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; 512 std::memcpy(reinterpret_cast<void *>(f), 513 reinterpret_cast<const void *>(&result.binary), sizeof *f); 514 return result.flags; 515 } 516 enum ConversionResultFlags ConvertDecimalToDouble( 517 const char **p, double *d, enum FortranRounding rounding) { 518 auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; 519 std::memcpy(reinterpret_cast<void *>(d), 520 reinterpret_cast<const void *>(&result.binary), sizeof *d); 521 return result.flags; 522 } 523 enum ConversionResultFlags ConvertDecimalToLongDouble( 524 const char **p, long double *ld, enum FortranRounding rounding) { 525 auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; 526 std::memcpy(reinterpret_cast<void *>(ld), 527 reinterpret_cast<const void *>(&result.binary), sizeof *ld); 528 return result.flags; 529 } 530 } 531 } // namespace Fortran::decimal 532