1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "big-radix-floating-point.h" 10 #include "flang/Common/bit-population-count.h" 11 #include "flang/Common/leading-zero-bit-count.h" 12 #include "flang/Decimal/binary-floating-point.h" 13 #include "flang/Decimal/decimal.h" 14 #include <cinttypes> 15 #include <cstring> 16 #include <ctype.h> 17 18 namespace Fortran::decimal { 19 20 template <int PREC, int LOG10RADIX> 21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber( 22 const char *&p, bool &inexact, const char *end) { 23 SetToZero(); 24 if (end && p >= end) { 25 return false; 26 } 27 // Skip leading spaces 28 for (; p != end && *p == ' '; ++p) { 29 } 30 if (p == end) { 31 return false; 32 } 33 const char *q{p}; 34 isNegative_ = *q == '-'; 35 if (*q == '-' || *q == '+') { 36 ++q; 37 } 38 const char *start{q}; 39 for (; q != end && *q == '0'; ++q) { 40 } 41 const char *firstDigit{q}; 42 for (; q != end && *q >= '0' && *q <= '9'; ++q) { 43 } 44 const char *point{nullptr}; 45 if (q != end && *q == '.') { 46 point = q; 47 for (++q; q != end && *q >= '0' && *q <= '9'; ++q) { 48 } 49 } 50 if (q == start || (q == start + 1 && start == point)) { 51 return false; // require at least one digit 52 } 53 // There's a valid number here; set the reference argument to point to 54 // the first character afterward, which might be an exponent part. 55 p = q; 56 // Strip off trailing zeroes 57 if (point) { 58 while (q[-1] == '0') { 59 --q; 60 } 61 if (q[-1] == '.') { 62 point = nullptr; 63 --q; 64 } 65 } 66 if (!point) { 67 while (q > firstDigit && q[-1] == '0') { 68 --q; 69 ++exponent_; 70 } 71 } 72 // Trim any excess digits 73 const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)}; 74 if (q > limit) { 75 inexact = true; 76 if (point >= limit) { 77 q = point; 78 point = nullptr; 79 } 80 if (!point) { 81 exponent_ += q - limit; 82 } 83 q = limit; 84 } 85 if (point) { 86 exponent_ -= static_cast<int>(q - point - 1); 87 } 88 if (q == firstDigit) { 89 exponent_ = 0; // all zeros 90 } 91 // Rack the decimal digits up into big Digits. 92 for (auto times{radix}; q-- > firstDigit;) { 93 if (*q != '.') { 94 if (times == radix) { 95 digit_[digits_++] = *q - '0'; 96 times = 10; 97 } else { 98 digit_[digits_ - 1] += times * (*q - '0'); 99 times *= 10; 100 } 101 } 102 } 103 // Look for an optional exponent field. 104 if (p == end) { 105 return true; 106 } 107 q = p; 108 switch (*q) { 109 case 'e': 110 case 'E': 111 case 'd': 112 case 'D': 113 case 'q': 114 case 'Q': { 115 if (++q == end) { 116 break; 117 } 118 bool negExpo{*q == '-'}; 119 if (*q == '-' || *q == '+') { 120 ++q; 121 } 122 if (q != end && *q >= '0' && *q <= '9') { 123 int expo{0}; 124 for (; q != end && *q == '0'; ++q) { 125 } 126 const char *expDig{q}; 127 for (; q != end && *q >= '0' && *q <= '9'; ++q) { 128 expo = 10 * expo + *q - '0'; 129 } 130 if (q >= expDig + 8) { 131 // There's a ridiculous number of nonzero exponent digits. 132 // The decimal->binary conversion routine will cope with 133 // returning 0 or Inf, but we must ensure that "expo" didn't 134 // overflow back around to something legal. 135 expo = 10 * Real::decimalRange; 136 exponent_ = 0; 137 } 138 p = q; // exponent is valid; advance the termination pointer 139 if (negExpo) { 140 exponent_ -= expo; 141 } else { 142 exponent_ += expo; 143 } 144 } 145 } break; 146 default: 147 break; 148 } 149 return true; 150 } 151 152 template <int PREC, int LOG10RADIX> 153 void BigRadixFloatingPointNumber<PREC, 154 LOG10RADIX>::LoseLeastSignificantDigit() { 155 Digit LSD{digit_[0]}; 156 for (int j{0}; j < digits_ - 1; ++j) { 157 digit_[j] = digit_[j + 1]; 158 } 159 digit_[digits_ - 1] = 0; 160 bool incr{false}; 161 switch (rounding_) { 162 case RoundNearest: 163 incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); 164 break; 165 case RoundUp: 166 incr = LSD > 0 && !isNegative_; 167 break; 168 case RoundDown: 169 incr = LSD > 0 && isNegative_; 170 break; 171 case RoundToZero: 172 break; 173 case RoundCompatible: 174 incr = LSD >= radix / 2; 175 break; 176 } 177 for (int j{0}; (digit_[j] += incr) == radix; ++j) { 178 digit_[j] = 0; 179 } 180 } 181 182 // This local utility class represents an unrounded nonnegative 183 // binary floating-point value with an unbiased (i.e., signed) 184 // binary exponent, an integer value (not a fraction) with an implied 185 // binary point to its *right*, and some guard bits for rounding. 186 template <int PREC> class IntermediateFloat { 187 public: 188 static constexpr int precision{PREC}; 189 using IntType = common::HostUnsignedIntType<precision>; 190 static constexpr IntType topBit{IntType{1} << (precision - 1)}; 191 static constexpr IntType mask{topBit + (topBit - 1)}; 192 193 IntermediateFloat() {} 194 IntermediateFloat(const IntermediateFloat &) = default; 195 196 // Assumes that exponent_ is valid on entry, and may increment it. 197 // Returns the number of guard_ bits that have been determined. 198 template <typename UINT> bool SetTo(UINT n) { 199 static constexpr int nBits{CHAR_BIT * sizeof n}; 200 if constexpr (precision >= nBits) { 201 value_ = n; 202 guard_ = 0; 203 return 0; 204 } else { 205 int shift{common::BitsNeededFor(n) - precision}; 206 if (shift <= 0) { 207 value_ = n; 208 guard_ = 0; 209 return 0; 210 } else { 211 value_ = n >> shift; 212 exponent_ += shift; 213 n <<= nBits - shift; 214 guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); 215 return shift; 216 } 217 } 218 } 219 220 void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } 221 bool IsFull() const { return value_ >= topBit; } 222 void AdjustExponent(int by) { exponent_ += by; } 223 void SetGuard(int g) { 224 guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1); 225 } 226 227 ConversionToBinaryResult<PREC> ToBinary( 228 bool isNegative, FortranRounding) const; 229 230 private: 231 static constexpr int guardBits{3}; // guard, round, sticky 232 using GuardType = int; 233 static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; 234 235 IntType value_{0}; 236 GuardType guard_{0}; 237 int exponent_{0}; 238 }; 239 240 template <int PREC> 241 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary( 242 bool isNegative, FortranRounding rounding) const { 243 using Binary = BinaryFloatingPointNumber<PREC>; 244 // Create a fraction with a binary point to the left of the integer 245 // value_, and bias the exponent. 246 IntType fraction{value_}; 247 GuardType guard{guard_}; 248 int expo{exponent_ + Binary::exponentBias + (precision - 1)}; 249 while (expo < 1 && (fraction > 0 || guard > oneHalf)) { 250 guard = (guard & 1) | (guard >> 1) | 251 ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1)); 252 fraction >>= 1; 253 ++expo; 254 } 255 int flags{Exact}; 256 if (guard != 0) { 257 flags |= Inexact; 258 } 259 if (fraction == 0 && guard <= oneHalf) { 260 return {Binary{}, static_cast<enum ConversionResultFlags>(flags)}; 261 } 262 // The value is nonzero; normalize it. 263 while (fraction < topBit && expo > 1) { 264 --expo; 265 fraction = fraction * 2 + (guard >> (guardBits - 2)); 266 guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); 267 } 268 // Apply rounding 269 bool incr{false}; 270 switch (rounding) { 271 case RoundNearest: 272 incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); 273 break; 274 case RoundUp: 275 incr = guard != 0 && !isNegative; 276 break; 277 case RoundDown: 278 incr = guard != 0 && isNegative; 279 break; 280 case RoundToZero: 281 break; 282 case RoundCompatible: 283 incr = guard >= oneHalf; 284 break; 285 } 286 if (incr) { 287 if (fraction == mask) { 288 // rounding causes a carry 289 ++expo; 290 fraction = topBit; 291 } else { 292 ++fraction; 293 } 294 } 295 if (expo == 1 && fraction < topBit) { 296 expo = 0; // subnormal 297 } 298 if (expo >= Binary::maxExponent) { 299 expo = Binary::maxExponent; // Inf 300 flags |= Overflow; 301 if constexpr (Binary::bits == 80) { // x87 302 fraction = IntType{1} << 63; 303 } else { 304 fraction = 0; 305 } 306 } 307 using Raw = typename Binary::RawType; 308 Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1); 309 raw |= static_cast<Raw>(expo) << Binary::significandBits; 310 if constexpr (Binary::isImplicitMSB) { 311 fraction &= ~topBit; 312 } 313 raw |= fraction; 314 return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)}; 315 } 316 317 template <int PREC, int LOG10RADIX> 318 ConversionToBinaryResult<PREC> 319 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() { 320 // On entry, *this holds a multi-precision integer value in a radix of a 321 // large power of ten. Its radix point is defined to be to the right of its 322 // digits, and "exponent_" is the power of ten by which it is to be scaled. 323 Normalize(); 324 if (digits_ == 0) { // zero value 325 return {Real{SignBit()}}; 326 } 327 // The value is not zero: x = D. * 10.**E 328 // Shift our perspective on the radix (& decimal) point so that 329 // it sits to the *left* of the digits: i.e., x = .D * 10.**E 330 exponent_ += digits_ * log10Radix; 331 // Sanity checks for ridiculous exponents 332 static constexpr int crazy{2 * Real::decimalRange + log10Radix}; 333 if (exponent_ < -crazy) { // underflow to +/-0. 334 return {Real{SignBit()}, Inexact}; 335 } else if (exponent_ > crazy) { // overflow to +/-Inf. 336 return {Real{Infinity()}, Overflow}; 337 } 338 // Apply any negative decimal exponent by multiplication 339 // by a power of two, adjusting the binary exponent to compensate. 340 IntermediateFloat<PREC> f; 341 while (exponent_ < log10Radix) { 342 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) 343 f.AdjustExponent(-9); 344 digitLimit_ = digits_; 345 if (int carry{MultiplyWithoutNormalization<512>()}) { 346 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 347 PushCarry(carry); 348 exponent_ += log10Radix; 349 } 350 } 351 // Apply any positive decimal exponent greater than 352 // is needed to treat the topmost digit as an integer 353 // part by multiplying by 10 or 10000 repeatedly. 354 while (exponent_ > log10Radix) { 355 digitLimit_ = digits_; 356 int carry; 357 if (exponent_ >= log10Radix + 4) { 358 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) 359 exponent_ -= 4; 360 carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); 361 f.AdjustExponent(4); 362 } else { 363 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) 364 --exponent_; 365 carry = MultiplyWithoutNormalization<5>(); 366 f.AdjustExponent(1); 367 } 368 if (carry != 0) { 369 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 370 PushCarry(carry); 371 exponent_ += log10Radix; 372 } 373 } 374 // So exponent_ is now log10Radix, meaning that the 375 // MSD can be taken as an integer part and transferred 376 // to the binary result. 377 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) 378 int guardShift{f.SetTo(digit_[--digits_])}; 379 // Transfer additional bits until the result is normal. 380 digitLimit_ = digits_; 381 while (!f.IsFull()) { 382 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) 383 f.AdjustExponent(-1); 384 std::uint32_t carry = MultiplyWithoutNormalization<2>(); 385 f.ShiftIn(carry); 386 } 387 // Get the next few bits for rounding. Allow for some guard bits 388 // that may have already been set in f.SetTo() above. 389 int guard{0}; 390 if (guardShift == 0) { 391 guard = MultiplyWithoutNormalization<4>(); 392 } else if (guardShift == 1) { 393 guard = MultiplyWithoutNormalization<2>(); 394 } 395 guard = guard + guard + !IsZero(); 396 f.SetGuard(guard); 397 return f.ToBinary(isNegative_, rounding_); 398 } 399 400 template <int PREC, int LOG10RADIX> 401 ConversionToBinaryResult<PREC> 402 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary( 403 const char *&p, const char *limit) { 404 bool inexact{false}; 405 if (ParseNumber(p, inexact, limit)) { 406 auto result{ConvertToBinary()}; 407 if (inexact) { 408 result.flags = 409 static_cast<enum ConversionResultFlags>(result.flags | Inexact); 410 } 411 return result; 412 } else { 413 // Could not parse a decimal floating-point number. p has been 414 // advanced over any leading spaces. Most Fortran compilers set 415 // the sign bit for -NaN. 416 const char *q{p}; 417 if (!limit || q < limit) { 418 isNegative_ = *q == '-'; 419 if (isNegative_ || *q == '+') { 420 ++q; 421 } 422 } 423 if ((!limit || limit >= q + 3) && toupper(q[0]) == 'N' && 424 toupper(q[1]) == 'A' && toupper(q[2]) == 'N') { 425 // NaN 426 p = q + 3; 427 bool isQuiet{true}; 428 if ((!limit || p < limit) && *p == '(') { 429 int depth{1}; 430 do { 431 ++p; 432 if (limit && p >= limit) { 433 // Invalid input 434 return {Real{NaN(false)}, Invalid}; 435 } else if (*p == '(') { 436 ++depth; 437 } else if (*p == ')') { 438 --depth; 439 } else if (*p != ' ') { 440 // Implementation dependent, but other compilers 441 // all return quiet NaNs. 442 } 443 } while (depth > 0); 444 ++p; 445 } 446 return {Real{NaN(isQuiet)}}; 447 } else { // Inf? 448 if ((!limit || limit >= q + 3) && toupper(q[0]) == 'I' && 449 toupper(q[1]) == 'N' && toupper(q[2]) == 'F') { 450 if ((!limit || limit >= q + 8) && toupper(q[3]) == 'I' && 451 toupper(q[4]) == 'N' && toupper(q[5]) == 'I' && 452 toupper(q[6]) == 'T' && toupper(q[7]) == 'Y') { 453 p = q + 8; 454 } else { 455 p = q + 3; 456 } 457 return {Real{Infinity()}}; 458 } else { 459 // Invalid input 460 return {Real{NaN()}, Invalid}; 461 } 462 } 463 } 464 } 465 466 template <int PREC> 467 ConversionToBinaryResult<PREC> ConvertToBinary( 468 const char *&p, enum FortranRounding rounding, const char *end) { 469 return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end); 470 } 471 472 template ConversionToBinaryResult<8> ConvertToBinary<8>( 473 const char *&, enum FortranRounding, const char *end); 474 template ConversionToBinaryResult<11> ConvertToBinary<11>( 475 const char *&, enum FortranRounding, const char *end); 476 template ConversionToBinaryResult<24> ConvertToBinary<24>( 477 const char *&, enum FortranRounding, const char *end); 478 template ConversionToBinaryResult<53> ConvertToBinary<53>( 479 const char *&, enum FortranRounding, const char *end); 480 template ConversionToBinaryResult<64> ConvertToBinary<64>( 481 const char *&, enum FortranRounding, const char *end); 482 template ConversionToBinaryResult<113> ConvertToBinary<113>( 483 const char *&, enum FortranRounding, const char *end); 484 485 extern "C" { 486 enum ConversionResultFlags ConvertDecimalToFloat( 487 const char **p, float *f, enum FortranRounding rounding) { 488 auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; 489 std::memcpy(reinterpret_cast<void *>(f), 490 reinterpret_cast<const void *>(&result.binary), sizeof *f); 491 return result.flags; 492 } 493 enum ConversionResultFlags ConvertDecimalToDouble( 494 const char **p, double *d, enum FortranRounding rounding) { 495 auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; 496 std::memcpy(reinterpret_cast<void *>(d), 497 reinterpret_cast<const void *>(&result.binary), sizeof *d); 498 return result.flags; 499 } 500 enum ConversionResultFlags ConvertDecimalToLongDouble( 501 const char **p, long double *ld, enum FortranRounding rounding) { 502 auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; 503 std::memcpy(reinterpret_cast<void *>(ld), 504 reinterpret_cast<const void *>(&result.binary), sizeof *ld); 505 return result.flags; 506 } 507 } 508 } // namespace Fortran::decimal 509