1 //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "big-radix-floating-point.h" 10 #include "flang/Common/bit-population-count.h" 11 #include "flang/Common/leading-zero-bit-count.h" 12 #include "flang/Decimal/binary-floating-point.h" 13 #include "flang/Decimal/decimal.h" 14 #include <cinttypes> 15 #include <cstring> 16 #include <ctype.h> 17 18 namespace Fortran::decimal { 19 20 template<int PREC, int LOG10RADIX> 21 bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber( 22 const char *&p, bool &inexact) { 23 SetToZero(); 24 while (*p == ' ') { 25 ++p; 26 } 27 const char *q{p}; 28 isNegative_ = *q == '-'; 29 if (*q == '-' || *q == '+') { 30 ++q; 31 } 32 const char *start{q}; 33 while (*q == '0') { 34 ++q; 35 } 36 const char *first{q}; 37 for (; *q >= '0' && *q <= '9'; ++q) { 38 } 39 const char *point{nullptr}; 40 if (*q == '.') { 41 point = q; 42 for (++q; *q >= '0' && *q <= '9'; ++q) { 43 } 44 } 45 if (q == start || (q == start + 1 && *start == '.')) { 46 return false; // require at least one digit 47 } 48 // There's a valid number here; set the reference argument to point to 49 // the first character afterward. 50 p = q; 51 // Strip off trailing zeroes 52 if (point) { 53 while (q[-1] == '0') { 54 --q; 55 } 56 if (q[-1] == '.') { 57 point = nullptr; 58 --q; 59 } 60 } 61 if (!point) { 62 while (q > first && q[-1] == '0') { 63 --q; 64 ++exponent_; 65 } 66 } 67 // Trim any excess digits 68 const char *limit{first + maxDigits * log10Radix + (point != nullptr)}; 69 if (q > limit) { 70 inexact = true; 71 if (point >= limit) { 72 q = point; 73 point = nullptr; 74 } 75 if (!point) { 76 exponent_ += q - limit; 77 } 78 q = limit; 79 } 80 if (point) { 81 exponent_ -= static_cast<int>(q - point - 1); 82 } 83 if (q == first) { 84 exponent_ = 0; // all zeros 85 } 86 // Rack the decimal digits up into big Digits. 87 for (auto times{radix}; q-- > first;) { 88 if (*q != '.') { 89 if (times == radix) { 90 digit_[digits_++] = *q - '0'; 91 times = 10; 92 } else { 93 digit_[digits_ - 1] += times * (*q - '0'); 94 times *= 10; 95 } 96 } 97 } 98 // Look for an optional exponent field. 99 q = p; 100 switch (*q) { 101 case 'e': 102 case 'E': 103 case 'd': 104 case 'D': 105 case 'q': 106 case 'Q': { 107 bool negExpo{*++q == '-'}; 108 if (*q == '-' || *q == '+') { 109 ++q; 110 } 111 if (*q >= '0' && *q <= '9') { 112 int expo{0}; 113 while (*q == '0') { 114 ++q; 115 } 116 const char *expDig{q}; 117 while (*q >= '0' && *q <= '9') { 118 expo = 10 * expo + *q++ - '0'; 119 } 120 if (q >= expDig + 8) { 121 // There's a ridiculous number of nonzero exponent digits. 122 // The decimal->binary conversion routine will cope with 123 // returning 0 or Inf, but we must ensure that "expo" didn't 124 // overflow back around to something legal. 125 expo = 10 * Real::decimalRange; 126 exponent_ = 0; 127 } 128 p = q; // exponent was valid 129 if (negExpo) { 130 exponent_ -= expo; 131 } else { 132 exponent_ += expo; 133 } 134 } 135 } break; 136 default: break; 137 } 138 return true; 139 } 140 141 // This local utility class represents an unrounded nonnegative 142 // binary floating-point value with an unbiased (i.e., signed) 143 // binary exponent, an integer value (not a fraction) with an implied 144 // binary point to its *right*, and some guard bits for rounding. 145 template<int PREC> class IntermediateFloat { 146 public: 147 static constexpr int precision{PREC}; 148 using IntType = common::HostUnsignedIntType<precision>; 149 static constexpr IntType topBit{IntType{1} << (precision - 1)}; 150 static constexpr IntType mask{topBit + (topBit - 1)}; 151 152 IntermediateFloat() {} 153 IntermediateFloat(const IntermediateFloat &) = default; 154 155 // Assumes that exponent_ is valid on entry, and may increment it. 156 // Returns the number of guard_ bits that have been determined. 157 template<typename UINT> bool SetTo(UINT n) { 158 static constexpr int nBits{CHAR_BIT * sizeof n}; 159 if constexpr (precision >= nBits) { 160 value_ = n; 161 guard_ = 0; 162 return 0; 163 } else { 164 int shift{common::BitsNeededFor(n) - precision}; 165 if (shift <= 0) { 166 value_ = n; 167 guard_ = 0; 168 return 0; 169 } else { 170 value_ = n >> shift; 171 exponent_ += shift; 172 n <<= nBits - shift; 173 guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); 174 return shift; 175 } 176 } 177 } 178 179 void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } 180 bool IsFull() const { return value_ >= topBit; } 181 void AdjustExponent(int by) { exponent_ += by; } 182 void SetGuard(int g) { 183 guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1); 184 } 185 186 ConversionToBinaryResult<PREC> ToBinary( 187 bool isNegative, FortranRounding) const; 188 189 private: 190 static constexpr int guardBits{3}; // guard, round, sticky 191 using GuardType = int; 192 static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; 193 194 IntType value_{0}; 195 GuardType guard_{0}; 196 int exponent_{0}; 197 }; 198 199 template<int PREC> 200 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary( 201 bool isNegative, FortranRounding rounding) const { 202 using Binary = BinaryFloatingPointNumber<PREC>; 203 // Create a fraction with a binary point to the left of the integer 204 // value_, and bias the exponent. 205 IntType fraction{value_}; 206 GuardType guard{guard_}; 207 int expo{exponent_ + Binary::exponentBias + (precision - 1)}; 208 while (expo < 1 && (fraction > 0 || guard > oneHalf)) { 209 guard = (guard & 1) | (guard >> 1) | 210 ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1)); 211 fraction >>= 1; 212 ++expo; 213 } 214 int flags{Exact}; 215 if (guard != 0) { 216 flags |= Inexact; 217 } 218 if (fraction == 0 && guard <= oneHalf) { 219 return {Binary{}, static_cast<enum ConversionResultFlags>(flags)}; 220 } 221 // The value is nonzero; normalize it. 222 while (fraction < topBit && expo > 1) { 223 --expo; 224 fraction = fraction * 2 + (guard >> (guardBits - 2)); 225 guard = (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); 226 } 227 // Apply rounding 228 bool incr{false}; 229 switch (rounding) { 230 case RoundNearest: 231 case RoundDefault: 232 incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); 233 break; 234 case RoundUp: incr = guard != 0 && !isNegative; break; 235 case RoundDown: incr = guard != 0 && isNegative; break; 236 case RoundToZero: break; 237 case RoundCompatible: incr = guard >= oneHalf; break; 238 } 239 if (incr) { 240 if (fraction == mask) { 241 // rounding causes a carry 242 ++expo; 243 fraction = topBit; 244 } else { 245 ++fraction; 246 } 247 } 248 if (expo == 1 && fraction < topBit) { 249 expo = 0; // subnormal 250 } 251 if (expo >= Binary::maxExponent) { 252 expo = Binary::maxExponent; // Inf 253 flags |= Overflow; 254 fraction = 0; 255 } 256 using Raw = typename Binary::RawType; 257 Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1); 258 raw |= static_cast<Raw>(expo) << Binary::significandBits; 259 if constexpr (Binary::isImplicitMSB) { 260 fraction &= ~topBit; 261 } 262 raw |= fraction; 263 return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)}; 264 } 265 266 template<int PREC, int LOG10RADIX> 267 ConversionToBinaryResult<PREC> 268 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() { 269 // On entry, *this holds a multi-precision integer value in a radix of a 270 // large power of ten. Its radix point is defined to be to the right of its 271 // digits, and "exponent_" is the power of ten by which it is to be scaled. 272 Normalize(); 273 if (digits_ == 0) { // zero value 274 return {Real{SignBit()}}; 275 } 276 // The value is not zero: x = D. * 10.**E 277 // Shift our perspective on the radix (& decimal) point so that 278 // it sits to the *left* of the digits: i.e., x = .D * 10.**E 279 exponent_ += digits_ * log10Radix; 280 // Sanity checks for ridiculous exponents 281 static constexpr int crazy{2 * Real::decimalRange + log10Radix}; 282 if (exponent_ < -crazy) { // underflow to +/-0. 283 return {Real{SignBit()}, Inexact}; 284 } else if (exponent_ > crazy) { // overflow to +/-Inf. 285 return {Real{Infinity()}, Overflow}; 286 } 287 // Apply any negative decimal exponent by multiplication 288 // by a power of two, adjusting the binary exponent to compensate. 289 IntermediateFloat<PREC> f; 290 while (exponent_ < log10Radix) { 291 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) 292 f.AdjustExponent(-9); 293 digitLimit_ = digits_; 294 if (int carry{MultiplyWithoutNormalization<512>()}) { 295 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 296 PushCarry(carry); 297 exponent_ += log10Radix; 298 } 299 } 300 // Apply any positive decimal exponent greater than 301 // is needed to treat the topmost digit as an integer 302 // part by multiplying by 10 or 10000 repeatedly. 303 while (exponent_ > log10Radix) { 304 digitLimit_ = digits_; 305 int carry; 306 if (exponent_ >= log10Radix + 4) { 307 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) 308 exponent_ -= 4; 309 carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); 310 f.AdjustExponent(4); 311 } else { 312 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) 313 --exponent_; 314 carry = MultiplyWithoutNormalization<5>(); 315 f.AdjustExponent(1); 316 } 317 if (carry != 0) { 318 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) 319 PushCarry(carry); 320 exponent_ += log10Radix; 321 } 322 } 323 // So exponent_ is now log10Radix, meaning that the 324 // MSD can be taken as an integer part and transferred 325 // to the binary result. 326 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) 327 int guardShift{f.SetTo(digit_[--digits_])}; 328 // Transfer additional bits until the result is normal. 329 digitLimit_ = digits_; 330 while (!f.IsFull()) { 331 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) 332 f.AdjustExponent(-1); 333 std::uint32_t carry = MultiplyWithoutNormalization<2>(); 334 f.ShiftIn(carry); 335 } 336 // Get the next few bits for rounding. Allow for some guard bits 337 // that may have already been set in f.SetTo() above. 338 int guard{0}; 339 if (guardShift == 0) { 340 guard = MultiplyWithoutNormalization<4>(); 341 } else if (guardShift == 1) { 342 guard = MultiplyWithoutNormalization<2>(); 343 } 344 guard = guard + guard + !IsZero(); 345 f.SetGuard(guard); 346 return f.ToBinary(isNegative_, rounding_); 347 } 348 349 template<int PREC, int LOG10RADIX> 350 ConversionToBinaryResult<PREC> 351 BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(const char *&p) { 352 bool inexact{false}; 353 if (ParseNumber(p, inexact)) { 354 auto result{ConvertToBinary()}; 355 if (inexact) { 356 result.flags = 357 static_cast<enum ConversionResultFlags>(result.flags | Inexact); 358 } 359 return result; 360 } else { 361 // Could not parse a decimal floating-point number. p has been 362 // advanced over any leading spaces. 363 if (toupper(p[0]) == 'N' && toupper(p[1]) == 'A' && toupper(p[2]) == 'N') { 364 // NaN 365 p += 3; 366 return {Real{NaN()}}; 367 } else { 368 // Try to parse Inf, maybe with a sign 369 const char *q{p}; 370 isNegative_ = *q == '-'; 371 if (*q == '-' || *q == '+') { 372 ++q; 373 } 374 if (toupper(q[0]) == 'I' && toupper(q[1]) == 'N' && 375 toupper(q[2]) == 'F') { 376 p = q + 3; 377 return {Real{Infinity()}}; 378 } else { 379 // Invalid input 380 return {Real{NaN()}, Invalid}; 381 } 382 } 383 } 384 } 385 386 template<int PREC> 387 ConversionToBinaryResult<PREC> ConvertToBinary( 388 const char *&p, enum FortranRounding rounding) { 389 return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p); 390 } 391 392 template ConversionToBinaryResult<8> ConvertToBinary<8>( 393 const char *&, enum FortranRounding); 394 template ConversionToBinaryResult<11> ConvertToBinary<11>( 395 const char *&, enum FortranRounding); 396 template ConversionToBinaryResult<24> ConvertToBinary<24>( 397 const char *&, enum FortranRounding); 398 template ConversionToBinaryResult<53> ConvertToBinary<53>( 399 const char *&, enum FortranRounding); 400 template ConversionToBinaryResult<64> ConvertToBinary<64>( 401 const char *&, enum FortranRounding); 402 template ConversionToBinaryResult<112> ConvertToBinary<112>( 403 const char *&, enum FortranRounding); 404 405 extern "C" { 406 enum ConversionResultFlags ConvertDecimalToFloat( 407 const char **p, float *f, enum FortranRounding rounding) { 408 auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; 409 std::memcpy(reinterpret_cast<void *>(f), 410 reinterpret_cast<const void *>(&result.binary), sizeof *f); 411 return result.flags; 412 } 413 enum ConversionResultFlags ConvertDecimalToDouble( 414 const char **p, double *d, enum FortranRounding rounding) { 415 auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; 416 std::memcpy(reinterpret_cast<void *>(d), 417 reinterpret_cast<const void *>(&result.binary), sizeof *d); 418 return result.flags; 419 } 420 #if __x86_64__ && !defined(_MSC_VER) 421 enum ConversionResultFlags ConvertDecimalToLongDouble( 422 const char **p, long double *ld, enum FortranRounding rounding) { 423 auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; 424 std::memcpy(reinterpret_cast<void *>(ld), 425 reinterpret_cast<const void *>(&result.binary), sizeof *ld); 426 return result.flags; 427 } 428 #endif 429 } 430 } 431