//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class to represent arbitrary precision floating
// point values and provide a variety of arithmetic operations on them.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <limits.h>

// Forwards METHOD_CALL to the storage member that matches the layout of the
// current semantics: U.IEEE when usesLayout<IEEEFloat>(getSemantics()) holds,
// U.Double when usesLayout<DoubleAPFloat>(getSemantics()) holds.  The macro
// expands to `return` statements, so the enclosing function returns from
// inside it; any other layout reaches llvm_unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)

using namespace llvm;

/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");

namespace llvm {

// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise.
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN and Float6E2M3FN types,
  // which do not support Inf or NaN values.
  FiniteOnly,
};

// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3 floating point type where NaN is
  // represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};

/* Represents floating point arithmetic semantics.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* Which non-finite values (Inf and/or NaN) the format can encode;
     defaults to the IEEE 754 behavior.  */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* How NaN is encoded; defaults to the IEEE encoding.  */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Does not take into account
  // the value of fltNonfiniteBehavior.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};

// The aggregate initializers below follow the member order of fltSemantics:
// {maxExponent, minExponent, precision, sizeInBits[, nonFiniteBehavior
// [, nanEncoding]]}.  Omitted trailing members take their in-class defaults.
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static constexpr fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.  */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.
*/ 188 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 189 53 + 53, 128}; 190 191 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 192 switch (S) { 193 case S_IEEEhalf: 194 return IEEEhalf(); 195 case S_BFloat: 196 return BFloat(); 197 case S_IEEEsingle: 198 return IEEEsingle(); 199 case S_IEEEdouble: 200 return IEEEdouble(); 201 case S_IEEEquad: 202 return IEEEquad(); 203 case S_PPCDoubleDouble: 204 return PPCDoubleDouble(); 205 case S_Float8E5M2: 206 return Float8E5M2(); 207 case S_Float8E5M2FNUZ: 208 return Float8E5M2FNUZ(); 209 case S_Float8E4M3FN: 210 return Float8E4M3FN(); 211 case S_Float8E4M3FNUZ: 212 return Float8E4M3FNUZ(); 213 case S_Float8E4M3B11FNUZ: 214 return Float8E4M3B11FNUZ(); 215 case S_FloatTF32: 216 return FloatTF32(); 217 case S_Float6E3M2FN: 218 return Float6E3M2FN(); 219 case S_Float6E2M3FN: 220 return Float6E2M3FN(); 221 case S_x87DoubleExtended: 222 return x87DoubleExtended(); 223 } 224 llvm_unreachable("Unrecognised floating semantics"); 225 } 226 227 APFloatBase::Semantics 228 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 229 if (&Sem == &llvm::APFloat::IEEEhalf()) 230 return S_IEEEhalf; 231 else if (&Sem == &llvm::APFloat::BFloat()) 232 return S_BFloat; 233 else if (&Sem == &llvm::APFloat::IEEEsingle()) 234 return S_IEEEsingle; 235 else if (&Sem == &llvm::APFloat::IEEEdouble()) 236 return S_IEEEdouble; 237 else if (&Sem == &llvm::APFloat::IEEEquad()) 238 return S_IEEEquad; 239 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 240 return S_PPCDoubleDouble; 241 else if (&Sem == &llvm::APFloat::Float8E5M2()) 242 return S_Float8E5M2; 243 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 244 return S_Float8E5M2FNUZ; 245 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 246 return S_Float8E4M3FN; 247 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 248 return S_Float8E4M3FNUZ; 249 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 250 return S_Float8E4M3B11FNUZ; 251 else 
if (&Sem == &llvm::APFloat::FloatTF32()) 252 return S_FloatTF32; 253 else if (&Sem == &llvm::APFloat::Float6E3M2FN()) 254 return S_Float6E3M2FN; 255 else if (&Sem == &llvm::APFloat::Float6E2M3FN()) 256 return S_Float6E2M3FN; 257 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 258 return S_x87DoubleExtended; 259 else 260 llvm_unreachable("Unknown floating semantics"); 261 } 262 263 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 264 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 265 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 266 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 267 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 268 const fltSemantics &APFloatBase::PPCDoubleDouble() { 269 return semPPCDoubleDouble; 270 } 271 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 272 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 273 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 274 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 275 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 276 return semFloat8E4M3B11FNUZ; 277 } 278 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } 279 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } 280 const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } 281 const fltSemantics &APFloatBase::x87DoubleExtended() { 282 return semX87DoubleExtended; 283 } 284 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 285 286 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 287 constexpr RoundingMode APFloatBase::rmTowardPositive; 288 constexpr RoundingMode APFloatBase::rmTowardNegative; 289 constexpr RoundingMode APFloatBase::rmTowardZero; 290 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 291 292 /* A tight upper bound on 
number of parts required to hold the value 293 pow(5, power) is 294 295 power * 815 / (351 * integerPartWidth) + 1 296 297 However, whilst the result may require only this many parts, 298 because we are multiplying two values to get it, the 299 multiplication may require an extra part with the excess part 300 being zero (consider the trivial case of 1 * 1, tcFullMultiply 301 requires two parts to hold the single-part result). So we add an 302 extra one to guarantee enough space whilst multiplying. */ 303 const unsigned int maxExponent = 16383; 304 const unsigned int maxPrecision = 113; 305 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 306 const unsigned int maxPowerOfFiveParts = 307 2 + 308 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 309 310 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 311 return semantics.precision; 312 } 313 APFloatBase::ExponentType 314 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 315 return semantics.maxExponent; 316 } 317 APFloatBase::ExponentType 318 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 319 return semantics.minExponent; 320 } 321 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 322 return semantics.sizeInBits; 323 } 324 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 325 bool isSigned) { 326 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 327 // at least one more bit than the MaxExponent to hold the max FP value. 328 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 329 // Extra sign bit needed. 330 if (isSigned) 331 ++MinBitWidth; 332 return MinBitWidth; 333 } 334 335 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 336 const fltSemantics &Dst) { 337 // Exponent range must be larger. 
338 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 339 return false; 340 341 // If the mantissa is long enough, the result value could still be denormal 342 // with a larger exponent range. 343 // 344 // FIXME: This condition is probably not accurate but also shouldn't be a 345 // practical concern with existing types. 346 return Dst.precision >= Src.precision; 347 } 348 349 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 350 return Sem.sizeInBits; 351 } 352 353 static constexpr APFloatBase::ExponentType 354 exponentZero(const fltSemantics &semantics) { 355 return semantics.minExponent - 1; 356 } 357 358 static constexpr APFloatBase::ExponentType 359 exponentInf(const fltSemantics &semantics) { 360 return semantics.maxExponent + 1; 361 } 362 363 static constexpr APFloatBase::ExponentType 364 exponentNaN(const fltSemantics &semantics) { 365 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 366 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 367 return exponentZero(semantics); 368 return semantics.maxExponent; 369 } 370 return semantics.maxExponent + 1; 371 } 372 373 /* A bunch of private, handy routines. */ 374 375 static inline Error createError(const Twine &Err) { 376 return make_error<StringError>(Err, inconvertibleErrorCode()); 377 } 378 379 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 380 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 381 } 382 383 /* Returns 0U-9U. Return values >= 10U are not digits. */ 384 static inline unsigned int 385 decDigitValue(unsigned int c) 386 { 387 return c - '0'; 388 } 389 390 /* Return the value of a decimal exponent of the form 391 [+-]ddddddd. 392 393 If the exponent overflows, returns a large exponent with the 394 appropriate sign. 
*/ 395 static Expected<int> readExponent(StringRef::iterator begin, 396 StringRef::iterator end) { 397 bool isNegative; 398 unsigned int absExponent; 399 const unsigned int overlargeExponent = 24000; /* FIXME. */ 400 StringRef::iterator p = begin; 401 402 // Treat no exponent as 0 to match binutils 403 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 404 return 0; 405 } 406 407 isNegative = (*p == '-'); 408 if (*p == '-' || *p == '+') { 409 p++; 410 if (p == end) 411 return createError("Exponent has no digits"); 412 } 413 414 absExponent = decDigitValue(*p++); 415 if (absExponent >= 10U) 416 return createError("Invalid character in exponent"); 417 418 for (; p != end; ++p) { 419 unsigned int value; 420 421 value = decDigitValue(*p); 422 if (value >= 10U) 423 return createError("Invalid character in exponent"); 424 425 absExponent = absExponent * 10U + value; 426 if (absExponent >= overlargeExponent) { 427 absExponent = overlargeExponent; 428 break; 429 } 430 } 431 432 if (isNegative) 433 return -(int) absExponent; 434 else 435 return (int) absExponent; 436 } 437 438 /* This is ugly and needs cleaning up, but I don't immediately see 439 how whilst remaining safe. 
*/ 440 static Expected<int> totalExponent(StringRef::iterator p, 441 StringRef::iterator end, 442 int exponentAdjustment) { 443 int unsignedExponent; 444 bool negative, overflow; 445 int exponent = 0; 446 447 if (p == end) 448 return createError("Exponent has no digits"); 449 450 negative = *p == '-'; 451 if (*p == '-' || *p == '+') { 452 p++; 453 if (p == end) 454 return createError("Exponent has no digits"); 455 } 456 457 unsignedExponent = 0; 458 overflow = false; 459 for (; p != end; ++p) { 460 unsigned int value; 461 462 value = decDigitValue(*p); 463 if (value >= 10U) 464 return createError("Invalid character in exponent"); 465 466 unsignedExponent = unsignedExponent * 10 + value; 467 if (unsignedExponent > 32767) { 468 overflow = true; 469 break; 470 } 471 } 472 473 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 474 overflow = true; 475 476 if (!overflow) { 477 exponent = unsignedExponent; 478 if (negative) 479 exponent = -exponent; 480 exponent += exponentAdjustment; 481 if (exponent > 32767 || exponent < -32768) 482 overflow = true; 483 } 484 485 if (overflow) 486 exponent = negative ? -32768: 32767; 487 488 return exponent; 489 } 490 491 static Expected<StringRef::iterator> 492 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 493 StringRef::iterator *dot) { 494 StringRef::iterator p = begin; 495 *dot = end; 496 while (p != end && *p == '0') 497 p++; 498 499 if (p != end && *p == '.') { 500 *dot = p++; 501 502 if (end - begin == 1) 503 return createError("Significand has no digits"); 504 505 while (p != end && *p == '0') 506 p++; 507 } 508 509 return p; 510 } 511 512 /* Given a normal decimal floating point number of the form 513 514 dddd.dddd[eE][+-]ddd 515 516 where the decimal point and exponent are optional, fill out the 517 structure D. 
Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  // First character after any leading zeroes and leading dot.
  const char *firstSigDigit;
  // Last significant digit once trailing zeroes have been dropped.
  const char *lastSigDigit;
  // Decimal exponent when the significant digits are read as an integer.
  int exponent;
  // Decimal exponent with the point placed after the first significant digit.
  int normalizedExponent;
};

// Parses the text in [begin, end) and fills in *D.  Returns an error for
// multiple dots, a significand without digits, a character that is neither a
// digit, a dot nor an exponent marker, or a malformed exponent.
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  // Scan across the digits (and at most one dot); stop at the first
  // character that is not a decimal digit.
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  // Anything left over must be an exponent introduced by 'e' or 'E'.
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      // The inner loop steps back over '0' characters; the outer loop repeats
      // that when the character it stopped on is the decimal point.
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}

/* Return the trailing fraction of a hexadecimal number.
   DIGITVALUE is the first hex digit of the fraction, P points to
   the next digit.  */
static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
                            unsigned int digitValue) {
  unsigned int hexDigit;

  /* If the first trailing digit isn't 0 or 8 we can work out the
     fraction immediately.  */
  if (digitValue > 8)
    return lfMoreThanHalf;
  else if (digitValue < 8 && digitValue > 0)
    return lfLessThanHalf;

  // Otherwise we need to find the first non-zero digit.
  while (p != end && (*p == '0' || *p == '.'))
    p++;

  // Reaching END here is reported as an error rather than as an exact
  // fraction.
  if (p == end)
    return createError("Invalid trailing hexadecimal fraction!");

  hexDigit = hexDigitValue(*p);

  /* If we ran off the end it is exactly zero or one-half, otherwise
     a little more.  */
  if (hexDigit == UINT_MAX)
    return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
  else
    return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
}

/* Return the fraction lost were a bignum truncated losing the least
   significant BITS bits.  */
static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
                              unsigned int partCount,
                              unsigned int bits)
{
  unsigned int lsb;

  lsb = APInt::tcLSB(parts, partCount);

  /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
  if (bits <= lsb)
    return lfExactlyZero;
  // The lowest set bit is the topmost bit being discarded.
  if (bits == lsb + 1)
    return lfExactlyHalf;
  // The topmost discarded bit is set and some lower bit is set as well.
  if (bits <= partCount * APFloatBase::integerPartWidth &&
      APInt::tcExtractBit(parts, bits - 1))
    return lfMoreThanHalf;

  return lfLessThanHalf;
}

/* Shift DST right BITS bits noting lost fraction.  */
static lostFraction
shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
{
  lostFraction lost_fraction;

  // The lost fraction has to be measured before the bits are shifted out.
  lost_fraction = lostFractionThroughTruncation(dst, parts, bits);

  APInt::tcShiftRight(dst, parts, bits);

  return lost_fraction;
}

/* Combine the effect of two lost fractions.  */
static lostFraction
combineLostFractions(lostFraction moreSignificant,
                     lostFraction lessSignificant)
{
  // A non-zero less significant fraction only matters when the more
  // significant one sits exactly on zero or on one half; it then nudges the
  // result just above that point.
  if (lessSignificant != lfExactlyZero) {
    if (moreSignificant == lfExactlyZero)
      moreSignificant = lfLessThanHalf;
    else if (moreSignificant == lfExactlyHalf)
      moreSignificant = lfMoreThanHalf;
  }

  return moreSignificant;
}

/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  // inexactMultiply is a bool used arithmetically: it contributes 0 or 1.
  if (HUerr1 + HUerr2 == 0)
    return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
  else
    return inexactMultiply + 2 * (HUerr1 + HUerr2);
}

/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.
*/
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  // Locate the part holding the highest truncated bit (index COUNT) and how
  // many of its low bits (PARTBITS) belong to the truncated region.
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  // Keep only the truncated bits of that part.
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  // Single-part case: return whichever unsigned difference is smaller, i.e.
  // the absolute distance between PART and BOUNDARY.
  if (count == 0) {
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  // Multi-part case: the distance only fits in one part when the top part is
  // exactly on the boundary (or one below it) and every part strictly between
  // the top and part 0 is all zeros (respectively all ones).
  if (part == boundary) {
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}

/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  // 5^0 .. 5^7, selected by the low three bits of POWER.
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  // Successive squarings of 5^8, stored back to back; seeded with 5^8.
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  // p1 holds the running product, p2 is the buffer the next product is
  // written into; they are swapped after every multiplication.
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  // Square-and-multiply over the remaining bits of POWER.
  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      // Square the previous entry, which sits partsCount parts before pow5.
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      // Drop a zero top part so that partsCount stays tight.
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* After the swap below, the product lives in p1 (RESULT parts
         long) and p2 is scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    pow5 += partsCount;
  }

  // The final product may have ended up in the scratch buffer.
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}

/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";

/* Write out an integerPart in hexadecimal, starting with the most
   significant nibble.  Write out exactly COUNT hexdigits, return
   COUNT.  */
static unsigned int
partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
           const char *hexDigitChars)
{
  unsigned int result = count;

  assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);

  // Discard the nibbles below the COUNT most significant ones, then emit
  // from the least significant remaining nibble backwards into DST.
  part >>= (APFloatBase::integerPartWidth - 4 * count);
  while (count--) {
    dst[count] = hexDigitChars[part & 0xf];
    part >>= 4;
  }

  return result;
}

/* Write out an unsigned decimal integer.
*/ 831 static char * 832 writeUnsignedDecimal (char *dst, unsigned int n) 833 { 834 char buff[40], *p; 835 836 p = buff; 837 do 838 *p++ = '0' + n % 10; 839 while (n /= 10); 840 841 do 842 *dst++ = *--p; 843 while (p != buff); 844 845 return dst; 846 } 847 848 /* Write out a signed decimal integer. */ 849 static char * 850 writeSignedDecimal (char *dst, int value) 851 { 852 if (value < 0) { 853 *dst++ = '-'; 854 dst = writeUnsignedDecimal(dst, -(unsigned) value); 855 } else 856 dst = writeUnsignedDecimal(dst, value); 857 858 return dst; 859 } 860 861 namespace detail { 862 /* Constructors. */ 863 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 864 unsigned int count; 865 866 semantics = ourSemantics; 867 count = partCount(); 868 if (count > 1) 869 significand.parts = new integerPart[count]; 870 } 871 872 void IEEEFloat::freeSignificand() { 873 if (needsCleanup()) 874 delete [] significand.parts; 875 } 876 877 void IEEEFloat::assign(const IEEEFloat &rhs) { 878 assert(semantics == rhs.semantics); 879 880 sign = rhs.sign; 881 category = rhs.category; 882 exponent = rhs.exponent; 883 if (isFiniteNonZero() || category == fcNaN) 884 copySignificand(rhs); 885 } 886 887 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 888 assert(isFiniteNonZero() || category == fcNaN); 889 assert(rhs.partCount() >= partCount()); 890 891 APInt::tcAssign(significandParts(), rhs.significandParts(), 892 partCount()); 893 } 894 895 /* Make this number a NaN, with an arbitrary but deterministic value 896 for the significand. If double or longer, this is a signalling NaN, 897 which may not be ideal. If float, this is QNaN(0). 
*/ 898 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 899 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 900 llvm_unreachable("This floating point format does not support NaN"); 901 902 category = fcNaN; 903 sign = Negative; 904 exponent = exponentNaN(); 905 906 integerPart *significand = significandParts(); 907 unsigned numParts = partCount(); 908 909 APInt fill_storage; 910 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 911 // Finite-only types do not distinguish signalling and quiet NaN, so 912 // make them all signalling. 913 SNaN = false; 914 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 915 sign = true; 916 fill_storage = APInt::getZero(semantics->precision - 1); 917 } else { 918 fill_storage = APInt::getAllOnes(semantics->precision - 1); 919 } 920 fill = &fill_storage; 921 } 922 923 // Set the significand bits to the fill. 924 if (!fill || fill->getNumWords() < numParts) 925 APInt::tcSet(significand, 0, numParts); 926 if (fill) { 927 APInt::tcAssign(significand, fill->getRawData(), 928 std::min(fill->getNumWords(), numParts)); 929 930 // Zero out the excess bits of the significand. 931 unsigned bitsToPreserve = semantics->precision - 1; 932 unsigned part = bitsToPreserve / 64; 933 bitsToPreserve %= 64; 934 significand[part] &= ((1ULL << bitsToPreserve) - 1); 935 for (part++; part != numParts; ++part) 936 significand[part] = 0; 937 } 938 939 unsigned QNaNBit = semantics->precision - 2; 940 941 if (SNaN) { 942 // We always have to clear the QNaN bit to make it an SNaN. 943 APInt::tcClearBit(significand, QNaNBit); 944 945 // If there are no bits set in the payload, we have to set 946 // *something* to make it a NaN instead of an infinity; 947 // conventionally, this is the next bit down from the QNaN bit. 
948 if (APInt::tcIsZero(significand, numParts)) 949 APInt::tcSetBit(significand, QNaNBit - 1); 950 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 951 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 952 // Do nothing. 953 } else { 954 // We always have to set the QNaN bit to make it a QNaN. 955 APInt::tcSetBit(significand, QNaNBit); 956 } 957 958 // For x87 extended precision, we want to make a NaN, not a 959 // pseudo-NaN. Maybe we should expose the ability to make 960 // pseudo-NaNs? 961 if (semantics == &semX87DoubleExtended) 962 APInt::tcSetBit(significand, QNaNBit + 1); 963 } 964 965 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 966 if (this != &rhs) { 967 if (semantics != rhs.semantics) { 968 freeSignificand(); 969 initialize(rhs.semantics); 970 } 971 assign(rhs); 972 } 973 974 return *this; 975 } 976 977 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 978 freeSignificand(); 979 980 semantics = rhs.semantics; 981 significand = rhs.significand; 982 exponent = rhs.exponent; 983 category = rhs.category; 984 sign = rhs.sign; 985 986 rhs.semantics = &semBogus; 987 return *this; 988 } 989 990 bool IEEEFloat::isDenormal() const { 991 return isFiniteNonZero() && (exponent == semantics->minExponent) && 992 (APInt::tcExtractBit(significandParts(), 993 semantics->precision - 1) == 0); 994 } 995 996 bool IEEEFloat::isSmallest() const { 997 // The smallest number by magnitude in our format will be the smallest 998 // denormal, i.e. the floating point number with exponent being minimum 999 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 
1000 return isFiniteNonZero() && exponent == semantics->minExponent && 1001 significandMSB() == 0; 1002 } 1003 1004 bool IEEEFloat::isSmallestNormalized() const { 1005 return getCategory() == fcNormal && exponent == semantics->minExponent && 1006 isSignificandAllZerosExceptMSB(); 1007 } 1008 1009 bool IEEEFloat::isSignificandAllOnes() const { 1010 // Test if the significand excluding the integral bit is all ones. This allows 1011 // us to test for binade boundaries. 1012 const integerPart *Parts = significandParts(); 1013 const unsigned PartCount = partCountForBits(semantics->precision); 1014 for (unsigned i = 0; i < PartCount - 1; i++) 1015 if (~Parts[i]) 1016 return false; 1017 1018 // Set the unused high bits to all ones when we compare. 1019 const unsigned NumHighBits = 1020 PartCount*integerPartWidth - semantics->precision + 1; 1021 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1022 "Can not have more high bits to fill than integerPartWidth"); 1023 const integerPart HighBitFill = 1024 ~integerPart(0) << (integerPartWidth - NumHighBits); 1025 if (~(Parts[PartCount - 1] | HighBitFill)) 1026 return false; 1027 1028 return true; 1029 } 1030 1031 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1032 // Test if the significand excluding the integral bit is all ones except for 1033 // the least significant bit. 1034 const integerPart *Parts = significandParts(); 1035 1036 if (Parts[0] & 1) 1037 return false; 1038 1039 const unsigned PartCount = partCountForBits(semantics->precision); 1040 for (unsigned i = 0; i < PartCount - 1; i++) { 1041 if (~Parts[i] & ~unsigned{!i}) 1042 return false; 1043 } 1044 1045 // Set the unused high bits to all ones when we compare. 
1046 const unsigned NumHighBits = 1047 PartCount * integerPartWidth - semantics->precision + 1; 1048 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1049 "Can not have more high bits to fill than integerPartWidth"); 1050 const integerPart HighBitFill = ~integerPart(0) 1051 << (integerPartWidth - NumHighBits); 1052 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1053 return false; 1054 1055 return true; 1056 } 1057 1058 bool IEEEFloat::isSignificandAllZeros() const { 1059 // Test if the significand excluding the integral bit is all zeros. This 1060 // allows us to test for binade boundaries. 1061 const integerPart *Parts = significandParts(); 1062 const unsigned PartCount = partCountForBits(semantics->precision); 1063 1064 for (unsigned i = 0; i < PartCount - 1; i++) 1065 if (Parts[i]) 1066 return false; 1067 1068 // Compute how many bits are used in the final word. 1069 const unsigned NumHighBits = 1070 PartCount*integerPartWidth - semantics->precision + 1; 1071 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1072 "clear than integerPartWidth"); 1073 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1074 1075 if (Parts[PartCount - 1] & HighBitMask) 1076 return false; 1077 1078 return true; 1079 } 1080 1081 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1082 const integerPart *Parts = significandParts(); 1083 const unsigned PartCount = partCountForBits(semantics->precision); 1084 1085 for (unsigned i = 0; i < PartCount - 1; i++) { 1086 if (Parts[i]) 1087 return false; 1088 } 1089 1090 const unsigned NumHighBits = 1091 PartCount * integerPartWidth - semantics->precision + 1; 1092 return Parts[PartCount - 1] == integerPart(1) 1093 << (integerPartWidth - NumHighBits); 1094 } 1095 1096 bool IEEEFloat::isLargest() const { 1097 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1098 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1099 // The largest number by magnitude in our format 
will be the floating point 1100 // number with maximum exponent and with significand that is all ones except 1101 // the LSB. 1102 return isFiniteNonZero() && exponent == semantics->maxExponent && 1103 isSignificandAllOnesExceptLSB(); 1104 } else { 1105 // The largest number by magnitude in our format will be the floating point 1106 // number with maximum exponent and with significand that is all ones. 1107 return isFiniteNonZero() && exponent == semantics->maxExponent && 1108 isSignificandAllOnes(); 1109 } 1110 } 1111 1112 bool IEEEFloat::isInteger() const { 1113 // This could be made more efficient; I'm going for obviously correct. 1114 if (!isFinite()) return false; 1115 IEEEFloat truncated = *this; 1116 truncated.roundToIntegral(rmTowardZero); 1117 return compare(truncated) == cmpEqual; 1118 } 1119 1120 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1121 if (this == &rhs) 1122 return true; 1123 if (semantics != rhs.semantics || 1124 category != rhs.category || 1125 sign != rhs.sign) 1126 return false; 1127 if (category==fcZero || category==fcInfinity) 1128 return true; 1129 1130 if (isFiniteNonZero() && exponent != rhs.exponent) 1131 return false; 1132 1133 return std::equal(significandParts(), significandParts() + partCount(), 1134 rhs.significandParts()); 1135 } 1136 1137 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1138 initialize(&ourSemantics); 1139 sign = 0; 1140 category = fcNormal; 1141 zeroSignificand(); 1142 exponent = ourSemantics.precision - 1; 1143 significandParts()[0] = value; 1144 normalize(rmNearestTiesToEven, lfExactlyZero); 1145 } 1146 1147 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1148 initialize(&ourSemantics); 1149 makeZero(false); 1150 } 1151 1152 // Delegate to the previous constructor, because later copy constructor may 1153 // actually inspects category, which can't be garbage. 
1154 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1155 : IEEEFloat(ourSemantics) {} 1156 1157 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1158 initialize(rhs.semantics); 1159 assign(rhs); 1160 } 1161 1162 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1163 *this = std::move(rhs); 1164 } 1165 1166 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1167 1168 unsigned int IEEEFloat::partCount() const { 1169 return partCountForBits(semantics->precision + 1); 1170 } 1171 1172 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 1173 return const_cast<IEEEFloat *>(this)->significandParts(); 1174 } 1175 1176 IEEEFloat::integerPart *IEEEFloat::significandParts() { 1177 if (partCount() > 1) 1178 return significand.parts; 1179 else 1180 return &significand.part; 1181 } 1182 1183 void IEEEFloat::zeroSignificand() { 1184 APInt::tcSet(significandParts(), 0, partCount()); 1185 } 1186 1187 /* Increment an fcNormal floating point number's significand. */ 1188 void IEEEFloat::incrementSignificand() { 1189 integerPart carry; 1190 1191 carry = APInt::tcIncrement(significandParts(), partCount()); 1192 1193 /* Our callers should never cause us to overflow. */ 1194 assert(carry == 0); 1195 (void)carry; 1196 } 1197 1198 /* Add the significand of the RHS. Returns the carry flag. */ 1199 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1200 integerPart *parts; 1201 1202 parts = significandParts(); 1203 1204 assert(semantics == rhs.semantics); 1205 assert(exponent == rhs.exponent); 1206 1207 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1208 } 1209 1210 /* Subtract the significand of the RHS with a borrow flag. Returns 1211 the borrow flag. 
*/ 1212 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs, 1213 integerPart borrow) { 1214 integerPart *parts; 1215 1216 parts = significandParts(); 1217 1218 assert(semantics == rhs.semantics); 1219 assert(exponent == rhs.exponent); 1220 1221 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 1222 partCount()); 1223 } 1224 1225 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 1226 on to the full-precision result of the multiplication. Returns the 1227 lost fraction. */ 1228 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs, 1229 IEEEFloat addend) { 1230 unsigned int omsb; // One, not zero, based MSB. 1231 unsigned int partsCount, newPartsCount, precision; 1232 integerPart *lhsSignificand; 1233 integerPart scratch[4]; 1234 integerPart *fullSignificand; 1235 lostFraction lost_fraction; 1236 bool ignored; 1237 1238 assert(semantics == rhs.semantics); 1239 1240 precision = semantics->precision; 1241 1242 // Allocate space for twice as many bits as the original significand, plus one 1243 // extra bit for the addition to overflow into. 1244 newPartsCount = partCountForBits(precision * 2 + 1); 1245 1246 if (newPartsCount > 4) 1247 fullSignificand = new integerPart[newPartsCount]; 1248 else 1249 fullSignificand = scratch; 1250 1251 lhsSignificand = significandParts(); 1252 partsCount = partCount(); 1253 1254 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 1255 rhs.significandParts(), partsCount, partsCount); 1256 1257 lost_fraction = lfExactlyZero; 1258 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1259 exponent += rhs.exponent; 1260 1261 // Assume the operands involved in the multiplication are single-precision 1262 // FP, and the two multiplicants are: 1263 // *this = a23 . a22 ... a0 * 2^e1 1264 // rhs = b23 . b22 ... b0 * 2^e2 1265 // the result of multiplication is: 1266 // *this = c48 c47 c46 . c45 ... 
c0 * 2^(e1+e2) 1267 // Note that there are three significant bits at the left-hand side of the 1268 // radix point: two for the multiplication, and an overflow bit for the 1269 // addition (that will always be zero at this point). Move the radix point 1270 // toward left by two bits, and adjust exponent accordingly. 1271 exponent += 2; 1272 1273 if (addend.isNonZero()) { 1274 // The intermediate result of the multiplication has "2 * precision" 1275 // signicant bit; adjust the addend to be consistent with mul result. 1276 // 1277 Significand savedSignificand = significand; 1278 const fltSemantics *savedSemantics = semantics; 1279 fltSemantics extendedSemantics; 1280 opStatus status; 1281 unsigned int extendedPrecision; 1282 1283 // Normalize our MSB to one below the top bit to allow for overflow. 1284 extendedPrecision = 2 * precision + 1; 1285 if (omsb != extendedPrecision - 1) { 1286 assert(extendedPrecision > omsb); 1287 APInt::tcShiftLeft(fullSignificand, newPartsCount, 1288 (extendedPrecision - 1) - omsb); 1289 exponent -= (extendedPrecision - 1) - omsb; 1290 } 1291 1292 /* Create new semantics. */ 1293 extendedSemantics = *semantics; 1294 extendedSemantics.precision = extendedPrecision; 1295 1296 if (newPartsCount == 1) 1297 significand.part = fullSignificand[0]; 1298 else 1299 significand.parts = fullSignificand; 1300 semantics = &extendedSemantics; 1301 1302 // Make a copy so we can convert it to the extended semantics. 1303 // Note that we cannot convert the addend directly, as the extendedSemantics 1304 // is a local variable (which we take a reference to). 1305 IEEEFloat extendedAddend(addend); 1306 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 1307 assert(status == opOK); 1308 (void)status; 1309 1310 // Shift the significand of the addend right by one bit. 
This guarantees 1311 // that the high bit of the significand is zero (same as fullSignificand), 1312 // so the addition will overflow (if it does overflow at all) into the top bit. 1313 lost_fraction = extendedAddend.shiftSignificandRight(1); 1314 assert(lost_fraction == lfExactlyZero && 1315 "Lost precision while shifting addend for fused-multiply-add."); 1316 1317 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1318 1319 /* Restore our state. */ 1320 if (newPartsCount == 1) 1321 fullSignificand[0] = significand.part; 1322 significand = savedSignificand; 1323 semantics = savedSemantics; 1324 1325 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1326 } 1327 1328 // Convert the result having "2 * precision" significant-bits back to the one 1329 // having "precision" significant-bits. First, move the radix point from 1330 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1331 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1332 exponent -= precision + 1; 1333 1334 // In case MSB resides at the left-hand side of radix point, shift the 1335 // mantissa right by some amount to make sure the MSB reside right before 1336 // the radix point (i.e. "MSB . rest-significant-bits"). 1337 // 1338 // Note that the result is not normalized when "omsb < precision". So, the 1339 // caller needs to call IEEEFloat::normalize() if normalized value is 1340 // expected. 
1341 if (omsb > precision) { 1342 unsigned int bits, significantParts; 1343 lostFraction lf; 1344 1345 bits = omsb - precision; 1346 significantParts = partCountForBits(omsb); 1347 lf = shiftRight(fullSignificand, significantParts, bits); 1348 lost_fraction = combineLostFractions(lf, lost_fraction); 1349 exponent += bits; 1350 } 1351 1352 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1353 1354 if (newPartsCount > 4) 1355 delete [] fullSignificand; 1356 1357 return lost_fraction; 1358 } 1359 1360 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) { 1361 return multiplySignificand(rhs, IEEEFloat(*semantics)); 1362 } 1363 1364 /* Multiply the significands of LHS and RHS to DST. */ 1365 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) { 1366 unsigned int bit, i, partsCount; 1367 const integerPart *rhsSignificand; 1368 integerPart *lhsSignificand, *dividend, *divisor; 1369 integerPart scratch[4]; 1370 lostFraction lost_fraction; 1371 1372 assert(semantics == rhs.semantics); 1373 1374 lhsSignificand = significandParts(); 1375 rhsSignificand = rhs.significandParts(); 1376 partsCount = partCount(); 1377 1378 if (partsCount > 2) 1379 dividend = new integerPart[partsCount * 2]; 1380 else 1381 dividend = scratch; 1382 1383 divisor = dividend + partsCount; 1384 1385 /* Copy the dividend and divisor as they will be modified in-place. */ 1386 for (i = 0; i < partsCount; i++) { 1387 dividend[i] = lhsSignificand[i]; 1388 divisor[i] = rhsSignificand[i]; 1389 lhsSignificand[i] = 0; 1390 } 1391 1392 exponent -= rhs.exponent; 1393 1394 unsigned int precision = semantics->precision; 1395 1396 /* Normalize the divisor. */ 1397 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1398 if (bit) { 1399 exponent += bit; 1400 APInt::tcShiftLeft(divisor, partsCount, bit); 1401 } 1402 1403 /* Normalize the dividend. 
*/ 1404 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1405 if (bit) { 1406 exponent -= bit; 1407 APInt::tcShiftLeft(dividend, partsCount, bit); 1408 } 1409 1410 /* Ensure the dividend >= divisor initially for the loop below. 1411 Incidentally, this means that the division loop below is 1412 guaranteed to set the integer bit to one. */ 1413 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1414 exponent--; 1415 APInt::tcShiftLeft(dividend, partsCount, 1); 1416 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1417 } 1418 1419 /* Long division. */ 1420 for (bit = precision; bit; bit -= 1) { 1421 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1422 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1423 APInt::tcSetBit(lhsSignificand, bit - 1); 1424 } 1425 1426 APInt::tcShiftLeft(dividend, partsCount, 1); 1427 } 1428 1429 /* Figure out the lost fraction. */ 1430 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1431 1432 if (cmp > 0) 1433 lost_fraction = lfMoreThanHalf; 1434 else if (cmp == 0) 1435 lost_fraction = lfExactlyHalf; 1436 else if (APInt::tcIsZero(dividend, partsCount)) 1437 lost_fraction = lfExactlyZero; 1438 else 1439 lost_fraction = lfLessThanHalf; 1440 1441 if (partsCount > 2) 1442 delete [] dividend; 1443 1444 return lost_fraction; 1445 } 1446 1447 unsigned int IEEEFloat::significandMSB() const { 1448 return APInt::tcMSB(significandParts(), partCount()); 1449 } 1450 1451 unsigned int IEEEFloat::significandLSB() const { 1452 return APInt::tcLSB(significandParts(), partCount()); 1453 } 1454 1455 /* Note that a zero result is NOT normalized to fcZero. */ 1456 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) { 1457 /* Our exponent should not overflow. 
*/ 1458 assert((ExponentType) (exponent + bits) >= exponent); 1459 1460 exponent += bits; 1461 1462 return shiftRight(significandParts(), partCount(), bits); 1463 } 1464 1465 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1466 void IEEEFloat::shiftSignificandLeft(unsigned int bits) { 1467 assert(bits < semantics->precision); 1468 1469 if (bits) { 1470 unsigned int partsCount = partCount(); 1471 1472 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1473 exponent -= bits; 1474 1475 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1476 } 1477 } 1478 1479 IEEEFloat::cmpResult 1480 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const { 1481 int compare; 1482 1483 assert(semantics == rhs.semantics); 1484 assert(isFiniteNonZero()); 1485 assert(rhs.isFiniteNonZero()); 1486 1487 compare = exponent - rhs.exponent; 1488 1489 /* If exponents are equal, do an unsigned bignum comparison of the 1490 significands. */ 1491 if (compare == 0) 1492 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1493 partCount()); 1494 1495 if (compare > 0) 1496 return cmpGreaterThan; 1497 else if (compare < 0) 1498 return cmpLessThan; 1499 else 1500 return cmpEqual; 1501 } 1502 1503 /* Set the least significant BITS bits of a bignum, clear the 1504 rest. */ 1505 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, 1506 unsigned bits) { 1507 unsigned i = 0; 1508 while (bits > APInt::APINT_BITS_PER_WORD) { 1509 dst[i++] = ~(APInt::WordType)0; 1510 bits -= APInt::APINT_BITS_PER_WORD; 1511 } 1512 1513 if (bits) 1514 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits); 1515 1516 while (i < parts) 1517 dst[i++] = 0; 1518 } 1519 1520 /* Handle overflow. Sign is preserved. We either become infinity or 1521 the largest finite number. 
*/ 1522 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1523 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) { 1524 /* Infinity? */ 1525 if (rounding_mode == rmNearestTiesToEven || 1526 rounding_mode == rmNearestTiesToAway || 1527 (rounding_mode == rmTowardPositive && !sign) || 1528 (rounding_mode == rmTowardNegative && sign)) { 1529 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1530 makeNaN(false, sign); 1531 else 1532 category = fcInfinity; 1533 return static_cast<opStatus>(opOverflow | opInexact); 1534 } 1535 } 1536 1537 /* Otherwise we become the largest finite number. */ 1538 category = fcNormal; 1539 exponent = semantics->maxExponent; 1540 tcSetLeastSignificantBits(significandParts(), partCount(), 1541 semantics->precision); 1542 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1543 semantics->nanEncoding == fltNanEncoding::AllOnes) 1544 APInt::tcClearBit(significandParts(), 0); 1545 1546 return opInexact; 1547 } 1548 1549 /* Returns TRUE if, when truncating the current number, with BIT the 1550 new LSB, with the given lost fraction and rounding mode, the result 1551 would need to be rounded away from zero (i.e., by increasing the 1552 signficand). This routine must work for fcZero of both signs, and 1553 fcNormal numbers. */ 1554 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1555 lostFraction lost_fraction, 1556 unsigned int bit) const { 1557 /* NaNs and infinities should not have lost fractions. */ 1558 assert(isFiniteNonZero() || category == fcZero); 1559 1560 /* Current callers never pass this so we don't handle it. 
*/ 1561 assert(lost_fraction != lfExactlyZero); 1562 1563 switch (rounding_mode) { 1564 case rmNearestTiesToAway: 1565 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1566 1567 case rmNearestTiesToEven: 1568 if (lost_fraction == lfMoreThanHalf) 1569 return true; 1570 1571 /* Our zeroes don't have a significand to test. */ 1572 if (lost_fraction == lfExactlyHalf && category != fcZero) 1573 return APInt::tcExtractBit(significandParts(), bit); 1574 1575 return false; 1576 1577 case rmTowardZero: 1578 return false; 1579 1580 case rmTowardPositive: 1581 return !sign; 1582 1583 case rmTowardNegative: 1584 return sign; 1585 1586 default: 1587 break; 1588 } 1589 llvm_unreachable("Invalid rounding mode found"); 1590 } 1591 1592 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1593 lostFraction lost_fraction) { 1594 unsigned int omsb; /* One, not zero, based MSB. */ 1595 int exponentChange; 1596 1597 if (!isFiniteNonZero()) 1598 return opOK; 1599 1600 /* Before rounding normalize the exponent of fcNormal numbers. */ 1601 omsb = significandMSB() + 1; 1602 1603 if (omsb) { 1604 /* OMSB is numbered from 1. We want to place it in the integer 1605 bit numbered PRECISION if possible, with a compensating change in 1606 the exponent. */ 1607 exponentChange = omsb - semantics->precision; 1608 1609 /* If the resulting exponent is too high, overflow according to 1610 the rounding mode. */ 1611 if (exponent + exponentChange > semantics->maxExponent) 1612 return handleOverflow(rounding_mode); 1613 1614 /* Subnormal numbers have exponent minExponent, and their MSB 1615 is forced based on that. */ 1616 if (exponent + exponentChange < semantics->minExponent) 1617 exponentChange = semantics->minExponent - exponent; 1618 1619 /* Shifting left is easy as we don't lose precision. 
*/ 1620 if (exponentChange < 0) { 1621 assert(lost_fraction == lfExactlyZero); 1622 1623 shiftSignificandLeft(-exponentChange); 1624 1625 return opOK; 1626 } 1627 1628 if (exponentChange > 0) { 1629 lostFraction lf; 1630 1631 /* Shift right and capture any new lost fraction. */ 1632 lf = shiftSignificandRight(exponentChange); 1633 1634 lost_fraction = combineLostFractions(lf, lost_fraction); 1635 1636 /* Keep OMSB up-to-date. */ 1637 if (omsb > (unsigned) exponentChange) 1638 omsb -= exponentChange; 1639 else 1640 omsb = 0; 1641 } 1642 } 1643 1644 // The all-ones values is an overflow if NaN is all ones. If NaN is 1645 // represented by negative zero, then it is a valid finite value. 1646 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1647 semantics->nanEncoding == fltNanEncoding::AllOnes && 1648 exponent == semantics->maxExponent && isSignificandAllOnes()) 1649 return handleOverflow(rounding_mode); 1650 1651 /* Now round the number according to rounding_mode given the lost 1652 fraction. */ 1653 1654 /* As specified in IEEE 754, since we do not trap we do not report 1655 underflow for exact results. */ 1656 if (lost_fraction == lfExactlyZero) { 1657 /* Canonicalize zeroes. */ 1658 if (omsb == 0) { 1659 category = fcZero; 1660 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 1661 sign = false; 1662 } 1663 1664 return opOK; 1665 } 1666 1667 /* Increment the significand if we're rounding away from zero. */ 1668 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1669 if (omsb == 0) 1670 exponent = semantics->minExponent; 1671 1672 incrementSignificand(); 1673 omsb = significandMSB() + 1; 1674 1675 /* Did the significand increment overflow? */ 1676 if (omsb == (unsigned) semantics->precision + 1) { 1677 /* Renormalize by incrementing the exponent and shifting our 1678 significand right one. However if we already have the 1679 maximum exponent we overflow to infinity. 
*/ 1680 if (exponent == semantics->maxExponent) 1681 // Invoke overflow handling with a rounding mode that will guarantee 1682 // that the result gets turned into the correct infinity representation. 1683 // This is needed instead of just setting the category to infinity to 1684 // account for 8-bit floating point types that have no inf, only NaN. 1685 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive); 1686 1687 shiftSignificandRight(1); 1688 1689 return opInexact; 1690 } 1691 1692 // The all-ones values is an overflow if NaN is all ones. If NaN is 1693 // represented by negative zero, then it is a valid finite value. 1694 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1695 semantics->nanEncoding == fltNanEncoding::AllOnes && 1696 exponent == semantics->maxExponent && isSignificandAllOnes()) 1697 return handleOverflow(rounding_mode); 1698 } 1699 1700 /* The normal case - we were and are not denormal, and any 1701 significand increment above didn't overflow. */ 1702 if (omsb == semantics->precision) 1703 return opInexact; 1704 1705 /* We have a non-zero denormal. */ 1706 assert(omsb < semantics->precision); 1707 1708 /* Canonicalize zeroes. */ 1709 if (omsb == 0) { 1710 category = fcZero; 1711 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 1712 sign = false; 1713 } 1714 1715 /* The fcZero case is a denormal that underflowed to zero. 
*/ 1716 return (opStatus) (opUnderflow | opInexact); 1717 } 1718 1719 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1720 bool subtract) { 1721 switch (PackCategoriesIntoKey(category, rhs.category)) { 1722 default: 1723 llvm_unreachable(nullptr); 1724 1725 case PackCategoriesIntoKey(fcZero, fcNaN): 1726 case PackCategoriesIntoKey(fcNormal, fcNaN): 1727 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1728 assign(rhs); 1729 [[fallthrough]]; 1730 case PackCategoriesIntoKey(fcNaN, fcZero): 1731 case PackCategoriesIntoKey(fcNaN, fcNormal): 1732 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1733 case PackCategoriesIntoKey(fcNaN, fcNaN): 1734 if (isSignaling()) { 1735 makeQuiet(); 1736 return opInvalidOp; 1737 } 1738 return rhs.isSignaling() ? opInvalidOp : opOK; 1739 1740 case PackCategoriesIntoKey(fcNormal, fcZero): 1741 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1742 case PackCategoriesIntoKey(fcInfinity, fcZero): 1743 return opOK; 1744 1745 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1746 case PackCategoriesIntoKey(fcZero, fcInfinity): 1747 category = fcInfinity; 1748 sign = rhs.sign ^ subtract; 1749 return opOK; 1750 1751 case PackCategoriesIntoKey(fcZero, fcNormal): 1752 assign(rhs); 1753 sign = rhs.sign ^ subtract; 1754 return opOK; 1755 1756 case PackCategoriesIntoKey(fcZero, fcZero): 1757 /* Sign depends on rounding mode; handled by caller. */ 1758 return opOK; 1759 1760 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1761 /* Differently signed infinities can only be validly 1762 subtracted. */ 1763 if (((sign ^ rhs.sign)!=0) != subtract) { 1764 makeNaN(); 1765 return opInvalidOp; 1766 } 1767 1768 return opOK; 1769 1770 case PackCategoriesIntoKey(fcNormal, fcNormal): 1771 return opDivByZero; 1772 } 1773 } 1774 1775 /* Add or subtract two normal numbers. 
*/ 1776 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1777 bool subtract) { 1778 integerPart carry; 1779 lostFraction lost_fraction; 1780 int bits; 1781 1782 /* Determine if the operation on the absolute values is effectively 1783 an addition or subtraction. */ 1784 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1785 1786 /* Are we bigger exponent-wise than the RHS? */ 1787 bits = exponent - rhs.exponent; 1788 1789 /* Subtraction is more subtle than one might naively expect. */ 1790 if (subtract) { 1791 IEEEFloat temp_rhs(rhs); 1792 1793 if (bits == 0) 1794 lost_fraction = lfExactlyZero; 1795 else if (bits > 0) { 1796 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1797 shiftSignificandLeft(1); 1798 } else { 1799 lost_fraction = shiftSignificandRight(-bits - 1); 1800 temp_rhs.shiftSignificandLeft(1); 1801 } 1802 1803 // Should we reverse the subtraction. 1804 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1805 carry = temp_rhs.subtractSignificand 1806 (*this, lost_fraction != lfExactlyZero); 1807 copySignificand(temp_rhs); 1808 sign = !sign; 1809 } else { 1810 carry = subtractSignificand 1811 (temp_rhs, lost_fraction != lfExactlyZero); 1812 } 1813 1814 /* Invert the lost fraction - it was on the RHS and 1815 subtracted. */ 1816 if (lost_fraction == lfLessThanHalf) 1817 lost_fraction = lfMoreThanHalf; 1818 else if (lost_fraction == lfMoreThanHalf) 1819 lost_fraction = lfLessThanHalf; 1820 1821 /* The code above is intended to ensure that no borrow is 1822 necessary. */ 1823 assert(!carry); 1824 (void)carry; 1825 } else { 1826 if (bits > 0) { 1827 IEEEFloat temp_rhs(rhs); 1828 1829 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1830 carry = addSignificand(temp_rhs); 1831 } else { 1832 lost_fraction = shiftSignificandRight(-bits); 1833 carry = addSignificand(rhs); 1834 } 1835 1836 /* We have a guard bit; generating a carry cannot happen. 
*/ 1837 assert(!carry); 1838 (void)carry; 1839 } 1840 1841 return lost_fraction; 1842 } 1843 1844 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1845 switch (PackCategoriesIntoKey(category, rhs.category)) { 1846 default: 1847 llvm_unreachable(nullptr); 1848 1849 case PackCategoriesIntoKey(fcZero, fcNaN): 1850 case PackCategoriesIntoKey(fcNormal, fcNaN): 1851 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1852 assign(rhs); 1853 sign = false; 1854 [[fallthrough]]; 1855 case PackCategoriesIntoKey(fcNaN, fcZero): 1856 case PackCategoriesIntoKey(fcNaN, fcNormal): 1857 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1858 case PackCategoriesIntoKey(fcNaN, fcNaN): 1859 sign ^= rhs.sign; // restore the original sign 1860 if (isSignaling()) { 1861 makeQuiet(); 1862 return opInvalidOp; 1863 } 1864 return rhs.isSignaling() ? opInvalidOp : opOK; 1865 1866 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1867 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1868 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1869 category = fcInfinity; 1870 return opOK; 1871 1872 case PackCategoriesIntoKey(fcZero, fcNormal): 1873 case PackCategoriesIntoKey(fcNormal, fcZero): 1874 case PackCategoriesIntoKey(fcZero, fcZero): 1875 category = fcZero; 1876 return opOK; 1877 1878 case PackCategoriesIntoKey(fcZero, fcInfinity): 1879 case PackCategoriesIntoKey(fcInfinity, fcZero): 1880 makeNaN(); 1881 return opInvalidOp; 1882 1883 case PackCategoriesIntoKey(fcNormal, fcNormal): 1884 return opOK; 1885 } 1886 } 1887 1888 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1889 switch (PackCategoriesIntoKey(category, rhs.category)) { 1890 default: 1891 llvm_unreachable(nullptr); 1892 1893 case PackCategoriesIntoKey(fcZero, fcNaN): 1894 case PackCategoriesIntoKey(fcNormal, fcNaN): 1895 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1896 assign(rhs); 1897 sign = false; 1898 [[fallthrough]]; 1899 case PackCategoriesIntoKey(fcNaN, fcZero): 1900 case 
PackCategoriesIntoKey(fcNaN, fcNormal): 1901 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1902 case PackCategoriesIntoKey(fcNaN, fcNaN): 1903 sign ^= rhs.sign; // restore the original sign 1904 if (isSignaling()) { 1905 makeQuiet(); 1906 return opInvalidOp; 1907 } 1908 return rhs.isSignaling() ? opInvalidOp : opOK; 1909 1910 case PackCategoriesIntoKey(fcInfinity, fcZero): 1911 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1912 case PackCategoriesIntoKey(fcZero, fcInfinity): 1913 case PackCategoriesIntoKey(fcZero, fcNormal): 1914 return opOK; 1915 1916 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1917 category = fcZero; 1918 return opOK; 1919 1920 case PackCategoriesIntoKey(fcNormal, fcZero): 1921 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1922 makeNaN(false, sign); 1923 else 1924 category = fcInfinity; 1925 return opDivByZero; 1926 1927 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1928 case PackCategoriesIntoKey(fcZero, fcZero): 1929 makeNaN(); 1930 return opInvalidOp; 1931 1932 case PackCategoriesIntoKey(fcNormal, fcNormal): 1933 return opOK; 1934 } 1935 } 1936 1937 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 1938 switch (PackCategoriesIntoKey(category, rhs.category)) { 1939 default: 1940 llvm_unreachable(nullptr); 1941 1942 case PackCategoriesIntoKey(fcZero, fcNaN): 1943 case PackCategoriesIntoKey(fcNormal, fcNaN): 1944 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1945 assign(rhs); 1946 [[fallthrough]]; 1947 case PackCategoriesIntoKey(fcNaN, fcZero): 1948 case PackCategoriesIntoKey(fcNaN, fcNormal): 1949 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1950 case PackCategoriesIntoKey(fcNaN, fcNaN): 1951 if (isSignaling()) { 1952 makeQuiet(); 1953 return opInvalidOp; 1954 } 1955 return rhs.isSignaling() ? 
opInvalidOp : opOK; 1956 1957 case PackCategoriesIntoKey(fcZero, fcInfinity): 1958 case PackCategoriesIntoKey(fcZero, fcNormal): 1959 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1960 return opOK; 1961 1962 case PackCategoriesIntoKey(fcNormal, fcZero): 1963 case PackCategoriesIntoKey(fcInfinity, fcZero): 1964 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1965 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1966 case PackCategoriesIntoKey(fcZero, fcZero): 1967 makeNaN(); 1968 return opInvalidOp; 1969 1970 case PackCategoriesIntoKey(fcNormal, fcNormal): 1971 return opOK; 1972 } 1973 } 1974 1975 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 1976 switch (PackCategoriesIntoKey(category, rhs.category)) { 1977 default: 1978 llvm_unreachable(nullptr); 1979 1980 case PackCategoriesIntoKey(fcZero, fcNaN): 1981 case PackCategoriesIntoKey(fcNormal, fcNaN): 1982 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1983 assign(rhs); 1984 [[fallthrough]]; 1985 case PackCategoriesIntoKey(fcNaN, fcZero): 1986 case PackCategoriesIntoKey(fcNaN, fcNormal): 1987 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1988 case PackCategoriesIntoKey(fcNaN, fcNaN): 1989 if (isSignaling()) { 1990 makeQuiet(); 1991 return opInvalidOp; 1992 } 1993 return rhs.isSignaling() ? opInvalidOp : opOK; 1994 1995 case PackCategoriesIntoKey(fcZero, fcInfinity): 1996 case PackCategoriesIntoKey(fcZero, fcNormal): 1997 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1998 return opOK; 1999 2000 case PackCategoriesIntoKey(fcNormal, fcZero): 2001 case PackCategoriesIntoKey(fcInfinity, fcZero): 2002 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2003 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2004 case PackCategoriesIntoKey(fcZero, fcZero): 2005 makeNaN(); 2006 return opInvalidOp; 2007 2008 case PackCategoriesIntoKey(fcNormal, fcNormal): 2009 return opDivByZero; // fake status, indicating this is not a special case 2010 } 2011 } 2012 2013 /* Change sign. 
*/ 2014 void IEEEFloat::changeSign() { 2015 // With NaN-as-negative-zero, neither NaN or negative zero can change 2016 // their signs. 2017 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 2018 (isZero() || isNaN())) 2019 return; 2020 /* Look mummy, this one's easy. */ 2021 sign = !sign; 2022 } 2023 2024 /* Normalized addition or subtraction. */ 2025 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2026 roundingMode rounding_mode, 2027 bool subtract) { 2028 opStatus fs; 2029 2030 fs = addOrSubtractSpecials(rhs, subtract); 2031 2032 /* This return code means it was not a simple case. */ 2033 if (fs == opDivByZero) { 2034 lostFraction lost_fraction; 2035 2036 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2037 fs = normalize(rounding_mode, lost_fraction); 2038 2039 /* Can only be zero if we lost no fraction. */ 2040 assert(category != fcZero || lost_fraction == lfExactlyZero); 2041 } 2042 2043 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2044 positive zero unless rounding to minus infinity, except that 2045 adding two like-signed zeroes gives that zero. */ 2046 if (category == fcZero) { 2047 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2048 sign = (rounding_mode == rmTowardNegative); 2049 // NaN-in-negative-zero means zeros need to be normalized to +0. 2050 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2051 sign = false; 2052 } 2053 2054 return fs; 2055 } 2056 2057 /* Normalized addition. */ 2058 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2059 roundingMode rounding_mode) { 2060 return addOrSubtract(rhs, rounding_mode, false); 2061 } 2062 2063 /* Normalized subtraction. */ 2064 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2065 roundingMode rounding_mode) { 2066 return addOrSubtract(rhs, rounding_mode, true); 2067 } 2068 2069 /* Normalized multiply. 
*/ 2070 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 2071 roundingMode rounding_mode) { 2072 opStatus fs; 2073 2074 sign ^= rhs.sign; 2075 fs = multiplySpecials(rhs); 2076 2077 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2078 sign = false; 2079 if (isFiniteNonZero()) { 2080 lostFraction lost_fraction = multiplySignificand(rhs); 2081 fs = normalize(rounding_mode, lost_fraction); 2082 if (lost_fraction != lfExactlyZero) 2083 fs = (opStatus) (fs | opInexact); 2084 } 2085 2086 return fs; 2087 } 2088 2089 /* Normalized divide. */ 2090 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 2091 roundingMode rounding_mode) { 2092 opStatus fs; 2093 2094 sign ^= rhs.sign; 2095 fs = divideSpecials(rhs); 2096 2097 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2098 sign = false; 2099 if (isFiniteNonZero()) { 2100 lostFraction lost_fraction = divideSignificand(rhs); 2101 fs = normalize(rounding_mode, lost_fraction); 2102 if (lost_fraction != lfExactlyZero) 2103 fs = (opStatus) (fs | opInexact); 2104 } 2105 2106 return fs; 2107 } 2108 2109 /* Normalized remainder. */ 2110 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 2111 opStatus fs; 2112 unsigned int origSign = sign; 2113 2114 // First handle the special cases. 2115 fs = remainderSpecials(rhs); 2116 if (fs != opDivByZero) 2117 return fs; 2118 2119 fs = opOK; 2120 2121 // Make sure the current value is less than twice the denom. If the addition 2122 // did not succeed (an overflow has happened), which means that the finite 2123 // value we currently posses must be less than twice the denom (as we are 2124 // using the same semantics). 2125 IEEEFloat P2 = rhs; 2126 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 2127 fs = mod(P2); 2128 assert(fs == opOK); 2129 } 2130 2131 // Lets work with absolute numbers. 
2132 IEEEFloat P = rhs; 2133 P.sign = false; 2134 sign = false; 2135 2136 // 2137 // To calculate the remainder we use the following scheme. 2138 // 2139 // The remainder is defained as follows: 2140 // 2141 // remainder = numer - rquot * denom = x - r * p 2142 // 2143 // Where r is the result of: x/p, rounded toward the nearest integral value 2144 // (with halfway cases rounded toward the even number). 2145 // 2146 // Currently, (after x mod 2p): 2147 // r is the number of 2p's present inside x, which is inherently, an even 2148 // number of p's. 2149 // 2150 // We may split the remaining calculation into 4 options: 2151 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2152 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2153 // are done as well. 2154 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2155 // to subtract 1p at least once. 2156 // - if x >= p then we must subtract p at least once, as x must be a 2157 // remainder. 2158 // 2159 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2160 // 2161 // We can now split the remaining calculation to the following 3 options: 2162 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2163 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2164 // must round up to the next even number. so we must subtract p once more. 2165 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2166 // integral, and subtract p once more. 2167 // 2168 2169 // Extend the semantics to prevent an overflow/underflow or inexact result. 
2170 bool losesInfo; 2171 fltSemantics extendedSemantics = *semantics; 2172 extendedSemantics.maxExponent++; 2173 extendedSemantics.minExponent--; 2174 extendedSemantics.precision += 2; 2175 2176 IEEEFloat VEx = *this; 2177 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2178 assert(fs == opOK && !losesInfo); 2179 IEEEFloat PEx = P; 2180 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2181 assert(fs == opOK && !losesInfo); 2182 2183 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2184 // any fraction. 2185 fs = VEx.add(VEx, rmNearestTiesToEven); 2186 assert(fs == opOK); 2187 2188 if (VEx.compare(PEx) == cmpGreaterThan) { 2189 fs = subtract(P, rmNearestTiesToEven); 2190 assert(fs == opOK); 2191 2192 // Make VEx = this.add(this), but because we have different semantics, we do 2193 // not want to `convert` again, so we just subtract PEx twice (which equals 2194 // to the desired value). 2195 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2196 assert(fs == opOK); 2197 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2198 assert(fs == opOK); 2199 2200 cmpResult result = VEx.compare(PEx); 2201 if (result == cmpGreaterThan || result == cmpEqual) { 2202 fs = subtract(P, rmNearestTiesToEven); 2203 assert(fs == opOK); 2204 } 2205 } 2206 2207 if (isZero()) { 2208 sign = origSign; // IEEE754 requires this 2209 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2210 // But some 8-bit floats only have positive 0. 2211 sign = false; 2212 } 2213 2214 else 2215 sign ^= origSign; 2216 return fs; 2217 } 2218 2219 /* Normalized llvm frem (C fmod). 
*/ 2220 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 2221 opStatus fs; 2222 fs = modSpecials(rhs); 2223 unsigned int origSign = sign; 2224 2225 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 2226 compareAbsoluteValue(rhs) != cmpLessThan) { 2227 int Exp = ilogb(*this) - ilogb(rhs); 2228 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven); 2229 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly 2230 // check for it. 2231 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan) 2232 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); 2233 V.sign = sign; 2234 2235 fs = subtract(V, rmNearestTiesToEven); 2236 assert(fs==opOK); 2237 } 2238 if (isZero()) { 2239 sign = origSign; // fmod requires this 2240 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2241 sign = false; 2242 } 2243 return fs; 2244 } 2245 2246 /* Normalized fused-multiply-add. */ 2247 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 2248 const IEEEFloat &addend, 2249 roundingMode rounding_mode) { 2250 opStatus fs; 2251 2252 /* Post-multiplication sign, before addition. */ 2253 sign ^= multiplicand.sign; 2254 2255 /* If and only if all arguments are normal do we need to do an 2256 extended-precision calculation. */ 2257 if (isFiniteNonZero() && 2258 multiplicand.isFiniteNonZero() && 2259 addend.isFinite()) { 2260 lostFraction lost_fraction; 2261 2262 lost_fraction = multiplySignificand(multiplicand, addend); 2263 fs = normalize(rounding_mode, lost_fraction); 2264 if (lost_fraction != lfExactlyZero) 2265 fs = (opStatus) (fs | opInexact); 2266 2267 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2268 positive zero unless rounding to minus infinity, except that 2269 adding two like-signed zeroes gives that zero. 
*/ 2270 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { 2271 sign = (rounding_mode == rmTowardNegative); 2272 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2273 sign = false; 2274 } 2275 } else { 2276 fs = multiplySpecials(multiplicand); 2277 2278 /* FS can only be opOK or opInvalidOp. There is no more work 2279 to do in the latter case. The IEEE-754R standard says it is 2280 implementation-defined in this case whether, if ADDEND is a 2281 quiet NaN, we raise invalid op; this implementation does so. 2282 2283 If we need to do the addition we can do so with normal 2284 precision. */ 2285 if (fs == opOK) 2286 fs = addOrSubtract(addend, rounding_mode, false); 2287 } 2288 2289 return fs; 2290 } 2291 2292 /* Rounding-mode correct round to integral value. */ 2293 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2294 opStatus fs; 2295 2296 if (isInfinity()) 2297 // [IEEE Std 754-2008 6.1]: 2298 // The behavior of infinity in floating-point arithmetic is derived from the 2299 // limiting cases of real arithmetic with operands of arbitrarily 2300 // large magnitude, when such a limit exists. 2301 // ... 2302 // Operations on infinite operands are usually exact and therefore signal no 2303 // exceptions ... 2304 return opOK; 2305 2306 if (isNaN()) { 2307 if (isSignaling()) { 2308 // [IEEE Std 754-2008 6.2]: 2309 // Under default exception handling, any operation signaling an invalid 2310 // operation exception and for which a floating-point result is to be 2311 // delivered shall deliver a quiet NaN. 2312 makeQuiet(); 2313 // [IEEE Std 754-2008 6.2]: 2314 // Signaling NaNs shall be reserved operands that, under default exception 2315 // handling, signal the invalid operation exception(see 7.2) for every 2316 // general-computational and signaling-computational operation except for 2317 // the conversions described in 5.12. 
2318 return opInvalidOp; 2319 } else { 2320 // [IEEE Std 754-2008 6.2]: 2321 // For an operation with quiet NaN inputs, other than maximum and minimum 2322 // operations, if a floating-point result is to be delivered the result 2323 // shall be a quiet NaN which should be one of the input NaNs. 2324 // ... 2325 // Every general-computational and quiet-computational operation involving 2326 // one or more input NaNs, none of them signaling, shall signal no 2327 // exception, except fusedMultiplyAdd might signal the invalid operation 2328 // exception(see 7.2). 2329 return opOK; 2330 } 2331 } 2332 2333 if (isZero()) { 2334 // [IEEE Std 754-2008 6.3]: 2335 // ... the sign of the result of conversions, the quantize operation, the 2336 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2337 // the sign of the first or only operand. 2338 return opOK; 2339 } 2340 2341 // If the exponent is large enough, we know that this value is already 2342 // integral, and the arithmetic below would potentially cause it to saturate 2343 // to +/-Inf. Bail out early instead. 2344 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2345 return opOK; 2346 2347 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2348 // precision of our format, and then subtract it back off again. The choice 2349 // of rounding modes for the addition/subtraction determines the rounding mode 2350 // for our integral rounding as well. 2351 // NOTE: When the input value is negative, we do subtraction followed by 2352 // addition instead. 2353 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2354 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2355 IEEEFloat MagicConstant(*semantics); 2356 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2357 rmNearestTiesToEven); 2358 assert(fs == opOK); 2359 MagicConstant.sign = sign; 2360 2361 // Preserve the input sign so that we can handle the case of zero result 2362 // correctly. 
2363 bool inputSign = isNegative(); 2364 2365 fs = add(MagicConstant, rounding_mode); 2366 2367 // Current value and 'MagicConstant' are both integers, so the result of the 2368 // subtraction is always exact according to Sterbenz' lemma. 2369 subtract(MagicConstant, rounding_mode); 2370 2371 // Restore the input sign. 2372 if (inputSign != isNegative()) 2373 changeSign(); 2374 2375 return fs; 2376 } 2377 2378 2379 /* Comparison requires normalized numbers. */ 2380 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2381 cmpResult result; 2382 2383 assert(semantics == rhs.semantics); 2384 2385 switch (PackCategoriesIntoKey(category, rhs.category)) { 2386 default: 2387 llvm_unreachable(nullptr); 2388 2389 case PackCategoriesIntoKey(fcNaN, fcZero): 2390 case PackCategoriesIntoKey(fcNaN, fcNormal): 2391 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2392 case PackCategoriesIntoKey(fcNaN, fcNaN): 2393 case PackCategoriesIntoKey(fcZero, fcNaN): 2394 case PackCategoriesIntoKey(fcNormal, fcNaN): 2395 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2396 return cmpUnordered; 2397 2398 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2399 case PackCategoriesIntoKey(fcInfinity, fcZero): 2400 case PackCategoriesIntoKey(fcNormal, fcZero): 2401 if (sign) 2402 return cmpLessThan; 2403 else 2404 return cmpGreaterThan; 2405 2406 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2407 case PackCategoriesIntoKey(fcZero, fcInfinity): 2408 case PackCategoriesIntoKey(fcZero, fcNormal): 2409 if (rhs.sign) 2410 return cmpGreaterThan; 2411 else 2412 return cmpLessThan; 2413 2414 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2415 if (sign == rhs.sign) 2416 return cmpEqual; 2417 else if (sign) 2418 return cmpLessThan; 2419 else 2420 return cmpGreaterThan; 2421 2422 case PackCategoriesIntoKey(fcZero, fcZero): 2423 return cmpEqual; 2424 2425 case PackCategoriesIntoKey(fcNormal, fcNormal): 2426 break; 2427 } 2428 2429 /* Two normal numbers. Do they have the same sign? 
*/ 2430 if (sign != rhs.sign) { 2431 if (sign) 2432 result = cmpLessThan; 2433 else 2434 result = cmpGreaterThan; 2435 } else { 2436 /* Compare absolute values; invert result if negative. */ 2437 result = compareAbsoluteValue(rhs); 2438 2439 if (sign) { 2440 if (result == cmpLessThan) 2441 result = cmpGreaterThan; 2442 else if (result == cmpGreaterThan) 2443 result = cmpLessThan; 2444 } 2445 } 2446 2447 return result; 2448 } 2449 2450 /// IEEEFloat::convert - convert a value of one floating point type to another. 2451 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2452 /// records whether the transformation lost information, i.e. whether 2453 /// converting the result back to the original type will produce the 2454 /// original value (this is almost the same as return value==fsOK, but there 2455 /// are edge cases where this is not so). 2456 2457 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2458 roundingMode rounding_mode, 2459 bool *losesInfo) { 2460 lostFraction lostFraction; 2461 unsigned int newPartCount, oldPartCount; 2462 opStatus fs; 2463 int shift; 2464 const fltSemantics &fromSemantics = *semantics; 2465 bool is_signaling = isSignaling(); 2466 2467 lostFraction = lfExactlyZero; 2468 newPartCount = partCountForBits(toSemantics.precision + 1); 2469 oldPartCount = partCount(); 2470 shift = toSemantics.precision - fromSemantics.precision; 2471 2472 bool X86SpecialNan = false; 2473 if (&fromSemantics == &semX87DoubleExtended && 2474 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2475 (!(*significandParts() & 0x8000000000000000ULL) || 2476 !(*significandParts() & 0x4000000000000000ULL))) { 2477 // x86 has some unusual NaNs which cannot be represented in any other 2478 // format; note them here. 
2479 X86SpecialNan = true; 2480 } 2481 2482 // If this is a truncation of a denormal number, and the target semantics 2483 // has larger exponent range than the source semantics (this can happen 2484 // when truncating from PowerPC double-double to double format), the 2485 // right shift could lose result mantissa bits. Adjust exponent instead 2486 // of performing excessive shift. 2487 // Also do a similar trick in case shifting denormal would produce zero 2488 // significand as this case isn't handled correctly by normalize. 2489 if (shift < 0 && isFiniteNonZero()) { 2490 int omsb = significandMSB() + 1; 2491 int exponentChange = omsb - fromSemantics.precision; 2492 if (exponent + exponentChange < toSemantics.minExponent) 2493 exponentChange = toSemantics.minExponent - exponent; 2494 if (exponentChange < shift) 2495 exponentChange = shift; 2496 if (exponentChange < 0) { 2497 shift -= exponentChange; 2498 exponent += exponentChange; 2499 } else if (omsb <= -shift) { 2500 exponentChange = omsb + shift - 1; // leave at least one bit set 2501 shift -= exponentChange; 2502 exponent += exponentChange; 2503 } 2504 } 2505 2506 // If this is a truncation, perform the shift before we narrow the storage. 2507 if (shift < 0 && (isFiniteNonZero() || 2508 (category == fcNaN && semantics->nonFiniteBehavior != 2509 fltNonfiniteBehavior::NanOnly))) 2510 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2511 2512 // Fix the storage so it can hold to new value. 2513 if (newPartCount > oldPartCount) { 2514 // The new type requires more storage; make it available. 2515 integerPart *newParts; 2516 newParts = new integerPart[newPartCount]; 2517 APInt::tcSet(newParts, 0, newPartCount); 2518 if (isFiniteNonZero() || category==fcNaN) 2519 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2520 freeSignificand(); 2521 significand.parts = newParts; 2522 } else if (newPartCount == 1 && oldPartCount != 1) { 2523 // Switch to built-in storage for a single part. 
2524 integerPart newPart = 0; 2525 if (isFiniteNonZero() || category==fcNaN) 2526 newPart = significandParts()[0]; 2527 freeSignificand(); 2528 significand.part = newPart; 2529 } 2530 2531 // Now that we have the right storage, switch the semantics. 2532 semantics = &toSemantics; 2533 2534 // If this is an extension, perform the shift now that the storage is 2535 // available. 2536 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2537 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2538 2539 if (isFiniteNonZero()) { 2540 fs = normalize(rounding_mode, lostFraction); 2541 *losesInfo = (fs != opOK); 2542 } else if (category == fcNaN) { 2543 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2544 *losesInfo = 2545 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly; 2546 makeNaN(false, sign); 2547 return is_signaling ? opInvalidOp : opOK; 2548 } 2549 2550 // If NaN is negative zero, we need to create a new NaN to avoid converting 2551 // NaN to -Inf. 2552 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && 2553 semantics->nanEncoding != fltNanEncoding::NegativeZero) 2554 makeNaN(false, false); 2555 2556 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2557 2558 // For x87 extended precision, we want to make a NaN, not a special NaN if 2559 // the input wasn't special either. 2560 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2561 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2562 2563 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2564 // This also guarantees that a sNaN does not become Inf on a truncation 2565 // that loses all payload bits. 
2566 if (is_signaling) { 2567 makeQuiet(); 2568 fs = opInvalidOp; 2569 } else { 2570 fs = opOK; 2571 } 2572 } else if (category == fcInfinity && 2573 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2574 makeNaN(false, sign); 2575 *losesInfo = true; 2576 fs = opInexact; 2577 } else if (category == fcZero && 2578 semantics->nanEncoding == fltNanEncoding::NegativeZero) { 2579 // Negative zero loses info, but positive zero doesn't. 2580 *losesInfo = 2581 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; 2582 fs = *losesInfo ? opInexact : opOK; 2583 // NaN is negative zero means -0 -> +0, which can lose information 2584 sign = false; 2585 } else { 2586 *losesInfo = false; 2587 fs = opOK; 2588 } 2589 2590 return fs; 2591 } 2592 2593 /* Convert a floating point number to an integer according to the 2594 rounding mode. If the rounded integer value is out of range this 2595 returns an invalid operation exception and the contents of the 2596 destination parts are unspecified. If the rounded value is in 2597 range but the floating point number is not the exact integer, the C 2598 standard doesn't require an inexact exception to be raised. IEEE 2599 854 does require it so we do that. 2600 2601 Note that for conversions to integer type the C standard requires 2602 round-to-zero to always be used. */ 2603 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2604 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2605 roundingMode rounding_mode, bool *isExact) const { 2606 lostFraction lost_fraction; 2607 const integerPart *src; 2608 unsigned int dstPartsCount, truncatedBits; 2609 2610 *isExact = false; 2611 2612 /* Handle the three special cases first. 
*/ 2613 if (category == fcInfinity || category == fcNaN) 2614 return opInvalidOp; 2615 2616 dstPartsCount = partCountForBits(width); 2617 assert(dstPartsCount <= parts.size() && "Integer too big"); 2618 2619 if (category == fcZero) { 2620 APInt::tcSet(parts.data(), 0, dstPartsCount); 2621 // Negative zero can't be represented as an int. 2622 *isExact = !sign; 2623 return opOK; 2624 } 2625 2626 src = significandParts(); 2627 2628 /* Step 1: place our absolute value, with any fraction truncated, in 2629 the destination. */ 2630 if (exponent < 0) { 2631 /* Our absolute value is less than one; truncate everything. */ 2632 APInt::tcSet(parts.data(), 0, dstPartsCount); 2633 /* For exponent -1 the integer bit represents .5, look at that. 2634 For smaller exponents leftmost truncated bit is 0. */ 2635 truncatedBits = semantics->precision -1U - exponent; 2636 } else { 2637 /* We want the most significant (exponent + 1) bits; the rest are 2638 truncated. */ 2639 unsigned int bits = exponent + 1U; 2640 2641 /* Hopelessly large in magnitude? */ 2642 if (bits > width) 2643 return opInvalidOp; 2644 2645 if (bits < semantics->precision) { 2646 /* We truncate (semantics->precision - bits) bits. */ 2647 truncatedBits = semantics->precision - bits; 2648 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2649 } else { 2650 /* We want at least as many bits as are available. */ 2651 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2652 0); 2653 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2654 bits - semantics->precision); 2655 truncatedBits = 0; 2656 } 2657 } 2658 2659 /* Step 2: work out any lost fraction, and increment the absolute 2660 value if we would round away from zero. 
*/ 2661 if (truncatedBits) { 2662 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2663 truncatedBits); 2664 if (lost_fraction != lfExactlyZero && 2665 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2666 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2667 return opInvalidOp; /* Overflow. */ 2668 } 2669 } else { 2670 lost_fraction = lfExactlyZero; 2671 } 2672 2673 /* Step 3: check if we fit in the destination. */ 2674 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2675 2676 if (sign) { 2677 if (!isSigned) { 2678 /* Negative numbers cannot be represented as unsigned. */ 2679 if (omsb != 0) 2680 return opInvalidOp; 2681 } else { 2682 /* It takes omsb bits to represent the unsigned integer value. 2683 We lose a bit for the sign, but care is needed as the 2684 maximally negative integer is a special case. */ 2685 if (omsb == width && 2686 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2687 return opInvalidOp; 2688 2689 /* This case can happen because of rounding. */ 2690 if (omsb > width) 2691 return opInvalidOp; 2692 } 2693 2694 APInt::tcNegate (parts.data(), dstPartsCount); 2695 } else { 2696 if (omsb >= width + !isSigned) 2697 return opInvalidOp; 2698 } 2699 2700 if (lost_fraction == lfExactlyZero) { 2701 *isExact = true; 2702 return opOK; 2703 } else 2704 return opInexact; 2705 } 2706 2707 /* Same as convertToSignExtendedInteger, except we provide 2708 deterministic values in case of an invalid operation exception, 2709 namely zero for NaNs and the minimal or maximal value respectively 2710 for underflow or overflow. 2711 The *isExact output tells whether the result is exact, in the sense 2712 that converting it back to the original floating point type produces 2713 the original value. This is almost equivalent to result==opOK, 2714 except for negative zeroes. 
2715 */ 2716 IEEEFloat::opStatus 2717 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2718 unsigned int width, bool isSigned, 2719 roundingMode rounding_mode, bool *isExact) const { 2720 opStatus fs; 2721 2722 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2723 isExact); 2724 2725 if (fs == opInvalidOp) { 2726 unsigned int bits, dstPartsCount; 2727 2728 dstPartsCount = partCountForBits(width); 2729 assert(dstPartsCount <= parts.size() && "Integer too big"); 2730 2731 if (category == fcNaN) 2732 bits = 0; 2733 else if (sign) 2734 bits = isSigned; 2735 else 2736 bits = width - isSigned; 2737 2738 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2739 if (sign && isSigned) 2740 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2741 } 2742 2743 return fs; 2744 } 2745 2746 /* Convert an unsigned integer SRC to a floating point number, 2747 rounding according to ROUNDING_MODE. The sign of the floating 2748 point number is not modified. */ 2749 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2750 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2751 unsigned int omsb, precision, dstCount; 2752 integerPart *dst; 2753 lostFraction lost_fraction; 2754 2755 category = fcNormal; 2756 omsb = APInt::tcMSB(src, srcCount) + 1; 2757 dst = significandParts(); 2758 dstCount = partCount(); 2759 precision = semantics->precision; 2760 2761 /* We want the most significant PRECISION bits of SRC. There may not 2762 be that many; extract what we can. 
*/ 2763 if (precision <= omsb) { 2764 exponent = omsb - 1; 2765 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2766 omsb - precision); 2767 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2768 } else { 2769 exponent = precision - 1; 2770 lost_fraction = lfExactlyZero; 2771 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2772 } 2773 2774 return normalize(rounding_mode, lost_fraction); 2775 } 2776 2777 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2778 roundingMode rounding_mode) { 2779 unsigned int partCount = Val.getNumWords(); 2780 APInt api = Val; 2781 2782 sign = false; 2783 if (isSigned && api.isNegative()) { 2784 sign = true; 2785 api = -api; 2786 } 2787 2788 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2789 } 2790 2791 /* Convert a two's complement integer SRC to a floating point number, 2792 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2793 integer is signed, in which case it must be sign-extended. */ 2794 IEEEFloat::opStatus 2795 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2796 unsigned int srcCount, bool isSigned, 2797 roundingMode rounding_mode) { 2798 opStatus status; 2799 2800 if (isSigned && 2801 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2802 integerPart *copy; 2803 2804 /* If we're signed and negative negate a copy. */ 2805 sign = true; 2806 copy = new integerPart[srcCount]; 2807 APInt::tcAssign(copy, src, srcCount); 2808 APInt::tcNegate(copy, srcCount); 2809 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2810 delete [] copy; 2811 } else { 2812 sign = false; 2813 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2814 } 2815 2816 return status; 2817 } 2818 2819 /* FIXME: should this just take a const APInt reference? 
*/
/* Convert the WIDTH-bit integer held in PARTS to a floating point number,
   rounding according to ROUNDING_MODE.  If ISSIGNED and bit WIDTH - 1 is
   set, the sign is set and the negated value is converted.  */
IEEEFloat::opStatus
IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
                                          unsigned int width, bool isSigned,
                                          roundingMode rounding_mode) {
  unsigned int partCount = partCountForBits(width);
  APInt api = APInt(width, ArrayRef(parts, partCount));

  sign = false;
  if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
    sign = true;
    api = -api;
  }

  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}

/* Parse the hexadecimal significand and the 'p'/'P' exponent in S and
   store the value rounded according to ROUNDING_MODE.  An error is
   returned for multiple dots, a missing exponent, an invalid character
   in the significand, or a significand without digits.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromHexadecimalString(StringRef s,
                                        roundingMode rounding_mode) {
  lostFraction lost_fraction = lfExactlyZero;

  category = fcNormal;
  zeroSignificand();
  exponent = 0;

  integerPart *significand = significandParts();
  unsigned partsCount = partCount();
  /* Nibbles are written downwards from the top bit of the significand.  */
  unsigned bitPos = partsCount * integerPartWidth;
  bool computedTrailingFraction = false;

  // Skip leading zeroes and any (hexa)decimal point.
  StringRef::iterator begin = s.begin();
  StringRef::iterator end = s.end();
  StringRef::iterator dot;
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;
  StringRef::iterator firstSignificantDigit = p;

  while (p != end) {
    integerPart hex_value;

    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      continue;
    }

    /* Stop at the first character that is not a hexadecimal digit.  */
    hex_value = hexDigitValue(*p);
    if (hex_value == UINT_MAX)
      break;

    p++;

    // Store the number while we have space.
    if (bitPos) {
      bitPos -= 4;
      hex_value <<= bitPos % integerPartWidth;
      significand[bitPos / integerPartWidth] |= hex_value;
    } else if (!computedTrailingFraction) {
      /* Out of space: classify the remaining digits once as the lost
         fraction; further digits are then only skipped over.  */
      auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
      if (!FractOrErr)
        return FractOrErr.takeError();
      lost_fraction = *FractOrErr;
      computedTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  if (p == end)
    return createError("Hex strings require an exponent");
  if (*p != 'p' && *p != 'P')
    return createError("Invalid character in significand");
  if (p == begin)
    return createError("Significand has no digits");
  if (dot != end && p - begin == 1)
    return createError("Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    int expAdjustment;

    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= partsCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    exponent = *ExpOrErr;
  }

  return normalize(rounding_mode, lost_fraction);
}

/* Set this value to the integer in DECSIGPARTS (SIGPARTCOUNT parts)
   multiplied by 10^EXP, rounded according to ROUNDING_MODE.  The product
   is computed in a wider scratch format; the working width doubles on
   each iteration until the half-ulp error bound shows that truncating
   the scratch result rounds correctly.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}

/* Parse the decimal number in STR and store it rounded according to
   ROUNDING_MODE.  Zero and exponents that certainly overflow or
   underflow are handled without converting the digits; otherwise the
   digits are accumulated into a bignum and handed to
   roundSignificandWithExponent.  An error is returned if the text
   cannot be scanned or the significand holds an invalid character.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
        42039/12655 < L < 28738/8651     [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null terminators
  // and zero decimals with non-zero exponents.
  //
  // We computed firstSigDigit by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there can
  // be at most one dot. On the other hand, if we have a zero with a non-zero
  // exponent, then we know that D.firstSigDigit will be non-numeric.
  if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    fs = opOK;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;

    /* Check whether the normalized exponent is high enough to overflow
       max during the log-rebasing in the max-exponent check below. */
  } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
    fs = handleOverflow(rounding_mode);

    /* If it wasn't, then it also wasn't high enough to overflow max
       during the log-rebasing in the min-exponent check.  Check that it
       won't overflow min in either check, then perform the min-exponent
       check. */
  } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
             (D.normalizedExponent + 1) * 28738 <=
               8651 * (semantics->minExponent - (int) semantics->precision)) {
    /* Underflow to zero and round.  */
    category = fcNormal;
    zeroSignificand();
    fs = normalize(rounding_mode, lfLessThanHalf);

    /* We can finally safely perform the max-exponent check. */
  } else if ((D.normalizedExponent - 1) * 42039
             >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    fs = handleOverflow(rounding_mode);
  } else {
    integerPart *decSignificand;
    unsigned int partCount;

    /* A tight upper bound on number of bits required to hold an
       N-digit decimal integer is N * 196 / 59.  Allocate enough space
       to hold the full significand, and an extra part required by
       tcMultiplyPart.  */
    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
    partCount = partCountForBits(1 + 196 * partCount / 59);
    decSignificand = new integerPart[partCount + 1];
    /* From here on partCount counts the parts in use, starting at none.  */
    partCount = 0;

    /* Convert to binary efficiently - we do almost all multiplication
       in an integerPart.  When this would overflow do we do a single
       bignum multiplication, and then revert again to multiplication
       in an integerPart.  */
    do {
      integerPart decValue, val, multiplier;

      val = 0;
      multiplier = 1;

      do {
        if (*p == '.') {
          p++;
          if (p == str.end()) {
            break;
          }
        }
        decValue = decDigitValue(*p++);
        if (decValue >= 10U) {
          /* Release the buffer before the early return.  */
          delete[] decSignificand;
          return createError("Invalid character in significand");
        }
        multiplier *= 10;
        val = val * 10 + decValue;
        /* The maximum number that can be multiplied by ten with any
           digit added without overflowing an integerPart.  */
      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);

      /* Multiply out the current part.  */
      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
                            partCount, partCount + 1, false);

      /* If we used another part (likely but not guaranteed), increase
         the count.  */
      if (decSignificand[partCount])
        partCount++;
    } while (p <= D.lastSigDigit);

    category = fcNormal;
    fs = roundSignificandWithExponent(decSignificand, partCount,
                                      D.exponent, rounding_mode);

    delete [] decSignificand;
  }

  return fs;
}

/* Recognize the textual spellings of infinity and NaN.  Returns true
   and sets this value if STR is one of them, false otherwise.

   Infinity is "inf", "INFINITY" or "+Inf", or "inf", "INFINITY" or "Inf"
   after a leading '-'.  A NaN is "nan" or "NaN", optionally preceded by
   '-' and then by 's' or 'S' (signaling), and optionally followed by an
   integer payload that may be wrapped in parentheses.  */
bool IEEEFloat::convertFromStringSpecials(StringRef str) {
  const size_t MIN_NAME_SIZE = 3;

  if (str.size() < MIN_NAME_SIZE)
    return false;

  if (str == "inf" || str == "INFINITY" || str == "+Inf") {
    makeInf(false);
    return true;
  }

  bool IsNegative = str.front() == '-';
  if (IsNegative) {
    str = str.drop_front();
    if (str.size() < MIN_NAME_SIZE)
      return false;

    if (str == "inf" || str == "INFINITY" || str == "Inf") {
      makeInf(true);
      return true;
    }
  }

  // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
  bool IsSignaling = str.front() == 's' || str.front() == 'S';
  if (IsSignaling) {
    str = str.drop_front();
    if (str.size() < MIN_NAME_SIZE)
      return false;
  }

  if (str.starts_with("nan") || str.starts_with("NaN")) {
    str = str.drop_front(3);

    // A NaN without payload.
    if (str.empty()) {
      makeNaN(IsSignaling, IsNegative);
      return true;
    }

    // Allow the payload to be inside parentheses.
    if (str.front() == '(') {
      // Parentheses should be balanced (and not empty).
      if (str.size() <= 2 || str.back() != ')')
        return false;

      str = str.slice(1, str.size() - 1);
    }

    // Determine the payload number's radix: a "0x"/"0X" prefix selects 16,
    // any other leading '0' selects 8, everything else is decimal.
    unsigned Radix = 10;
    if (str[0] == '0') {
      if (str.size() > 1 && tolower(str[1]) == 'x') {
        str = str.drop_front(2);
        Radix = 16;
      } else
        Radix = 8;
    }

    // Parse the payload and make the NaN.
    APInt Payload;
    if (!str.getAsInteger(Radix, Payload)) {
      makeNaN(IsSignaling, IsNegative, &Payload);
      return true;
    }
  }

  return false;
}

/* Parse STR into this value, rounding according to ROUNDING_MODE.
   Infinity and NaN spellings are tried first.  Otherwise an optional
   leading '-' or '+' is consumed and the rest is parsed as hexadecimal
   when it starts with "0x"/"0X" and as decimal otherwise.  An error is
   returned for an empty string, a sign with nothing after it, or a bare
   "0x" prefix.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
  if (str.empty())
    return createError("Invalid string length");

  // Handle special cases.
  if (convertFromStringSpecials(str))
    return opOK;

  /* Handle a leading minus sign.  */
  StringRef::iterator p = str.begin();
  size_t slen = str.size();
  sign = *p == '-' ? 1 : 0;
  if (*p == '-' || *p == '+') {
    p++;
    slen--;
    if (!slen)
      return createError("String has no digits");
  }

  if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
    if (slen == 2)
      return createError("Invalid string");
    return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
                                        rounding_mode);
  }

  return convertFromDecimalString(StringRef(p, slen), rounding_mode);
}

/* Write out a hexadecimal representation of the floating point value
   to DST, which must be of sufficient size, in the C99 form
   [-]0xh.hhhhp[+-]d.  Return the number of characters written,
   excluding the terminating NUL.

   If UPPERCASE, the output is in upper case, otherwise in lower case.

   HEXDIGITS digits appear altogether, rounding the value if
   necessary.  If HEXDIGITS is 0, the minimal precision to display the
   number precisely is used instead.  If nothing would appear after
   the decimal point it is suppressed.

   The decimal exponent is always printed and has at least one digit.
   Zero values display an exponent of zero.  Infinities and NaNs
   appear as "infinity" or "nan" respectively.

   The above rules are as specified by C99.  There is ambiguity about
   what the leading hexadecimal digit should be.
   This implementation
   uses whatever is necessary so that the exponent is displayed as
   stored.  This implies the exponent will fall within the IEEE format
   range, and the leading hexadecimal digit will be 0 (for denormals),
   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
   any other digits zero).
*/
unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
                                           bool upperCase,
                                           roundingMode rounding_mode) const {
  char *p;

  /* Remember the start so the number of characters written can be
     returned.  */
  p = dst;
  if (sign)
    *dst++ = '-';

  switch (category) {
  case fcInfinity:
    memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
    dst += sizeof infinityL - 1;
    break;

  case fcNaN:
    memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
    dst += sizeof NaNU - 1;
    break;

  case fcZero:
    *dst++ = '0';
    *dst++ = upperCase ? 'X': 'x';
    *dst++ = '0';
    /* Pad with zeroes after the point when more than one digit is
       requested.  */
    if (hexDigits > 1) {
      *dst++ = '.';
      memset (dst, '0', hexDigits - 1);
      dst += hexDigits - 1;
    }
    *dst++ = upperCase ? 'P': 'p';
    *dst++ = '0';
    break;

  case fcNormal:
    dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
    break;
  }

  /* Terminating NUL; not included in the returned count.  */
  *dst = 0;

  return static_cast<unsigned int>(dst - p);
}

/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.  Returns
   the pointer produced by writeSignedDecimal for the exponent.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Note that hexDigitChars has a trailing '0'.  */
    /* Increment the last digit; keep carrying leftwards for as long as
       a digit wraps to '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}

/// Hash an IEEEFloat. Zero, infinity and NaN hash their category, sign and
/// precision only; finite non-zero values also hash the exponent and every
/// significand part.
hash_code hash_value(const IEEEFloat &Arg) {
  if (!Arg.isFiniteNonZero())
    return hash_combine((uint8_t)Arg.category,
                        // NaN has no sign, fix it at zero.
                        Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
                        Arg.semantics->precision);

  // Normal floats need their exponent and significand hashed.
  return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
                      Arg.semantics->precision, Arg.exponent,
                      hash_combine_range(
                          Arg.significandParts(),
                          Arg.significandParts() + Arg.partCount()));
}

// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.

// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.

// Pack an x87 double-extended value into an 80-bit APInt: word 0 is the
// 64-bit significand as stored (integer bit included); word 1 holds the
// biased exponent in bits 0-14 and the sign in bit 15.
APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
  assert(partCount()==2);

  uint64_t myexponent, mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent+16383; //bias
    mysignificand = significandParts()[0];
    if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
      myexponent = 0;   // denormal
  } else if (category==fcZero) {
    myexponent = 0;
    mysignificand = 0;
  } else if (category==fcInfinity) {
    myexponent = 0x7fff;
    mysignificand = 0x8000000000000000ULL;
  } else {
    assert(category == fcNaN && "Unknown category");
    myexponent = 0x7fff;
    mysignificand = significandParts()[0];
  }

  uint64_t words[2];
  words[0] = mysignificand;
  words[1] =  ((uint64_t)(sign & 1) << 15) |
              (myexponent & 0x7fffLL);
  return APInt(80, words);
}

// Encode a legacy PPC double-double value as a 128-bit APInt. Word 0 is the
// bit pattern of the value converted to double. Word 1 is the bit pattern of
// the remainder converted to double when that first conversion lost
// information on a finite non-zero result, and zero otherwise.
APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
  assert(partCount()==2);

  uint64_t words[2];
  opStatus fs;
  bool losesInfo;

  // Convert number to double.  To avoid spurious underflows, we re-
  // normalize against the "double" minExponent first, and only *then*
  // truncate the mantissa.  The result of that second conversion
  // may be inexact, but should never underflow.
  // Declare fltSemantics before APFloat that uses it (and
  // saves pointer to it) to ensure correct destruction order.
  fltSemantics extendedSemantics = *semantics;
  extendedSemantics.minExponent = semIEEEdouble.minExponent;
  IEEEFloat extended(*this);
  fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs;

  IEEEFloat u(extended);
  fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK || fs == opInexact);
  (void)fs;
  words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();

  // If conversion was exact or resulted in a special case, we're done;
  // just set the second double to zero.  Otherwise, re-convert back to
  // the extended format and compute the difference.  This now should
  // convert exactly to double.
  if (u.isFiniteNonZero() && losesInfo) {
    fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    IEEEFloat v(extended);
    v.subtract(u, rmNearestTiesToEven);
    fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;
    words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
  } else {
    words[1] = 0;
  }

  return APInt(128, words);
}

// Build the S.sizeInBits-wide bit pattern for a value with semantics S: the
// trailing significand (integer bit cleared) in the low bits, the biased
// exponent above it, and the sign in the top bit.
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
  assert(semantics == &S);

  constexpr int bias = -(S.minExponent - 1);
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  // Part index and mask locating the integer bit within the significand.
  constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
  constexpr integerPart integer_bit =
      integerPart{1} << (trailing_significand_bits % integerPartWidth);
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;

  uint64_t myexponent;
  std::array<integerPart, partCountForBits(trailing_significand_bits)>
      mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
    if (myexponent == 1 &&
        !(significandParts()[integer_bit_part] & integer_bit))
      myexponent = 0; // denormal
  } else if (category == fcZero) {
    myexponent = ::exponentZero(S) + bias;
    mysignificand.fill(0);
  } else if (category == fcInfinity) {
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
        S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support inf!");
    myexponent = ::exponentInf(S) + bias;
    mysignificand.fill(0);
  } else {
    assert(category == fcNaN && "Unknown category!");
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support NaN!");
    myexponent = ::exponentNaN(S) + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
  }
  std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
  auto words_iter =
      std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
  if constexpr (significand_mask != 0) {
    // Clear the integer bit.
    words[mysignificand.size() - 1] &= significand_mask;
  }
  // Zero any words above the copied significand before OR-ing in the
  // sign and exponent.
  std::fill(words_iter, words.end(), uint64_t{0});
  constexpr size_t last_word = words.size() - 1;
  uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
                          << ((S.sizeInBits - 1) % 64);
  words[last_word] |= shifted_sign;
  uint64_t shifted_exponent = (myexponent & exponent_mask)
                              << (trailing_significand_bits % 64);
  words[last_word] |= shifted_exponent;
  if constexpr (last_word == 0) {
    return APInt(S.sizeInBits, words[0]);
  }
  return APInt(S.sizeInBits, words);
}

// The functions below are per-format entry points that forward to
// convertIEEEFloatToAPInt instantiated with the matching semantics.

APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}

APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}

APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}

APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}

APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}

APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}

APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}

APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}

APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}

APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}

APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}

APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
}

APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
}

// This function creates an APInt that is just a bit map of the floating
// point constant as it would appear in memory.  It is not a conversion,
// and treating the result as a normal integer is unlikely to be useful.
//
// Dispatch is on the identity of the semantics object; anything not matched
// below is asserted to be x87 double-extended.

APInt IEEEFloat::bitcastToAPInt() const {
  if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
    return convertHalfAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semBFloat)
    return convertBFloatAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
    return convertFloatAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
    return convertDoubleAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
    return convertQuadrupleAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
    return convertPPCDoubleDoubleAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
    return convertFloat8E5M2APFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
    return convertFloat8E5M2FNUZAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
    return convertFloat8E4M3FNAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
    return convertFloat8E4M3FNUZAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
    return convertFloat8E4M3B11FNUZAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
    return convertFloatTF32APFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
    return convertFloat6E3M2FNAPFloatToAPInt();

  if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
    return convertFloat6E2M3FNAPFloatToAPInt();

  assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
         "unknown format!");
  return convertF80LongDoubleAPFloatToAPInt();
}

// Return the value as a host float by reinterpreting its bit pattern.
// Asserts that the semantics are IEEE single.
float IEEEFloat::convertToFloat() const {
  assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
         "Float semantics are not IEEEsingle");
  APInt api = bitcastToAPInt();
  return api.bitsToFloat();
}

// Return the value as a host double by reinterpreting its bit pattern.
// Asserts that the semantics are IEEE double.
double IEEEFloat::convertToDouble() const {
  assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
         "Float semantics are not IEEEdouble");
  APInt api = bitcastToAPInt();
  return api.bitsToDouble();
}

#ifdef HAS_IEE754_FLOAT128
// Return the value as a host float128 by reinterpreting its bit pattern.
// Asserts that the semantics are IEEE quad.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
         "Float semantics are not IEEEquads");
  APInt api = bitcastToAPInt();
  return api.bitsToQuad();
}
#endif

/// Integer bit is explicit in this format.
/// Intel hardware (387 and later)
/// does not support these bit patterns:
///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
///  exponent = 0, integer bit 1 ("pseudodenormal")
/// At the moment, the first three are treated as NaNs, the last one as Normal.
void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
  // Word 0 is the 64-bit significand; word 1 carries the 15-bit exponent
  // with the sign above it.
  uint64_t i1 = api.getRawData()[0];
  uint64_t i2 = api.getRawData()[1];
  uint64_t myexponent = (i2 & 0x7fff);
  uint64_t mysignificand = i1;
  uint8_t myintegerbit = mysignificand >> 63;

  initialize(&semX87DoubleExtended);
  assert(partCount()==2);

  sign = static_cast<unsigned int>(i2>>15);
  if (myexponent == 0 && mysignificand == 0) {
    makeZero(sign);
  } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
    makeInf(sign);
  } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
             (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
    category = fcNaN;
    exponent = exponentNaN();
    significandParts()[0] = mysignificand;
    significandParts()[1] = 0;
  } else {
    category = fcNormal;
    exponent = myexponent - 16383;
    significandParts()[0] = mysignificand;
    significandParts()[1] = 0;
    if (myexponent==0)          // denormal
      exponent = -16382;
  }
}

/// Initialize from the 128-bit legacy PPC double-double pattern in api. Word 0
/// is read as a double and converted to semPPCDoubleDoubleLegacy; if the
/// result is finite and non-zero, word 1 is read as a double and added to it.
void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
  uint64_t i1 = api.getRawData()[0];
  uint64_t i2 = api.getRawData()[1];
  opStatus fs;
  bool losesInfo;

  // Get the first double and convert to our format.
  initFromDoubleAPInt(APInt(64, i1));
  fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs;

  // Unless we have a special case, add in second double.
  if (isFiniteNonZero()) {
    IEEEFloat v(semIEEEdouble, APInt(64, i2));
    fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    add(v, rmNearestTiesToEven);
  }
}

/// Initialize from the bit pattern in api, decoded according to semantics S.
/// The trailing significand, biased exponent and sign are extracted, then the
/// value is classified in this order: infinity (IEEE754 non-finite behavior
/// only), NaN (per S.nanEncoding), zero, and finally a normal or denormal
/// number.
template <const fltSemantics &S>
void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
  assert(api.getBitWidth() == S.sizeInBits);
  constexpr integerPart integer_bit = integerPart{1}
                                      << ((S.precision - 1) % integerPartWidth);
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  constexpr unsigned int stored_significand_parts =
      partCountForBits(trailing_significand_bits);
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
  constexpr int bias = -(S.minExponent - 1);

  // Copy the bits of the significand. We need to clear out the exponent and
  // sign bit in the last word.
  std::array<integerPart, stored_significand_parts> mysignificand;
  std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
  if constexpr (significand_mask != 0) {
    mysignificand[mysignificand.size() - 1] &= significand_mask;
  }

  // We assume the last word holds the sign bit, the exponent, and potentially
  // some of the trailing significand field.
  uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
  uint64_t myexponent =
      (last_word >> (trailing_significand_bits % 64)) & exponent_mask;

  initialize(&S);
  assert(partCount() == mysignificand.size());

  sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));

  bool all_zero_significand =
      llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });

  bool is_zero = myexponent == 0 && all_zero_significand;

  if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
    if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
      makeInf(sign);
      return;
    }
  }

  bool is_nan = false;

  if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
    is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
    // Every part below the last must be all ones, and the last part must
    // equal the significand mask (when that mask is non-zero).
    bool all_ones_significand =
        std::all_of(mysignificand.begin(), mysignificand.end() - 1,
                    [](integerPart bits) { return bits == ~integerPart{0}; }) &&
        (!significand_mask ||
         mysignificand[mysignificand.size() - 1] == significand_mask);
    is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
    is_nan = is_zero && sign;
  }

  if (is_nan) {
    category = fcNaN;
    exponent = ::exponentNaN(S);
    std::copy_n(mysignificand.begin(), mysignificand.size(),
                significandParts());
    return;
  }

  if (is_zero) {
    makeZero(sign);
    return;
  }

  category = fcNormal;
  exponent = myexponent - bias;
  std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
  if (myexponent == 0) // denormal
    exponent = S.minExponent;
  else
    significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
}

// The functions below are per-format entry points that forward to
// initFromIEEEAPInt instantiated with the matching semantics.

void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEquad>(api);
}

void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEdouble>(api);
}

void IEEEFloat::initFromFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEsingle>(api);
}

void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semBFloat>(api);
}

void IEEEFloat::initFromHalfAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEhalf>(api);
}

void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2>(api);
}

void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}

void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FN>(api);
}

void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
}

void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}

void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
  initFromIEEEAPInt<semFloatTF32>(api);
}

void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E3M2FN>(api);
}

void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E2M3FN>(api);
}

/// Treat api as containing the bits of a floating point number.
/// The bit width of \p api must equal Sem->sizeInBits. The call is forwarded
/// to the per-format initializer selected by comparing \p Sem against each
/// known semantics object; an unrecognized \p Sem is unreachable.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
  assert(api.getBitWidth() == Sem->sizeInBits);
  if (Sem == &semIEEEhalf)
    return initFromHalfAPInt(api);
  if (Sem == &semBFloat)
    return initFromBFloatAPInt(api);
  if (Sem == &semIEEEsingle)
    return initFromFloatAPInt(api);
  if (Sem == &semIEEEdouble)
    return initFromDoubleAPInt(api);
  if (Sem == &semX87DoubleExtended)
    return initFromF80LongDoubleAPInt(api);
  if (Sem == &semIEEEquad)
    return initFromQuadrupleAPInt(api);
  if (Sem == &semPPCDoubleDoubleLegacy)
    return initFromPPCDoubleDoubleAPInt(api);
  if (Sem == &semFloat8E5M2)
    return initFromFloat8E5M2APInt(api);
  if (Sem == &semFloat8E5M2FNUZ)
    return initFromFloat8E5M2FNUZAPInt(api);
  if (Sem == &semFloat8E4M3FN)
    return initFromFloat8E4M3FNAPInt(api);
  if (Sem == &semFloat8E4M3FNUZ)
    return initFromFloat8E4M3FNUZAPInt(api);
  if (Sem == &semFloat8E4M3B11FNUZ)
    return initFromFloat8E4M3B11FNUZAPInt(api);
  if (Sem == &semFloatTF32)
    return initFromFloatTF32APInt(api);
  if (Sem == &semFloat6E3M2FN)
    return initFromFloat6E3M2FNAPInt(api);
  if (Sem == &semFloat6E2M3FN)
    return initFromFloat6E2M3FNAPInt(api);

  llvm_unreachable(nullptr);
}

/// Make this number the largest magnitude normal number in the given
/// semantics.
void IEEEFloat::makeLargest(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 1..10
  //   significand = 1..1
  category = fcNormal;
  sign = Negative;
  exponent = semantics->maxExponent;

  // Use memset to set all but the highest integerPart to all ones.
  integerPart *significand = significandParts();
  unsigned PartCount = partCount();
  memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));

  // Set the high integerPart especially setting all unused top bits for
  // internal consistency.
  const unsigned NumUnusedHighBits =
    PartCount*integerPartWidth - semantics->precision;
  significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
                                   ? (~integerPart(0) >> NumUnusedHighBits)
                                   : 0;

  // With the AllOnes NaN encoding, an all-ones exponent and mantissa denotes
  // NaN, so the largest finite value has the lowest significand bit cleared.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    significand[0] &= ~integerPart(1);
}

/// Make this number the smallest magnitude denormal number in the given
/// semantics.
void IEEEFloat::makeSmallest(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 0..01
  category = fcNormal;
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSet(significandParts(), 1, partCount());
}

/// Make this number the smallest magnitude normalized number in the given
/// semantics: minimum exponent with only the integer bit of the significand
/// set.
void IEEEFloat::makeSmallestNormalized(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 10..0

  category = fcNormal;
  zeroSignificand();
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSetBit(significandParts(), semantics->precision - 1);
}

/// Construct from the raw bit pattern \p API interpreted under \p Sem.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}

/// Construct an IEEE single from the bits of the host float \p f.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}

/// Construct an IEEE double from the bits of the host double \p d.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}

namespace {
  /// Appends the characters of \p Str to \p Buffer.
  void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
    Buffer.append(Str.begin(), Str.end());
  }

  /// Removes data from the given significand until it is no more
  /// precise than is required for the desired precision.
  ///
  /// Whole powers of ten are divided out of \p significand and \p exp is
  /// increased by the number of decimal digits removed. The division
  /// truncates, so removed digits are simply dropped.
  void AdjustToPrecision(APInt &significand,
                         int &exp, unsigned FormatPrecision) {
    unsigned bits = significand.getActiveBits();

    // 196/59 is a very slight overestimate of lg_2(10).
    unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;

    if (bits <= bitsRequired) return;

    unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
    if (!tensRemovable) return;

    exp += tensRemovable;

    // Build 10^tensRemovable by square-and-multiply.
    APInt divisor(significand.getBitWidth(), 1);
    APInt powten(significand.getBitWidth(), 10);
    while (true) {
      if (tensRemovable & 1)
        divisor *= powten;
      tensRemovable >>= 1;
      if (!tensRemovable) break;
      powten *= powten;
    }

    significand = significand.udiv(divisor);

    // Truncate the significand down to its active bit count.
    significand = significand.trunc(significand.getActiveBits());
  }


  /// Rounds the decimal digit string in \p buffer to at most
  /// \p FormatPrecision digits. Digits are stored least significant first;
  /// \p exp is increased by the number of low-order digits removed.
  void AdjustToPrecision(SmallVectorImpl<char> &buffer,
                         int &exp, unsigned FormatPrecision) {
    unsigned N = buffer.size();
    if (N <= FormatPrecision) return;

    // The most significant figures are the last ones in the buffer.
    unsigned FirstSignificant = N - FormatPrecision;

    // Round.
    // FIXME: this probably shouldn't use 'round half up'.

    // Rounding down is just a truncation, except we also want to drop
    // trailing zeros from the new result.
    if (buffer[FirstSignificant - 1] < '5') {
      while (FirstSignificant < N && buffer[FirstSignificant] == '0')
        FirstSignificant++;

      exp += FirstSignificant;
      buffer.erase(&buffer[0], &buffer[FirstSignificant]);
      return;
    }

    // Rounding up requires a decimal add-with-carry.  If we continue
    // the carry, the newly-introduced zeros will just be truncated.
    for (unsigned I = FirstSignificant; I != N; ++I) {
      if (buffer[I] == '9') {
        FirstSignificant++;
      } else {
        buffer[I]++;
        break;
      }
    }

    // If we carried through, we have exactly one digit of precision.
    if (FirstSignificant == N) {
      exp += FirstSignificant;
      buffer.clear();
      buffer.push_back('1');
      return;
    }

    exp += FirstSignificant;
    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
  }
} // namespace

/// Appends a decimal rendering of this value to \p Str.
///
/// \param FormatPrecision maximum number of significant decimal digits; 0
///        selects a default derived from the semantics' precision.
/// \param FormatMaxPadding 0 forces scientific notation; otherwise it bounds
///        the number of padding zeros allowed before scientific notation is
///        used instead of positional notation.
/// \param TruncateZero when true, scientific output uses 'E' and is not
///        zero-padded; when false it uses 'e', pads the fraction with zeros
///        up to \p FormatPrecision and prints at least two exponent digits.
///
/// Infinities print as "+Inf"/"-Inf" and NaNs as "NaN".
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  if (isNegative())
    Str.push_back('-');

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  // Set FormatPrecision if zero.  We want to do this before we
  // truncate trailing zeros, as those are part of the precision.
  if (!FormatPrecision) {
    // We use enough digits so the number can be round-tripped back to an
    // APFloat. The formula comes from "How to Print Floating-Point Numbers
    // Accurately" by Steele and White.
    // FIXME: Using a formula based purely on the precision is conservative;
    // we can print fewer digits depending on the actual value being printed.

    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
    FormatPrecision = 2 + semantics->precision * 59 / 196;
  }

  // Ignore trailing binary zeros.
  int trailingZeros = significand.countr_zero();
  exp += trailingZeros;
  significand.lshrInPlace(trailingZeros);

  // Change the exponent from 2^e to 10^e.
  if (exp == 0) {
    // Nothing to do.
  } else if (exp > 0) {
    // Just shift left.
    significand = significand.zext(semantics->precision + exp);
    significand <<= exp;
    exp = 0;
  } else { /* exp < 0 */
    int texp = -exp;

    // We transform this using the identity:
    //   (N)(2^-e) == (N)(5^e)(10^-e)
    // This means we have to multiply N (the significand) by 5^e.
    // To avoid overflow, we have to operate on numbers large
    // enough to store N * 5^e:
    //   log2(N * 5^e) == log2(N) + e * log2(5)
    //                 <= semantics->precision + e * 137 / 59
    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

    unsigned precision = semantics->precision + (137 * texp + 136) / 59;

    // Multiply significand by 5^e.
    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
    significand = significand.zext(precision);
    APInt five_to_the_i(precision, 5);
    while (true) {
      if (texp & 1) significand *= five_to_the_i;

      texp >>= 1;
      if (!texp) break;
      five_to_the_i *= five_to_the_i;
    }
  }

  AdjustToPrecision(significand, exp, FormatPrecision);

  // Decimal digits, least significant first.
  SmallVector<char, 256> buffer;

  // Fill the buffer.
  unsigned precision = significand.getBitWidth();
  if (precision < 4) {
    // We need enough precision to store the value 10.
    precision = 4;
    significand = significand.zext(precision);
  }
  APInt ten(precision, 10);
  APInt digit(precision, 0);

  bool inTrail = true;
  while (significand != 0) {
    // digit <- significand % 10
    // significand <- significand / 10
    APInt::udivrem(significand, ten, significand, digit);

    unsigned d = digit.getZExtValue();

    // Drop trailing zeros.
    if (inTrail && !d) exp++;
    else {
      buffer.push_back((char) ('0' + d));
      inTrail = false;
    }
  }

  assert(!buffer.empty() && "no characters in buffer!");

  // Drop down to FormatPrecision.
  // TODO: don't do more precise calculations above than are required.
  AdjustToPrecision(buffer, exp, FormatPrecision);

  unsigned NDigits = buffer.size();

  // Check whether we should use scientific notation.
  bool FormatScientific;
  if (!FormatMaxPadding)
    FormatScientific = true;
  else {
    if (exp >= 0) {
      // 765e3 --> 765000
      //              ^^^
      // But we shouldn't make the number look more precise than it is.
      FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                          NDigits + (unsigned) exp > FormatPrecision);
    } else {
      // Power of the most significant digit.
      int MSD = exp + (int) (NDigits - 1);
      if (MSD >= 0) {
        // 765e-2 == 7.65
        FormatScientific = false;
      } else {
        // 765e-5 == 0.00765
        //           ^ ^^
        FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
      }
    }
  }

  // Scientific formatting is pretty straightforward.
  if (FormatScientific) {
    exp += (NDigits - 1);

    Str.push_back(buffer[NDigits-1]);
    Str.push_back('.');
    if (NDigits == 1 && TruncateZero)
      Str.push_back('0');
    else
      for (unsigned I = 1; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
    // Fill with zeros up to FormatPrecision.
    if (!TruncateZero && FormatPrecision > NDigits - 1)
      Str.append(FormatPrecision - NDigits + 1, '0');
    // For !TruncateZero we use lower 'e'.
    Str.push_back(TruncateZero ? 'E' : 'e');

    Str.push_back(exp >= 0 ? '+' : '-');
    if (exp < 0) exp = -exp;
    // Exponent digits, least significant first; emitted in reverse below.
    SmallVector<char, 6> expbuf;
    do {
      expbuf.push_back((char) ('0' + (exp % 10)));
      exp /= 10;
    } while (exp);
    // Exponent always at least two digits if we do not truncate zeros.
    if (!TruncateZero && expbuf.size() < 2)
      expbuf.push_back('0');
    for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
      Str.push_back(expbuf[E-1-I]);
    return;
  }

  // Non-scientific, positive exponents.
  if (exp >= 0) {
    for (unsigned I = 0; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-1-I]);
    for (unsigned I = 0; I != (unsigned) exp; ++I)
      Str.push_back('0');
    return;
  }

  // Non-scientific, negative exponents.

  // The number of digits to the left of the decimal point.
  int NWholeDigits = exp + (int) NDigits;

  unsigned I = 0;
  if (NWholeDigits > 0) {
    for (; I != (unsigned) NWholeDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);
    Str.push_back('.');
  } else {
    unsigned NZeros = 1 + (unsigned) -NWholeDigits;

    Str.push_back('0');
    Str.push_back('.');
    for (unsigned Z = 1; Z != NZeros; ++Z)
      Str.push_back('0');
  }

  for (; I != NDigits; ++I)
    Str.push_back(buffer[NDigits-I-1]);
}

/// Returns true if this value has an exactly representable reciprocal and,
/// when \p inv is non-null, stores that reciprocal in \p *inv.
///
/// Returns false for zero, infinity and NaN, for values whose significand
/// has any bit set other than the integer bit, when the division does not
/// return opOK, and when the reciprocal is denormal.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  if (significandLSB() != semantics->precision - 1)
    return false;

  // Get the inverse.
  IEEEFloat reciprocal(*semantics, 1ULL);
  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (reciprocal.isDenormal())
    return false;

  assert(reciprocal.isFiniteNonZero() &&
         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(reciprocal, *semantics);

  return true;
}

/// Returns log2 of the absolute value when exactly one significand bit is
/// set, and INT_MIN otherwise (including for zero, infinity and NaN).
int IEEEFloat::getExactLog2Abs() const {
  if (!isFinite() || isZero())
    return INT_MIN;

  const integerPart *Parts = significandParts();
  const int PartCount = partCountForBits(semantics->precision);

  // More than one set bit means the value is not a power of two.
  int PopCount = 0;
  for (int i = 0; i < PartCount; ++i) {
    PopCount += llvm::popcount(Parts[i]);
    if (PopCount > 1)
      return INT_MIN;
  }

  if (exponent != semantics->minExponent)
    return exponent;

  // At the minimum exponent the set bit may sit below the integer bit
  // position, so locate it and adjust the result accordingly.
  int CountrParts = 0;
  for (int i = 0; i < PartCount;
       ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
    if (Parts[i] != 0) {
      return exponent - semantics->precision + CountrParts +
             llvm::countr_zero(Parts[i]) + 1;
    }
  }

  llvm_unreachable("didn't find the set bit");
}

/// Returns true if this is a signaling NaN. Formats whose non-finite behavior
/// is NanOnly or FiniteOnly never report a signaling NaN.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
      semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
///
/// Returns opInvalidOp when the input is a signaling NaN (which is quieted),
/// and opOK otherwise.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats that encode NaN as negative zero have no -0; use +0.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else if (semantics->nonFiniteBehavior ==
                 fltNonfiniteBehavior::FiniteOnly) {
        // nextUp(getLargest()) == getLargest()
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}

/// Internal exponent value used for NaN in this object's semantics.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}

/// Internal exponent value used for infinity in this object's semantics.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}

/// Internal exponent value used for zero in this object's semantics.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}

/// Make this value an infinity with the given sign. FiniteOnly formats must
/// never reach this; NanOnly formats have no infinity and get a NaN with the
/// requested sign instead.
void IEEEFloat::makeInf(bool Negative) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support Inf");

  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // There is no Inf, so make NaN instead.
    makeNaN(false, Negative);
    return;
  }
  category = fcInfinity;
  sign = Negative;
  exponent = exponentInf();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Make this value a zero with the given sign. Formats that use the negative
/// zero bit pattern for NaN always produce positive zero.
void IEEEFloat::makeZero(bool Negative) {
  category = fcZero;
  sign = Negative;
  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Merge negative zero to positive because 0b10000...000 is used for NaN
    sign = false;
  }
  exponent = exponentZero();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Turn a NaN into a quiet NaN by setting the top bit of the trailing
/// significand. NanOnly formats are left untouched. Requires isNaN().
void IEEEFloat::makeQuiet() {
  assert(isNaN());
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    APInt::tcSetBit(significandParts(), semantics->precision - 2);
}

/// Returns the exponent of \p Arg as if it were normalized, or one of the
/// IEK_NaN / IEK_Zero / IEK_Inf sentinels for the special categories.
int ilogb(const IEEEFloat &Arg) {
  if (Arg.isNaN())
    return IEEEFloat::IEK_NaN;
  if (Arg.isZero())
    return IEEEFloat::IEK_Zero;
  if (Arg.isInfinity())
    return IEEEFloat::IEK_Inf;
  if (!Arg.isDenormal())
    return Arg.exponent;

  // Denormal: raise the exponent of a copy so that normalize() can shift the
  // leading bit into the integer position, then remove the same offset.
  IEEEFloat Normalized(Arg);
  int SignificandBits = Arg.getSemantics().precision - 1;

  Normalized.exponent += SignificandBits;
  Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
  return Normalized.exponent - SignificandBits;
}

/// Returns \p X with \p Exp added to its exponent, renormalized under
/// \p RoundingMode. A NaN result is quieted.
IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
  auto MaxExp = X.getSemantics().maxExponent;
  auto MinExp = X.getSemantics().minExponent;

  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
  // overflow; clamp it to a safe range before adding, but ensure that the range
  // is large enough that the clamp does not change the result. The range we
  // need to support is the difference between the largest possible exponent and
  // the normalized exponent of half the smallest denormal.

  int SignificandBits = X.getSemantics().precision - 1;
  int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;

  // Clamp to one past the range ends to let normalize handle overflow.
  X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
  X.normalize(RoundingMode, lfExactlyZero);
  if (X.isNaN())
    X.makeQuiet();
  return X;
}

/// Splits \p Val into a fraction (returned) and a power-of-two exponent
/// (stored in \p Exp). NaNs are returned quieted and infinities unchanged;
/// in both cases \p Exp holds the corresponding ilogb sentinel. For zero,
/// \p Exp is 0.
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
  Exp = ilogb(Val);

  // Quiet signalling nans.
  if (Exp == IEEEFloat::IEK_NaN) {
    IEEEFloat Quiet(Val);
    Quiet.makeQuiet();
    return Quiet;
  }

  if (Exp == IEEEFloat::IEK_Inf)
    return Val;

  // 1 is added because frexp is defined to return a normalized fraction in
  // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
  Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
  return scalbn(Val, -Exp, RM);
}

/// Constructs a double-double whose two components are both built from
/// APFloat(semIEEEdouble). \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Constructs a double-double whose two components are built with the
/// `uninitialized` tag. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Constructs a double-double whose first component is built from the
/// integer \p I; the second component is a default APFloat(semIEEEdouble).
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Constructs a double-double from a raw bit pattern: word 0 of \p I supplies
/// the bits of the first double and word 1 the bits of the second.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics == &semPPCDoubleDouble);
}

DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4655 APFloat &&Second) 4656 : Semantics(&S), 4657 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4658 assert(Semantics == &semPPCDoubleDouble); 4659 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4660 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4661 } 4662 4663 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4664 : Semantics(RHS.Semantics), 4665 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4666 APFloat(RHS.Floats[1])} 4667 : nullptr) { 4668 assert(Semantics == &semPPCDoubleDouble); 4669 } 4670 4671 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4672 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4673 RHS.Semantics = &semBogus; 4674 assert(Semantics == &semPPCDoubleDouble); 4675 } 4676 4677 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4678 if (Semantics == RHS.Semantics && RHS.Floats) { 4679 Floats[0] = RHS.Floats[0]; 4680 Floats[1] = RHS.Floats[1]; 4681 } else if (this != &RHS) { 4682 this->~DoubleAPFloat(); 4683 new (this) DoubleAPFloat(RHS); 4684 } 4685 return *this; 4686 } 4687 4688 // Implement addition, subtraction, multiplication and division based on: 4689 // "Software for Doubled-Precision Floating-Point Computations", 4690 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 
4691 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4692 const APFloat &c, const APFloat &cc, 4693 roundingMode RM) { 4694 int Status = opOK; 4695 APFloat z = a; 4696 Status |= z.add(c, RM); 4697 if (!z.isFinite()) { 4698 if (!z.isInfinity()) { 4699 Floats[0] = std::move(z); 4700 Floats[1].makeZero(/* Neg = */ false); 4701 return (opStatus)Status; 4702 } 4703 Status = opOK; 4704 auto AComparedToC = a.compareAbsoluteValue(c); 4705 z = cc; 4706 Status |= z.add(aa, RM); 4707 if (AComparedToC == APFloat::cmpGreaterThan) { 4708 // z = cc + aa + c + a; 4709 Status |= z.add(c, RM); 4710 Status |= z.add(a, RM); 4711 } else { 4712 // z = cc + aa + a + c; 4713 Status |= z.add(a, RM); 4714 Status |= z.add(c, RM); 4715 } 4716 if (!z.isFinite()) { 4717 Floats[0] = std::move(z); 4718 Floats[1].makeZero(/* Neg = */ false); 4719 return (opStatus)Status; 4720 } 4721 Floats[0] = z; 4722 APFloat zz = aa; 4723 Status |= zz.add(cc, RM); 4724 if (AComparedToC == APFloat::cmpGreaterThan) { 4725 // Floats[1] = a - z + c + zz; 4726 Floats[1] = a; 4727 Status |= Floats[1].subtract(z, RM); 4728 Status |= Floats[1].add(c, RM); 4729 Status |= Floats[1].add(zz, RM); 4730 } else { 4731 // Floats[1] = c - z + a + zz; 4732 Floats[1] = c; 4733 Status |= Floats[1].subtract(z, RM); 4734 Status |= Floats[1].add(a, RM); 4735 Status |= Floats[1].add(zz, RM); 4736 } 4737 } else { 4738 // q = a - z; 4739 APFloat q = a; 4740 Status |= q.subtract(z, RM); 4741 4742 // zz = q + c + (a - (q + z)) + aa + cc; 4743 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 
4744 auto zz = q; 4745 Status |= zz.add(c, RM); 4746 Status |= q.add(z, RM); 4747 Status |= q.subtract(a, RM); 4748 q.changeSign(); 4749 Status |= zz.add(q, RM); 4750 Status |= zz.add(aa, RM); 4751 Status |= zz.add(cc, RM); 4752 if (zz.isZero() && !zz.isNegative()) { 4753 Floats[0] = std::move(z); 4754 Floats[1].makeZero(/* Neg = */ false); 4755 return opOK; 4756 } 4757 Floats[0] = z; 4758 Status |= Floats[0].add(zz, RM); 4759 if (!Floats[0].isFinite()) { 4760 Floats[1].makeZero(/* Neg = */ false); 4761 return (opStatus)Status; 4762 } 4763 Floats[1] = std::move(z); 4764 Status |= Floats[1].subtract(Floats[0], RM); 4765 Status |= Floats[1].add(zz, RM); 4766 } 4767 return (opStatus)Status; 4768 } 4769 4770 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4771 const DoubleAPFloat &RHS, 4772 DoubleAPFloat &Out, 4773 roundingMode RM) { 4774 if (LHS.getCategory() == fcNaN) { 4775 Out = LHS; 4776 return opOK; 4777 } 4778 if (RHS.getCategory() == fcNaN) { 4779 Out = RHS; 4780 return opOK; 4781 } 4782 if (LHS.getCategory() == fcZero) { 4783 Out = RHS; 4784 return opOK; 4785 } 4786 if (RHS.getCategory() == fcZero) { 4787 Out = LHS; 4788 return opOK; 4789 } 4790 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4791 LHS.isNegative() != RHS.isNegative()) { 4792 Out.makeNaN(false, Out.isNegative(), nullptr); 4793 return opInvalidOp; 4794 } 4795 if (LHS.getCategory() == fcInfinity) { 4796 Out = LHS; 4797 return opOK; 4798 } 4799 if (RHS.getCategory() == fcInfinity) { 4800 Out = RHS; 4801 return opOK; 4802 } 4803 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4804 4805 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4806 CC(RHS.Floats[1]); 4807 assert(&A.getSemantics() == &semIEEEdouble); 4808 assert(&AA.getSemantics() == &semIEEEdouble); 4809 assert(&C.getSemantics() == &semIEEEdouble); 4810 assert(&CC.getSemantics() == &semIEEEdouble); 4811 assert(&Out.Floats[0].getSemantics() == 
&semIEEEdouble); 4812 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4813 return Out.addImpl(A, AA, C, CC, RM); 4814 } 4815 4816 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4817 roundingMode RM) { 4818 return addWithSpecial(*this, RHS, *this, RM); 4819 } 4820 4821 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4822 roundingMode RM) { 4823 changeSign(); 4824 auto Ret = add(RHS, RM); 4825 changeSign(); 4826 return Ret; 4827 } 4828 4829 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4830 APFloat::roundingMode RM) { 4831 const auto &LHS = *this; 4832 auto &Out = *this; 4833 /* Interesting observation: For special categories, finding the lowest 4834 common ancestor of the following layered graph gives the correct 4835 return category: 4836 4837 NaN 4838 / \ 4839 Zero Inf 4840 \ / 4841 Normal 4842 4843 e.g. NaN * NaN = NaN 4844 Zero * Inf = NaN 4845 Normal * Zero = Zero 4846 Normal * Inf = Inf 4847 */ 4848 if (LHS.getCategory() == fcNaN) { 4849 Out = LHS; 4850 return opOK; 4851 } 4852 if (RHS.getCategory() == fcNaN) { 4853 Out = RHS; 4854 return opOK; 4855 } 4856 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4857 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4858 Out.makeNaN(false, false, nullptr); 4859 return opOK; 4860 } 4861 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4862 Out = LHS; 4863 return opOK; 4864 } 4865 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4866 Out = RHS; 4867 return opOK; 4868 } 4869 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4870 "Special cases not handled exhaustively"); 4871 4872 int Status = opOK; 4873 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4874 // t = a * c 4875 APFloat T = A; 4876 Status |= T.multiply(C, RM); 4877 if (!T.isFiniteNonZero()) { 4878 Floats[0] = T; 4879 Floats[1].makeZero(/* Neg = */ false); 4880 
return (opStatus)Status; 4881 } 4882 4883 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4884 APFloat Tau = A; 4885 T.changeSign(); 4886 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4887 T.changeSign(); 4888 { 4889 // v = a * d 4890 APFloat V = A; 4891 Status |= V.multiply(D, RM); 4892 // w = b * c 4893 APFloat W = B; 4894 Status |= W.multiply(C, RM); 4895 Status |= V.add(W, RM); 4896 // tau += v + w 4897 Status |= Tau.add(V, RM); 4898 } 4899 // u = t + tau 4900 APFloat U = T; 4901 Status |= U.add(Tau, RM); 4902 4903 Floats[0] = U; 4904 if (!U.isFinite()) { 4905 Floats[1].makeZero(/* Neg = */ false); 4906 } else { 4907 // Floats[1] = (t - u) + tau 4908 Status |= T.subtract(U, RM); 4909 Status |= T.add(Tau, RM); 4910 Floats[1] = T; 4911 } 4912 return (opStatus)Status; 4913 } 4914 4915 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4916 APFloat::roundingMode RM) { 4917 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4918 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4919 auto Ret = 4920 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4921 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4922 return Ret; 4923 } 4924 4925 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4926 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4927 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4928 auto Ret = 4929 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4930 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4931 return Ret; 4932 } 4933 4934 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4935 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4936 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4937 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4938 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4939 return Ret; 
4940 } 4941 4942 APFloat::opStatus 4943 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4944 const DoubleAPFloat &Addend, 4945 APFloat::roundingMode RM) { 4946 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4947 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4948 auto Ret = Tmp.fusedMultiplyAdd( 4949 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4950 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4951 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4952 return Ret; 4953 } 4954 4955 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4956 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4957 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4958 auto Ret = Tmp.roundToIntegral(RM); 4959 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4960 return Ret; 4961 } 4962 4963 void DoubleAPFloat::changeSign() { 4964 Floats[0].changeSign(); 4965 Floats[1].changeSign(); 4966 } 4967 4968 APFloat::cmpResult 4969 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4970 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4971 if (Result != cmpEqual) 4972 return Result; 4973 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4974 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4975 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4976 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4977 if (Against && !RHSAgainst) 4978 return cmpLessThan; 4979 if (!Against && RHSAgainst) 4980 return cmpGreaterThan; 4981 if (!Against && !RHSAgainst) 4982 return Result; 4983 if (Against && RHSAgainst) 4984 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4985 } 4986 return Result; 4987 } 4988 4989 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4990 return Floats[0].getCategory(); 4991 } 4992 4993 bool DoubleAPFloat::isNegative() 
const { return Floats[0].isNegative(); } 4994 4995 void DoubleAPFloat::makeInf(bool Neg) { 4996 Floats[0].makeInf(Neg); 4997 Floats[1].makeZero(/* Neg = */ false); 4998 } 4999 5000 void DoubleAPFloat::makeZero(bool Neg) { 5001 Floats[0].makeZero(Neg); 5002 Floats[1].makeZero(/* Neg = */ false); 5003 } 5004 5005 void DoubleAPFloat::makeLargest(bool Neg) { 5006 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5007 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 5008 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 5009 if (Neg) 5010 changeSign(); 5011 } 5012 5013 void DoubleAPFloat::makeSmallest(bool Neg) { 5014 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5015 Floats[0].makeSmallest(Neg); 5016 Floats[1].makeZero(/* Neg = */ false); 5017 } 5018 5019 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 5020 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5021 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 5022 if (Neg) 5023 Floats[0].changeSign(); 5024 Floats[1].makeZero(/* Neg = */ false); 5025 } 5026 5027 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 5028 Floats[0].makeNaN(SNaN, Neg, fill); 5029 Floats[1].makeZero(/* Neg = */ false); 5030 } 5031 5032 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 5033 auto Result = Floats[0].compare(RHS.Floats[0]); 5034 // |Float[0]| > |Float[1]| 5035 if (Result == APFloat::cmpEqual) 5036 return Floats[1].compare(RHS.Floats[1]); 5037 return Result; 5038 } 5039 5040 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 5041 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 5042 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 5043 } 5044 5045 hash_code hash_value(const DoubleAPFloat &Arg) { 5046 if (Arg.Floats) 5047 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 5048 return hash_combine(Arg.Semantics); 5049 } 5050 5051 
APInt DoubleAPFloat::bitcastToAPInt() const { 5052 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5053 uint64_t Data[] = { 5054 Floats[0].bitcastToAPInt().getRawData()[0], 5055 Floats[1].bitcastToAPInt().getRawData()[0], 5056 }; 5057 return APInt(128, 2, Data); 5058 } 5059 5060 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 5061 roundingMode RM) { 5062 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5063 APFloat Tmp(semPPCDoubleDoubleLegacy); 5064 auto Ret = Tmp.convertFromString(S, RM); 5065 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5066 return Ret; 5067 } 5068 5069 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5070 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5071 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5072 auto Ret = Tmp.next(nextDown); 5073 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5074 return Ret; 5075 } 5076 5077 APFloat::opStatus 5078 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5079 unsigned int Width, bool IsSigned, 5080 roundingMode RM, bool *IsExact) const { 5081 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5082 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5083 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5084 } 5085 5086 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5087 bool IsSigned, 5088 roundingMode RM) { 5089 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5090 APFloat Tmp(semPPCDoubleDoubleLegacy); 5091 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5092 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5093 return Ret; 5094 } 5095 5096 APFloat::opStatus 5097 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5098 unsigned int InputSize, 5099 bool IsSigned, roundingMode RM) { 5100 assert(Semantics == &semPPCDoubleDouble && 
"Unexpected Semantics"); 5101 APFloat Tmp(semPPCDoubleDoubleLegacy); 5102 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5103 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5104 return Ret; 5105 } 5106 5107 APFloat::opStatus 5108 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5109 unsigned int InputSize, 5110 bool IsSigned, roundingMode RM) { 5111 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5112 APFloat Tmp(semPPCDoubleDoubleLegacy); 5113 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5114 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5115 return Ret; 5116 } 5117 5118 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5119 unsigned int HexDigits, 5120 bool UpperCase, 5121 roundingMode RM) const { 5122 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5123 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5124 .convertToHexString(DST, HexDigits, UpperCase, RM); 5125 } 5126 5127 bool DoubleAPFloat::isDenormal() const { 5128 return getCategory() == fcNormal && 5129 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5130 // (double)(Hi + Lo) == Hi defines a normal number. 
5131 Floats[0] != Floats[0] + Floats[1]); 5132 } 5133 5134 bool DoubleAPFloat::isSmallest() const { 5135 if (getCategory() != fcNormal) 5136 return false; 5137 DoubleAPFloat Tmp(*this); 5138 Tmp.makeSmallest(this->isNegative()); 5139 return Tmp.compare(*this) == cmpEqual; 5140 } 5141 5142 bool DoubleAPFloat::isSmallestNormalized() const { 5143 if (getCategory() != fcNormal) 5144 return false; 5145 5146 DoubleAPFloat Tmp(*this); 5147 Tmp.makeSmallestNormalized(this->isNegative()); 5148 return Tmp.compare(*this) == cmpEqual; 5149 } 5150 5151 bool DoubleAPFloat::isLargest() const { 5152 if (getCategory() != fcNormal) 5153 return false; 5154 DoubleAPFloat Tmp(*this); 5155 Tmp.makeLargest(this->isNegative()); 5156 return Tmp.compare(*this) == cmpEqual; 5157 } 5158 5159 bool DoubleAPFloat::isInteger() const { 5160 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5161 return Floats[0].isInteger() && Floats[1].isInteger(); 5162 } 5163 5164 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5165 unsigned FormatPrecision, 5166 unsigned FormatMaxPadding, 5167 bool TruncateZero) const { 5168 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5169 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5170 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5171 } 5172 5173 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5174 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5175 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5176 if (!inv) 5177 return Tmp.getExactInverse(nullptr); 5178 APFloat Inv(semPPCDoubleDoubleLegacy); 5179 auto Ret = Tmp.getExactInverse(&Inv); 5180 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5181 return Ret; 5182 } 5183 5184 int DoubleAPFloat::getExactLog2() const { 5185 // TODO: Implement me 5186 return INT_MIN; 5187 } 5188 5189 int DoubleAPFloat::getExactLog2Abs() const { 5190 // TODO: Implement me 5191 return INT_MIN; 5192 } 5193 5194 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5195 APFloat::roundingMode RM) { 5196 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5197 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5198 scalbn(Arg.Floats[1], Exp, RM)); 5199 } 5200 5201 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5202 APFloat::roundingMode RM) { 5203 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5204 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5205 APFloat Second = Arg.Floats[1]; 5206 if (Arg.getCategory() == APFloat::fcNormal) 5207 Second = scalbn(Second, -Exp, RM); 5208 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5209 } 5210 5211 } // namespace detail 5212 5213 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5214 if (usesLayout<IEEEFloat>(Semantics)) { 5215 new (&IEEE) IEEEFloat(std::move(F)); 5216 return; 5217 } 5218 if (usesLayout<DoubleAPFloat>(Semantics)) { 5219 const fltSemantics& S = F.getSemantics(); 5220 new (&Double) 5221 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5222 APFloat(semIEEEdouble)); 5223 return; 5224 } 5225 llvm_unreachable("Unexpected semantics"); 5226 } 5227 5228 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5229 roundingMode RM) { 5230 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5231 } 5232 5233 hash_code hash_value(const APFloat &Arg) { 5234 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5235 return hash_value(Arg.U.IEEE); 5236 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5237 return hash_value(Arg.U.Double); 5238 llvm_unreachable("Unexpected semantics"); 5239 } 5240 5241 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5242 : APFloat(Semantics) { 5243 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5244 assert(StatusOrErr && "Invalid floating point representation"); 5245 consumeError(StatusOrErr.takeError()); 5246 } 
5247 5248 FPClassTest APFloat::classify() const { 5249 if (isZero()) 5250 return isNegative() ? fcNegZero : fcPosZero; 5251 if (isNormal()) 5252 return isNegative() ? fcNegNormal : fcPosNormal; 5253 if (isDenormal()) 5254 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5255 if (isInfinity()) 5256 return isNegative() ? fcNegInf : fcPosInf; 5257 assert(isNaN() && "Other class of FP constant"); 5258 return isSignaling() ? fcSNan : fcQNan; 5259 } 5260 5261 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5262 roundingMode RM, bool *losesInfo) { 5263 if (&getSemantics() == &ToSemantics) { 5264 *losesInfo = false; 5265 return opOK; 5266 } 5267 if (usesLayout<IEEEFloat>(getSemantics()) && 5268 usesLayout<IEEEFloat>(ToSemantics)) 5269 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5270 if (usesLayout<IEEEFloat>(getSemantics()) && 5271 usesLayout<DoubleAPFloat>(ToSemantics)) { 5272 assert(&ToSemantics == &semPPCDoubleDouble); 5273 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5274 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5275 return Ret; 5276 } 5277 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5278 usesLayout<IEEEFloat>(ToSemantics)) { 5279 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5280 *this = APFloat(std::move(getIEEE()), ToSemantics); 5281 return Ret; 5282 } 5283 llvm_unreachable("Unexpected semantics"); 5284 } 5285 5286 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5287 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5288 } 5289 5290 void APFloat::print(raw_ostream &OS) const { 5291 SmallVector<char, 16> Buffer; 5292 toString(Buffer); 5293 OS << Buffer << "\n"; 5294 } 5295 5296 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5297 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 5298 #endif 5299 5300 void APFloat::Profile(FoldingSetNodeID &NID) const { 5301 NID.Add(bitcastToAPInt()); 5302 } 5303 5304 /* Same as 
convertToInteger(integerPart*, ...), except the result is returned in 5305 an APSInt, whose initial bit-width and signed-ness are used to determine the 5306 precision of the conversion. 5307 */ 5308 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5309 roundingMode rounding_mode, 5310 bool *isExact) const { 5311 unsigned bitWidth = result.getBitWidth(); 5312 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5313 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5314 rounding_mode, isExact); 5315 // Keeps the original signed-ness. 5316 result = APInt(bitWidth, parts); 5317 return status; 5318 } 5319 5320 double APFloat::convertToDouble() const { 5321 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5322 return getIEEE().convertToDouble(); 5323 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5324 "Float semantics is not representable by IEEEdouble"); 5325 APFloat Temp = *this; 5326 bool LosesInfo; 5327 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5328 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5329 (void)St; 5330 return Temp.getIEEE().convertToDouble(); 5331 } 5332 5333 #ifdef HAS_IEE754_FLOAT128 5334 float128 APFloat::convertToQuad() const { 5335 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) 5336 return getIEEE().convertToQuad(); 5337 assert(getSemantics().isRepresentableBy(semIEEEquad) && 5338 "Float semantics is not representable by IEEEquad"); 5339 APFloat Temp = *this; 5340 bool LosesInfo; 5341 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); 5342 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5343 (void)St; 5344 return Temp.getIEEE().convertToQuad(); 5345 } 5346 #endif 5347 5348 float APFloat::convertToFloat() const { 5349 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5350 return getIEEE().convertToFloat(); 5351 
assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5352 "Float semantics is not representable by IEEEsingle"); 5353 APFloat Temp = *this; 5354 bool LosesInfo; 5355 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5356 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5357 (void)St; 5358 return Temp.getIEEE().convertToFloat(); 5359 } 5360 5361 } // namespace llvm 5362 5363 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5364