//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class to represent arbitrary precision floating
// point values and provide a variety of arithmetic operations on them.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <limits.h>

/// Expands to a statement that forwards METHOD_CALL to the member of the
/// union `U` selected by the current semantics and returns its result from
/// the enclosing function: `U.IEEE` when the semantics use the IEEEFloat
/// layout, `U.Double` when they use the DoubleAPFloat layout. Any other
/// semantics hit llvm_unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)

using namespace llvm;

/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");

namespace llvm {

// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,
};

// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3 floating point type where NaN is
  // represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};

/* Represents floating point arithmetic semantics.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* How Inf and NaN are represented; defaults to the IEEE 754 scheme.  */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* Which bit pattern denotes NaN; defaults to the IEEE scheme.  */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Does not take into account
  // the value of fltNonfiniteBehavior.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};

/* The initializers below are positional and follow the member order of
   fltSemantics: {maxExponent, minExponent, precision, sizeInBits
   [, nonFiniteBehavior, nanEncoding]}.  Entries that omit the last two
   members take the IEEE defaults declared in the struct.  */
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static constexpr fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.
*/
// The members are deliberately not a real format description; they only need
// to differ from semBogus ({0, 0, 0, 0}) so the two objects stay distinct.
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.
*/ 180 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 181 53 + 53, 128}; 182 183 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 184 switch (S) { 185 case S_IEEEhalf: 186 return IEEEhalf(); 187 case S_BFloat: 188 return BFloat(); 189 case S_IEEEsingle: 190 return IEEEsingle(); 191 case S_IEEEdouble: 192 return IEEEdouble(); 193 case S_IEEEquad: 194 return IEEEquad(); 195 case S_PPCDoubleDouble: 196 return PPCDoubleDouble(); 197 case S_Float8E5M2: 198 return Float8E5M2(); 199 case S_Float8E5M2FNUZ: 200 return Float8E5M2FNUZ(); 201 case S_Float8E4M3FN: 202 return Float8E4M3FN(); 203 case S_Float8E4M3FNUZ: 204 return Float8E4M3FNUZ(); 205 case S_Float8E4M3B11FNUZ: 206 return Float8E4M3B11FNUZ(); 207 case S_FloatTF32: 208 return FloatTF32(); 209 case S_x87DoubleExtended: 210 return x87DoubleExtended(); 211 } 212 llvm_unreachable("Unrecognised floating semantics"); 213 } 214 215 APFloatBase::Semantics 216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 217 if (&Sem == &llvm::APFloat::IEEEhalf()) 218 return S_IEEEhalf; 219 else if (&Sem == &llvm::APFloat::BFloat()) 220 return S_BFloat; 221 else if (&Sem == &llvm::APFloat::IEEEsingle()) 222 return S_IEEEsingle; 223 else if (&Sem == &llvm::APFloat::IEEEdouble()) 224 return S_IEEEdouble; 225 else if (&Sem == &llvm::APFloat::IEEEquad()) 226 return S_IEEEquad; 227 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 228 return S_PPCDoubleDouble; 229 else if (&Sem == &llvm::APFloat::Float8E5M2()) 230 return S_Float8E5M2; 231 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 232 return S_Float8E5M2FNUZ; 233 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 234 return S_Float8E4M3FN; 235 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 236 return S_Float8E4M3FNUZ; 237 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 238 return S_Float8E4M3B11FNUZ; 239 else if (&Sem == &llvm::APFloat::FloatTF32()) 240 return S_FloatTF32; 241 else if (&Sem == 
&llvm::APFloat::x87DoubleExtended()) 242 return S_x87DoubleExtended; 243 else 244 llvm_unreachable("Unknown floating semantics"); 245 } 246 247 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 248 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 249 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 250 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 251 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 252 const fltSemantics &APFloatBase::PPCDoubleDouble() { 253 return semPPCDoubleDouble; 254 } 255 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 256 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 257 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 258 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 259 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 260 return semFloat8E4M3B11FNUZ; 261 } 262 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } 263 const fltSemantics &APFloatBase::x87DoubleExtended() { 264 return semX87DoubleExtended; 265 } 266 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 267 268 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 269 constexpr RoundingMode APFloatBase::rmTowardPositive; 270 constexpr RoundingMode APFloatBase::rmTowardNegative; 271 constexpr RoundingMode APFloatBase::rmTowardZero; 272 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 273 274 /* A tight upper bound on number of parts required to hold the value 275 pow(5, power) is 276 277 power * 815 / (351 * integerPartWidth) + 1 278 279 However, whilst the result may require only this many parts, 280 because we are multiplying two values to get it, the 281 multiplication may require an extra part with the excess part 282 being zero (consider the trivial case of 1 * 1, tcFullMultiply 283 requires two parts to hold the 
single-part result). So we add an 284 extra one to guarantee enough space whilst multiplying. */ 285 const unsigned int maxExponent = 16383; 286 const unsigned int maxPrecision = 113; 287 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 288 const unsigned int maxPowerOfFiveParts = 289 2 + 290 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 291 292 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 293 return semantics.precision; 294 } 295 APFloatBase::ExponentType 296 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 297 return semantics.maxExponent; 298 } 299 APFloatBase::ExponentType 300 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 301 return semantics.minExponent; 302 } 303 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 304 return semantics.sizeInBits; 305 } 306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 307 bool isSigned) { 308 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 309 // at least one more bit than the MaxExponent to hold the max FP value. 310 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 311 // Extra sign bit needed. 312 if (isSigned) 313 ++MinBitWidth; 314 return MinBitWidth; 315 } 316 317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 318 const fltSemantics &Dst) { 319 // Exponent range must be larger. 320 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 321 return false; 322 323 // If the mantissa is long enough, the result value could still be denormal 324 // with a larger exponent range. 325 // 326 // FIXME: This condition is probably not accurate but also shouldn't be a 327 // practical concern with existing types. 
328 return Dst.precision >= Src.precision; 329 } 330 331 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 332 return Sem.sizeInBits; 333 } 334 335 static constexpr APFloatBase::ExponentType 336 exponentZero(const fltSemantics &semantics) { 337 return semantics.minExponent - 1; 338 } 339 340 static constexpr APFloatBase::ExponentType 341 exponentInf(const fltSemantics &semantics) { 342 return semantics.maxExponent + 1; 343 } 344 345 static constexpr APFloatBase::ExponentType 346 exponentNaN(const fltSemantics &semantics) { 347 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 348 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 349 return exponentZero(semantics); 350 return semantics.maxExponent; 351 } 352 return semantics.maxExponent + 1; 353 } 354 355 /* A bunch of private, handy routines. */ 356 357 static inline Error createError(const Twine &Err) { 358 return make_error<StringError>(Err, inconvertibleErrorCode()); 359 } 360 361 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 362 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 363 } 364 365 /* Returns 0U-9U. Return values >= 10U are not digits. */ 366 static inline unsigned int 367 decDigitValue(unsigned int c) 368 { 369 return c - '0'; 370 } 371 372 /* Return the value of a decimal exponent of the form 373 [+-]ddddddd. 374 375 If the exponent overflows, returns a large exponent with the 376 appropriate sign. */ 377 static Expected<int> readExponent(StringRef::iterator begin, 378 StringRef::iterator end) { 379 bool isNegative; 380 unsigned int absExponent; 381 const unsigned int overlargeExponent = 24000; /* FIXME. 
*/ 382 StringRef::iterator p = begin; 383 384 // Treat no exponent as 0 to match binutils 385 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 386 return 0; 387 } 388 389 isNegative = (*p == '-'); 390 if (*p == '-' || *p == '+') { 391 p++; 392 if (p == end) 393 return createError("Exponent has no digits"); 394 } 395 396 absExponent = decDigitValue(*p++); 397 if (absExponent >= 10U) 398 return createError("Invalid character in exponent"); 399 400 for (; p != end; ++p) { 401 unsigned int value; 402 403 value = decDigitValue(*p); 404 if (value >= 10U) 405 return createError("Invalid character in exponent"); 406 407 absExponent = absExponent * 10U + value; 408 if (absExponent >= overlargeExponent) { 409 absExponent = overlargeExponent; 410 break; 411 } 412 } 413 414 if (isNegative) 415 return -(int) absExponent; 416 else 417 return (int) absExponent; 418 } 419 420 /* This is ugly and needs cleaning up, but I don't immediately see 421 how whilst remaining safe. */ 422 static Expected<int> totalExponent(StringRef::iterator p, 423 StringRef::iterator end, 424 int exponentAdjustment) { 425 int unsignedExponent; 426 bool negative, overflow; 427 int exponent = 0; 428 429 if (p == end) 430 return createError("Exponent has no digits"); 431 432 negative = *p == '-'; 433 if (*p == '-' || *p == '+') { 434 p++; 435 if (p == end) 436 return createError("Exponent has no digits"); 437 } 438 439 unsignedExponent = 0; 440 overflow = false; 441 for (; p != end; ++p) { 442 unsigned int value; 443 444 value = decDigitValue(*p); 445 if (value >= 10U) 446 return createError("Invalid character in exponent"); 447 448 unsignedExponent = unsignedExponent * 10 + value; 449 if (unsignedExponent > 32767) { 450 overflow = true; 451 break; 452 } 453 } 454 455 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 456 overflow = true; 457 458 if (!overflow) { 459 exponent = unsignedExponent; 460 if (negative) 461 exponent = -exponent; 462 exponent += exponentAdjustment; 463 if 
(exponent > 32767 || exponent < -32768) 464 overflow = true; 465 } 466 467 if (overflow) 468 exponent = negative ? -32768: 32767; 469 470 return exponent; 471 } 472 473 static Expected<StringRef::iterator> 474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 475 StringRef::iterator *dot) { 476 StringRef::iterator p = begin; 477 *dot = end; 478 while (p != end && *p == '0') 479 p++; 480 481 if (p != end && *p == '.') { 482 *dot = p++; 483 484 if (end - begin == 1) 485 return createError("Significand has no digits"); 486 487 while (p != end && *p == '0') 488 p++; 489 } 490 491 return p; 492 } 493 494 /* Given a normal decimal floating point number of the form 495 496 dddd.dddd[eE][+-]ddd 497 498 where the decimal point and exponent are optional, fill out the 499 structure D. Exponent is appropriate if the significand is 500 treated as an integer, and normalizedExponent if the significand 501 is taken to have the decimal point after a single leading 502 non-zero digit. 503 504 If the value is zero, V->firstSigDigit points to a non-digit, and 505 the return exponent is zero. 
*/
struct decimalInfo {
  // First character after any leading zeroes and leading dot.
  const char *firstSigDigit;
  // Position interpretDecimal stopped at after dropping trailing zeroes.
  const char *lastSigDigit;
  // Decimal exponent when the significant digits are read as an integer.
  int exponent;
  // Decimal exponent with the point placed after the first significant digit.
  int normalizedExponent;
};

/* Scan the decimal number in [BEGIN, END) and fill in *D.  Returns an error
   for a second '.', for a character that is neither a digit, '.', 'e' nor
   'E', for a significand without digits, and for any error reported while
   reading the exponent.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Walk the significand: digits, with at most one '.' overall.  */
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  if (p != end) {
    /* Only an exponent marker may end the significand early.  */
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      /* The inner loop steps back over '0's; the outer loop repeats that
         whenever the character it stopped on is the '.'.  */
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}

/* Return the trailing fraction of a hexadecimal number.
581 DIGITVALUE is the first hex digit of the fraction, P points to 582 the next digit. */ 583 static Expected<lostFraction> 584 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 585 unsigned int digitValue) { 586 unsigned int hexDigit; 587 588 /* If the first trailing digit isn't 0 or 8 we can work out the 589 fraction immediately. */ 590 if (digitValue > 8) 591 return lfMoreThanHalf; 592 else if (digitValue < 8 && digitValue > 0) 593 return lfLessThanHalf; 594 595 // Otherwise we need to find the first non-zero digit. 596 while (p != end && (*p == '0' || *p == '.')) 597 p++; 598 599 if (p == end) 600 return createError("Invalid trailing hexadecimal fraction!"); 601 602 hexDigit = hexDigitValue(*p); 603 604 /* If we ran off the end it is exactly zero or one-half, otherwise 605 a little more. */ 606 if (hexDigit == UINT_MAX) 607 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 608 else 609 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 610 } 611 612 /* Return the fraction lost were a bignum truncated losing the least 613 significant BITS bits. */ 614 static lostFraction 615 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 616 unsigned int partCount, 617 unsigned int bits) 618 { 619 unsigned int lsb; 620 621 lsb = APInt::tcLSB(parts, partCount); 622 623 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ 624 if (bits <= lsb) 625 return lfExactlyZero; 626 if (bits == lsb + 1) 627 return lfExactlyHalf; 628 if (bits <= partCount * APFloatBase::integerPartWidth && 629 APInt::tcExtractBit(parts, bits - 1)) 630 return lfMoreThanHalf; 631 632 return lfLessThanHalf; 633 } 634 635 /* Shift DST right BITS bits noting lost fraction. 
*/ 636 static lostFraction 637 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 638 { 639 lostFraction lost_fraction; 640 641 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 642 643 APInt::tcShiftRight(dst, parts, bits); 644 645 return lost_fraction; 646 } 647 648 /* Combine the effect of two lost fractions. */ 649 static lostFraction 650 combineLostFractions(lostFraction moreSignificant, 651 lostFraction lessSignificant) 652 { 653 if (lessSignificant != lfExactlyZero) { 654 if (moreSignificant == lfExactlyZero) 655 moreSignificant = lfLessThanHalf; 656 else if (moreSignificant == lfExactlyHalf) 657 moreSignificant = lfMoreThanHalf; 658 } 659 660 return moreSignificant; 661 } 662 663 /* The error from the true value, in half-ulps, on multiplying two 664 floating point numbers, which differ from the value they 665 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 666 than the returned value. 667 668 See "How to Read Floating Point Numbers Accurately" by William D 669 Clinger. */ 670 static unsigned int 671 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 672 { 673 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 674 675 if (HUerr1 + HUerr2 == 0) 676 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 677 else 678 return inexactMultiply + 2 * (HUerr1 + HUerr2); 679 } 680 681 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 682 when the least significant BITS are truncated. BITS cannot be 683 zero. 
*/
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* COUNT is the index of the part holding the top truncated bit and
     PARTBITS how many truncated bits live in that part.  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* Keep only the truncated bits of that top part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  /* For nearest rounding the boundary is the top truncated bit (one-half);
     otherwise it is zero.  */
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  if (count == 0) {
    /* Unsigned arithmetic wraps, so the smaller of the two differences is
       the distance between PART and BOUNDARY.  */
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    /* Top part sits on the boundary: parts COUNT-1 down to 1 must all be
       zero for the distance to fit in a single part.  */
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    /* Top part is one below the boundary: parts COUNT-1 down to 1 must be
       all ones, and the distance is the negation of the lowest part.  */
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}

/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Successive squarings are stored back to back, starting with
     pow(5, 8).  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  p1 = dst;
  p2 = scratch;

  /* The low three bits of POWER come straight from the table; the remaining
     bits are handled by the loop below.  */
  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      /* Square the previous entry, which sits directly before POW5.  */
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product is in p2 with RESULT parts; swap the pointers so it is
         reachable through p1 and p2 becomes scratch space again.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    pow5 += partsCount;
  }

  /* The running product may have ended up in SCRATCH; copy it out.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}

/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";

/* Write out an integerPart in hexadecimal, starting with the most
   significant nibble.  Write out exactly COUNT hexdigits, return
   COUNT.  */
static unsigned int
partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
           const char *hexDigitChars)
{
  unsigned int result = count;

  assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);

  /* Drop the nibbles that are not wanted, then fill DST from its last
     position backwards, one low nibble at a time.  */
  part >>= (APFloatBase::integerPartWidth - 4 * count);
  while (count--) {
    dst[count] = hexDigitChars[part & 0xf];
    part >>= 4;
  }

  return result;
}

/* Write out an unsigned decimal integer.
*/ 813 static char * 814 writeUnsignedDecimal (char *dst, unsigned int n) 815 { 816 char buff[40], *p; 817 818 p = buff; 819 do 820 *p++ = '0' + n % 10; 821 while (n /= 10); 822 823 do 824 *dst++ = *--p; 825 while (p != buff); 826 827 return dst; 828 } 829 830 /* Write out a signed decimal integer. */ 831 static char * 832 writeSignedDecimal (char *dst, int value) 833 { 834 if (value < 0) { 835 *dst++ = '-'; 836 dst = writeUnsignedDecimal(dst, -(unsigned) value); 837 } else 838 dst = writeUnsignedDecimal(dst, value); 839 840 return dst; 841 } 842 843 namespace detail { 844 /* Constructors. */ 845 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 846 unsigned int count; 847 848 semantics = ourSemantics; 849 count = partCount(); 850 if (count > 1) 851 significand.parts = new integerPart[count]; 852 } 853 854 void IEEEFloat::freeSignificand() { 855 if (needsCleanup()) 856 delete [] significand.parts; 857 } 858 859 void IEEEFloat::assign(const IEEEFloat &rhs) { 860 assert(semantics == rhs.semantics); 861 862 sign = rhs.sign; 863 category = rhs.category; 864 exponent = rhs.exponent; 865 if (isFiniteNonZero() || category == fcNaN) 866 copySignificand(rhs); 867 } 868 869 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 870 assert(isFiniteNonZero() || category == fcNaN); 871 assert(rhs.partCount() >= partCount()); 872 873 APInt::tcAssign(significandParts(), rhs.significandParts(), 874 partCount()); 875 } 876 877 /* Make this number a NaN, with an arbitrary but deterministic value 878 for the significand. If double or longer, this is a signalling NaN, 879 which may not be ideal. If float, this is QNaN(0). 
*/ 880 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 881 category = fcNaN; 882 sign = Negative; 883 exponent = exponentNaN(); 884 885 integerPart *significand = significandParts(); 886 unsigned numParts = partCount(); 887 888 APInt fill_storage; 889 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 890 // Finite-only types do not distinguish signalling and quiet NaN, so 891 // make them all signalling. 892 SNaN = false; 893 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 894 sign = true; 895 fill_storage = APInt::getZero(semantics->precision - 1); 896 } else { 897 fill_storage = APInt::getAllOnes(semantics->precision - 1); 898 } 899 fill = &fill_storage; 900 } 901 902 // Set the significand bits to the fill. 903 if (!fill || fill->getNumWords() < numParts) 904 APInt::tcSet(significand, 0, numParts); 905 if (fill) { 906 APInt::tcAssign(significand, fill->getRawData(), 907 std::min(fill->getNumWords(), numParts)); 908 909 // Zero out the excess bits of the significand. 910 unsigned bitsToPreserve = semantics->precision - 1; 911 unsigned part = bitsToPreserve / 64; 912 bitsToPreserve %= 64; 913 significand[part] &= ((1ULL << bitsToPreserve) - 1); 914 for (part++; part != numParts; ++part) 915 significand[part] = 0; 916 } 917 918 unsigned QNaNBit = semantics->precision - 2; 919 920 if (SNaN) { 921 // We always have to clear the QNaN bit to make it an SNaN. 922 APInt::tcClearBit(significand, QNaNBit); 923 924 // If there are no bits set in the payload, we have to set 925 // *something* to make it a NaN instead of an infinity; 926 // conventionally, this is the next bit down from the QNaN bit. 927 if (APInt::tcIsZero(significand, numParts)) 928 APInt::tcSetBit(significand, QNaNBit - 1); 929 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 930 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 931 // Do nothing. 
932 } else { 933 // We always have to set the QNaN bit to make it a QNaN. 934 APInt::tcSetBit(significand, QNaNBit); 935 } 936 937 // For x87 extended precision, we want to make a NaN, not a 938 // pseudo-NaN. Maybe we should expose the ability to make 939 // pseudo-NaNs? 940 if (semantics == &semX87DoubleExtended) 941 APInt::tcSetBit(significand, QNaNBit + 1); 942 } 943 944 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 945 if (this != &rhs) { 946 if (semantics != rhs.semantics) { 947 freeSignificand(); 948 initialize(rhs.semantics); 949 } 950 assign(rhs); 951 } 952 953 return *this; 954 } 955 956 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 957 freeSignificand(); 958 959 semantics = rhs.semantics; 960 significand = rhs.significand; 961 exponent = rhs.exponent; 962 category = rhs.category; 963 sign = rhs.sign; 964 965 rhs.semantics = &semBogus; 966 return *this; 967 } 968 969 bool IEEEFloat::isDenormal() const { 970 return isFiniteNonZero() && (exponent == semantics->minExponent) && 971 (APInt::tcExtractBit(significandParts(), 972 semantics->precision - 1) == 0); 973 } 974 975 bool IEEEFloat::isSmallest() const { 976 // The smallest number by magnitude in our format will be the smallest 977 // denormal, i.e. the floating point number with exponent being minimum 978 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 979 return isFiniteNonZero() && exponent == semantics->minExponent && 980 significandMSB() == 0; 981 } 982 983 bool IEEEFloat::isSmallestNormalized() const { 984 return getCategory() == fcNormal && exponent == semantics->minExponent && 985 isSignificandAllZerosExceptMSB(); 986 } 987 988 bool IEEEFloat::isSignificandAllOnes() const { 989 // Test if the significand excluding the integral bit is all ones. This allows 990 // us to test for binade boundaries. 
991 const integerPart *Parts = significandParts(); 992 const unsigned PartCount = partCountForBits(semantics->precision); 993 for (unsigned i = 0; i < PartCount - 1; i++) 994 if (~Parts[i]) 995 return false; 996 997 // Set the unused high bits to all ones when we compare. 998 const unsigned NumHighBits = 999 PartCount*integerPartWidth - semantics->precision + 1; 1000 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1001 "Can not have more high bits to fill than integerPartWidth"); 1002 const integerPart HighBitFill = 1003 ~integerPart(0) << (integerPartWidth - NumHighBits); 1004 if (~(Parts[PartCount - 1] | HighBitFill)) 1005 return false; 1006 1007 return true; 1008 } 1009 1010 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1011 // Test if the significand excluding the integral bit is all ones except for 1012 // the least significant bit. 1013 const integerPart *Parts = significandParts(); 1014 1015 if (Parts[0] & 1) 1016 return false; 1017 1018 const unsigned PartCount = partCountForBits(semantics->precision); 1019 for (unsigned i = 0; i < PartCount - 1; i++) { 1020 if (~Parts[i] & ~unsigned{!i}) 1021 return false; 1022 } 1023 1024 // Set the unused high bits to all ones when we compare. 1025 const unsigned NumHighBits = 1026 PartCount * integerPartWidth - semantics->precision + 1; 1027 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1028 "Can not have more high bits to fill than integerPartWidth"); 1029 const integerPart HighBitFill = ~integerPart(0) 1030 << (integerPartWidth - NumHighBits); 1031 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1032 return false; 1033 1034 return true; 1035 } 1036 1037 bool IEEEFloat::isSignificandAllZeros() const { 1038 // Test if the significand excluding the integral bit is all zeros. This 1039 // allows us to test for binade boundaries. 
1040 const integerPart *Parts = significandParts(); 1041 const unsigned PartCount = partCountForBits(semantics->precision); 1042 1043 for (unsigned i = 0; i < PartCount - 1; i++) 1044 if (Parts[i]) 1045 return false; 1046 1047 // Compute how many bits are used in the final word. 1048 const unsigned NumHighBits = 1049 PartCount*integerPartWidth - semantics->precision + 1; 1050 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1051 "clear than integerPartWidth"); 1052 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1053 1054 if (Parts[PartCount - 1] & HighBitMask) 1055 return false; 1056 1057 return true; 1058 } 1059 1060 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1061 const integerPart *Parts = significandParts(); 1062 const unsigned PartCount = partCountForBits(semantics->precision); 1063 1064 for (unsigned i = 0; i < PartCount - 1; i++) { 1065 if (Parts[i]) 1066 return false; 1067 } 1068 1069 const unsigned NumHighBits = 1070 PartCount * integerPartWidth - semantics->precision + 1; 1071 return Parts[PartCount - 1] == integerPart(1) 1072 << (integerPartWidth - NumHighBits); 1073 } 1074 1075 bool IEEEFloat::isLargest() const { 1076 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1077 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1078 // The largest number by magnitude in our format will be the floating point 1079 // number with maximum exponent and with significand that is all ones except 1080 // the LSB. 1081 return isFiniteNonZero() && exponent == semantics->maxExponent && 1082 isSignificandAllOnesExceptLSB(); 1083 } else { 1084 // The largest number by magnitude in our format will be the floating point 1085 // number with maximum exponent and with significand that is all ones. 
1086 return isFiniteNonZero() && exponent == semantics->maxExponent && 1087 isSignificandAllOnes(); 1088 } 1089 } 1090 1091 bool IEEEFloat::isInteger() const { 1092 // This could be made more efficient; I'm going for obviously correct. 1093 if (!isFinite()) return false; 1094 IEEEFloat truncated = *this; 1095 truncated.roundToIntegral(rmTowardZero); 1096 return compare(truncated) == cmpEqual; 1097 } 1098 1099 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1100 if (this == &rhs) 1101 return true; 1102 if (semantics != rhs.semantics || 1103 category != rhs.category || 1104 sign != rhs.sign) 1105 return false; 1106 if (category==fcZero || category==fcInfinity) 1107 return true; 1108 1109 if (isFiniteNonZero() && exponent != rhs.exponent) 1110 return false; 1111 1112 return std::equal(significandParts(), significandParts() + partCount(), 1113 rhs.significandParts()); 1114 } 1115 1116 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1117 initialize(&ourSemantics); 1118 sign = 0; 1119 category = fcNormal; 1120 zeroSignificand(); 1121 exponent = ourSemantics.precision - 1; 1122 significandParts()[0] = value; 1123 normalize(rmNearestTiesToEven, lfExactlyZero); 1124 } 1125 1126 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1127 initialize(&ourSemantics); 1128 makeZero(false); 1129 } 1130 1131 // Delegate to the previous constructor, because later copy constructor may 1132 // actually inspects category, which can't be garbage. 
1133 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1134 : IEEEFloat(ourSemantics) {} 1135 1136 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1137 initialize(rhs.semantics); 1138 assign(rhs); 1139 } 1140 1141 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1142 *this = std::move(rhs); 1143 } 1144 1145 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1146 1147 unsigned int IEEEFloat::partCount() const { 1148 return partCountForBits(semantics->precision + 1); 1149 } 1150 1151 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 1152 return const_cast<IEEEFloat *>(this)->significandParts(); 1153 } 1154 1155 IEEEFloat::integerPart *IEEEFloat::significandParts() { 1156 if (partCount() > 1) 1157 return significand.parts; 1158 else 1159 return &significand.part; 1160 } 1161 1162 void IEEEFloat::zeroSignificand() { 1163 APInt::tcSet(significandParts(), 0, partCount()); 1164 } 1165 1166 /* Increment an fcNormal floating point number's significand. */ 1167 void IEEEFloat::incrementSignificand() { 1168 integerPart carry; 1169 1170 carry = APInt::tcIncrement(significandParts(), partCount()); 1171 1172 /* Our callers should never cause us to overflow. */ 1173 assert(carry == 0); 1174 (void)carry; 1175 } 1176 1177 /* Add the significand of the RHS. Returns the carry flag. */ 1178 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1179 integerPart *parts; 1180 1181 parts = significandParts(); 1182 1183 assert(semantics == rhs.semantics); 1184 assert(exponent == rhs.exponent); 1185 1186 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1187 } 1188 1189 /* Subtract the significand of the RHS with a borrow flag. Returns 1190 the borrow flag. 
*/
IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
                                                      integerPart borrow) {
  integerPart *parts;

  parts = significandParts();

  // Both operands must share semantics and already be aligned to the same
  // exponent.
  assert(semantics == rhs.semantics);
  assert(exponent == rhs.exponent);

  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
                           partCount());
}

/* Multiply the significand of the RHS. If ADDEND is non-zero, add it
   on to the full-precision result of the multiplication. Returns the
   lost fraction. The result is left un-normalized when its MSB falls
   below the precision; see the note near the end of the function. */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend) {
  unsigned int omsb; // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Use the on-stack scratch buffer when it is large enough; otherwise
  // heap-allocate (released before returning).
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics. */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Temporarily make *this view the wide product as its own significand so
    // the ordinary significand addition below can be reused.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state. */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent need to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, not precision;
  // presumably the extra one pairs with the overflow bit accounted for by the
  // "exponent += 2" above - confirm and reconcile this comment.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}

// Plain multiplication: forwards to the two-argument form with a
// default-constructed addend of the same semantics.
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  return multiplySignificand(rhs, IEEEFloat(*semantics));
}

/* Divide the significand of *this by the significand of RHS, leaving the
   quotient in *this and adjusting the exponent. Returns the lost fraction. */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  // One buffer holds both working copies: dividend first, divisor second.
  // The stack scratch is used when 2 * partsCount words fit in it.
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place. */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor. */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend. */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one. */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division. One quotient bit is produced per iteration, from the
     most significant bit downwards. */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction. The remainder has already been doubled by
     the final shift above, so comparing it with the divisor classifies it
     relative to one half. */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}

// Result of APInt::tcMSB over the significand words.
unsigned int IEEEFloat::significandMSB() const {
  return APInt::tcMSB(significandParts(), partCount());
}

// Result of APInt::tcLSB over the significand words.
unsigned int IEEEFloat::significandLSB() const {
  return APInt::tcLSB(significandParts(), partCount());
}

/* Shift the significand right BITS bits, add BITS to the exponent, and
   return the fraction shifted out.
   Note that a zero result is NOT normalized to fcZero. */
lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
  /* Our exponent should not overflow. */
  assert((ExponentType) (exponent + bits) >= exponent);

  exponent += bits;

  return shiftRight(significandParts(), partCount(), bits);
}

/* Shift the significand left BITS bits, subtract BITS from its exponent. */
void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
  assert(bits < semantics->precision);

  if (bits) {
    unsigned int partsCount = partCount();

    APInt::tcShiftLeft(significandParts(), partsCount, bits);
    exponent -= bits;

    assert(!APInt::tcIsZero(significandParts(), partsCount));
  }
}

// Compares magnitudes of two finite non-zero values of identical semantics:
// exponents first, then the significands.
IEEEFloat::cmpResult
IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
  int compare;

  assert(semantics == rhs.semantics);
  assert(isFiniteNonZero());
  assert(rhs.isFiniteNonZero());

  compare = exponent - rhs.exponent;

  /* If exponents are equal, do an unsigned bignum comparison of the
     significands. */
  if (compare == 0)
    compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
                               partCount());

  if (compare > 0)
    return cmpGreaterThan;
  else if (compare < 0)
    return cmpLessThan;
  else
    return cmpEqual;
}

/* Set the least significant BITS bits of a bignum, clear the
   rest. DST has PARTS words. */
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
                                      unsigned bits) {
  unsigned i = 0;
  // Whole words that are entirely set.
  while (bits > APInt::APINT_BITS_PER_WORD) {
    dst[i++] = ~(APInt::WordType)0;
    bits -= APInt::APINT_BITS_PER_WORD;
  }

  // The last (possibly full) word containing set bits.
  if (bits)
    dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);

  while (i < parts)
    dst[i++] = 0;
}

/* Handle overflow. Sign is preserved. We either become infinity or
   the largest finite number. For NanOnly semantics the "infinity" case
   becomes a NaN instead. Returns opOverflow | opInexact in the
   infinity/NaN case and opInexact when clamping to the largest finite
   number. */
IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
  /* Infinity? */
  if (rounding_mode == rmNearestTiesToEven ||
      rounding_mode == rmNearestTiesToAway ||
      (rounding_mode == rmTowardPositive && !sign) ||
      (rounding_mode == rmTowardNegative && sign)) {
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return (opStatus) (opOverflow | opInexact);
  }

  /* Otherwise we become the largest finite number. */
  category = fcNormal;
  exponent = semantics->maxExponent;
  tcSetLeastSignificantBits(significandParts(), partCount(),
                            semantics->precision);
  // With the all-ones NaN encoding the all-ones significand at the maximum
  // exponent is taken, so the LSB is cleared.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    APInt::tcClearBit(significandParts(), 0);

  return opInexact;
}

/* Returns TRUE if, when truncating the current number, with BIT the
   new LSB, with the given lost fraction and rounding mode, the result
   would need to be rounded away from zero (i.e., by increasing the
   significand). This routine must work for fcZero of both signs, and
   fcNormal numbers. */
bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
                                  lostFraction lost_fraction,
                                  unsigned int bit) const {
  /* NaNs and infinities should not have lost fractions. */
  assert(isFiniteNonZero() || category == fcZero);

  /* Current callers never pass this so we don't handle it. */
  assert(lost_fraction != lfExactlyZero);

  switch (rounding_mode) {
  case rmNearestTiesToAway:
    return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;

  case rmNearestTiesToEven:
    if (lost_fraction == lfMoreThanHalf)
      return true;

    /* Our zeroes don't have a significand to test. */
    if (lost_fraction == lfExactlyHalf && category != fcZero)
      return APInt::tcExtractBit(significandParts(), bit);

    return false;

  case rmTowardZero:
    return false;

  case rmTowardPositive:
    return !sign;

  case rmTowardNegative:
    return sign;

  default:
    break;
  }
  llvm_unreachable("Invalid rounding mode found");
}

// Brings a finite non-zero value into canonical form and rounds it using
// LOST_FRACTION and ROUNDING_MODE.  Values of any other category are returned
// untouched with opOK.  The returned status reports overflow, underflow and
// inexactness.
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb; /* One, not zero, based MSB. */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers. */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1. We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent. */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode. */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that. */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision. */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction. */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date. */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones value is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction. */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results. */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes. */
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero. */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow? */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one. However if we already have the
         maximum exponent we overflow to infinity. */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones value is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow. */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal. */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes. */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }

  /* The fcZero case is a denormal that underflowed to zero. */
  return (opStatus) (opUnderflow | opInexact);
}

// Handles every add/subtract operand pairing in which at least one side is
// not a finite non-zero number.  For the normal/normal pairing it returns
// opDivByZero as a marker that the caller must do the real arithmetic.
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    // Only the right operand is a NaN: the result takes its value.
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    // A signalling NaN result is quieted and reported as invalid; a
    // signalling right operand alone is also reported as invalid.
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller. */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted. */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;
  }
}

/* Add or subtract two normal numbers. Returns the lost fraction; the
   result is left for the caller to normalize. */
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
                                                 bool subtract) {
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction. */
  subtract ^= static_cast<bool>(sign ^ rhs.sign);

  /* Are we bigger exponent-wise than the RHS? */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect. */
  if (subtract) {
    IEEEFloat temp_rhs(rhs);

    // Align the operands: the smaller-exponent side is shifted right by one
    // bit less than the exponent gap and the other side left by one bit.
    if (bits == 0)
      lost_fraction = lfExactlyZero;
    else if (bits > 0) {
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
    } else {
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
    }

    // Should we reverse the subtraction.
    if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted. */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary. */
    assert(!carry);
    (void)carry;
  } else {
    if (bits > 0) {
      IEEEFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen. */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}

// Category handling for multiplication.  Sets the result category for every
// operand pairing and returns the operation status; normal/normal is left
// for the caller to compute.
IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    // Cleared so that the xor in the shared path below yields rhs.sign.
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    category = fcInfinity;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcZero, fcZero):
    category = fcZero;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Category handling for division.  Sets the result category for every
// operand pairing and returns the operation status; normal/normal is left
// for the caller to compute.
IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    // Cleared so that the xor in the shared path below yields rhs.sign.
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
    // Division of a finite non-zero value by zero: infinity, or NaN for
    // semantics that have no infinity.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return opDivByZero;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Category handling for mod.  Returns opOK for normal/normal, leaving the
// computation to the caller.
IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Category handling for remainder.  Identical to modSpecials except that
// normal/normal yields opDivByZero as a "not a special case" marker.
IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero; // fake status, indicating this is not a special case
  }
}

/* Change sign.
*/ 1991 void IEEEFloat::changeSign() { 1992 // With NaN-as-negative-zero, neither NaN or negative zero can change 1993 // their signs. 1994 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 1995 (isZero() || isNaN())) 1996 return; 1997 /* Look mummy, this one's easy. */ 1998 sign = !sign; 1999 } 2000 2001 /* Normalized addition or subtraction. */ 2002 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2003 roundingMode rounding_mode, 2004 bool subtract) { 2005 opStatus fs; 2006 2007 fs = addOrSubtractSpecials(rhs, subtract); 2008 2009 /* This return code means it was not a simple case. */ 2010 if (fs == opDivByZero) { 2011 lostFraction lost_fraction; 2012 2013 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2014 fs = normalize(rounding_mode, lost_fraction); 2015 2016 /* Can only be zero if we lost no fraction. */ 2017 assert(category != fcZero || lost_fraction == lfExactlyZero); 2018 } 2019 2020 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2021 positive zero unless rounding to minus infinity, except that 2022 adding two like-signed zeroes gives that zero. */ 2023 if (category == fcZero) { 2024 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2025 sign = (rounding_mode == rmTowardNegative); 2026 // NaN-in-negative-zero means zeros need to be normalized to +0. 2027 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2028 sign = false; 2029 } 2030 2031 return fs; 2032 } 2033 2034 /* Normalized addition. */ 2035 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2036 roundingMode rounding_mode) { 2037 return addOrSubtract(rhs, rounding_mode, false); 2038 } 2039 2040 /* Normalized subtraction. */ 2041 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2042 roundingMode rounding_mode) { 2043 return addOrSubtract(rhs, rounding_mode, true); 2044 } 2045 2046 /* Normalized multiply. 
*/ 2047 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 2048 roundingMode rounding_mode) { 2049 opStatus fs; 2050 2051 sign ^= rhs.sign; 2052 fs = multiplySpecials(rhs); 2053 2054 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2055 sign = false; 2056 if (isFiniteNonZero()) { 2057 lostFraction lost_fraction = multiplySignificand(rhs); 2058 fs = normalize(rounding_mode, lost_fraction); 2059 if (lost_fraction != lfExactlyZero) 2060 fs = (opStatus) (fs | opInexact); 2061 } 2062 2063 return fs; 2064 } 2065 2066 /* Normalized divide. */ 2067 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 2068 roundingMode rounding_mode) { 2069 opStatus fs; 2070 2071 sign ^= rhs.sign; 2072 fs = divideSpecials(rhs); 2073 2074 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2075 sign = false; 2076 if (isFiniteNonZero()) { 2077 lostFraction lost_fraction = divideSignificand(rhs); 2078 fs = normalize(rounding_mode, lost_fraction); 2079 if (lost_fraction != lfExactlyZero) 2080 fs = (opStatus) (fs | opInexact); 2081 } 2082 2083 return fs; 2084 } 2085 2086 /* Normalized remainder. */ 2087 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 2088 opStatus fs; 2089 unsigned int origSign = sign; 2090 2091 // First handle the special cases. 2092 fs = remainderSpecials(rhs); 2093 if (fs != opDivByZero) 2094 return fs; 2095 2096 fs = opOK; 2097 2098 // Make sure the current value is less than twice the denom. If the addition 2099 // did not succeed (an overflow has happened), which means that the finite 2100 // value we currently posses must be less than twice the denom (as we are 2101 // using the same semantics). 2102 IEEEFloat P2 = rhs; 2103 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 2104 fs = mod(P2); 2105 assert(fs == opOK); 2106 } 2107 2108 // Lets work with absolute numbers. 
2109 IEEEFloat P = rhs; 2110 P.sign = false; 2111 sign = false; 2112 2113 // 2114 // To calculate the remainder we use the following scheme. 2115 // 2116 // The remainder is defained as follows: 2117 // 2118 // remainder = numer - rquot * denom = x - r * p 2119 // 2120 // Where r is the result of: x/p, rounded toward the nearest integral value 2121 // (with halfway cases rounded toward the even number). 2122 // 2123 // Currently, (after x mod 2p): 2124 // r is the number of 2p's present inside x, which is inherently, an even 2125 // number of p's. 2126 // 2127 // We may split the remaining calculation into 4 options: 2128 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2129 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2130 // are done as well. 2131 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2132 // to subtract 1p at least once. 2133 // - if x >= p then we must subtract p at least once, as x must be a 2134 // remainder. 2135 // 2136 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2137 // 2138 // We can now split the remaining calculation to the following 3 options: 2139 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2140 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2141 // must round up to the next even number. so we must subtract p once more. 2142 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2143 // integral, and subtract p once more. 2144 // 2145 2146 // Extend the semantics to prevent an overflow/underflow or inexact result. 
2147 bool losesInfo; 2148 fltSemantics extendedSemantics = *semantics; 2149 extendedSemantics.maxExponent++; 2150 extendedSemantics.minExponent--; 2151 extendedSemantics.precision += 2; 2152 2153 IEEEFloat VEx = *this; 2154 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2155 assert(fs == opOK && !losesInfo); 2156 IEEEFloat PEx = P; 2157 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2158 assert(fs == opOK && !losesInfo); 2159 2160 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2161 // any fraction. 2162 fs = VEx.add(VEx, rmNearestTiesToEven); 2163 assert(fs == opOK); 2164 2165 if (VEx.compare(PEx) == cmpGreaterThan) { 2166 fs = subtract(P, rmNearestTiesToEven); 2167 assert(fs == opOK); 2168 2169 // Make VEx = this.add(this), but because we have different semantics, we do 2170 // not want to `convert` again, so we just subtract PEx twice (which equals 2171 // to the desired value). 2172 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2173 assert(fs == opOK); 2174 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2175 assert(fs == opOK); 2176 2177 cmpResult result = VEx.compare(PEx); 2178 if (result == cmpGreaterThan || result == cmpEqual) { 2179 fs = subtract(P, rmNearestTiesToEven); 2180 assert(fs == opOK); 2181 } 2182 } 2183 2184 if (isZero()) { 2185 sign = origSign; // IEEE754 requires this 2186 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2187 // But some 8-bit floats only have positive 0. 2188 sign = false; 2189 } 2190 2191 else 2192 sign ^= origSign; 2193 return fs; 2194 } 2195 2196 /* Normalized llvm frem (C fmod). 
*/ 2197 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 2198 opStatus fs; 2199 fs = modSpecials(rhs); 2200 unsigned int origSign = sign; 2201 2202 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 2203 compareAbsoluteValue(rhs) != cmpLessThan) { 2204 int Exp = ilogb(*this) - ilogb(rhs); 2205 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven); 2206 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly 2207 // check for it. 2208 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan) 2209 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); 2210 V.sign = sign; 2211 2212 fs = subtract(V, rmNearestTiesToEven); 2213 assert(fs==opOK); 2214 } 2215 if (isZero()) { 2216 sign = origSign; // fmod requires this 2217 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2218 sign = false; 2219 } 2220 return fs; 2221 } 2222 2223 /* Normalized fused-multiply-add. */ 2224 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 2225 const IEEEFloat &addend, 2226 roundingMode rounding_mode) { 2227 opStatus fs; 2228 2229 /* Post-multiplication sign, before addition. */ 2230 sign ^= multiplicand.sign; 2231 2232 /* If and only if all arguments are normal do we need to do an 2233 extended-precision calculation. */ 2234 if (isFiniteNonZero() && 2235 multiplicand.isFiniteNonZero() && 2236 addend.isFinite()) { 2237 lostFraction lost_fraction; 2238 2239 lost_fraction = multiplySignificand(multiplicand, addend); 2240 fs = normalize(rounding_mode, lost_fraction); 2241 if (lost_fraction != lfExactlyZero) 2242 fs = (opStatus) (fs | opInexact); 2243 2244 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2245 positive zero unless rounding to minus infinity, except that 2246 adding two like-signed zeroes gives that zero. 
*/ 2247 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { 2248 sign = (rounding_mode == rmTowardNegative); 2249 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2250 sign = false; 2251 } 2252 } else { 2253 fs = multiplySpecials(multiplicand); 2254 2255 /* FS can only be opOK or opInvalidOp. There is no more work 2256 to do in the latter case. The IEEE-754R standard says it is 2257 implementation-defined in this case whether, if ADDEND is a 2258 quiet NaN, we raise invalid op; this implementation does so. 2259 2260 If we need to do the addition we can do so with normal 2261 precision. */ 2262 if (fs == opOK) 2263 fs = addOrSubtract(addend, rounding_mode, false); 2264 } 2265 2266 return fs; 2267 } 2268 2269 /* Rounding-mode correct round to integral value. */ 2270 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2271 opStatus fs; 2272 2273 if (isInfinity()) 2274 // [IEEE Std 754-2008 6.1]: 2275 // The behavior of infinity in floating-point arithmetic is derived from the 2276 // limiting cases of real arithmetic with operands of arbitrarily 2277 // large magnitude, when such a limit exists. 2278 // ... 2279 // Operations on infinite operands are usually exact and therefore signal no 2280 // exceptions ... 2281 return opOK; 2282 2283 if (isNaN()) { 2284 if (isSignaling()) { 2285 // [IEEE Std 754-2008 6.2]: 2286 // Under default exception handling, any operation signaling an invalid 2287 // operation exception and for which a floating-point result is to be 2288 // delivered shall deliver a quiet NaN. 2289 makeQuiet(); 2290 // [IEEE Std 754-2008 6.2]: 2291 // Signaling NaNs shall be reserved operands that, under default exception 2292 // handling, signal the invalid operation exception(see 7.2) for every 2293 // general-computational and signaling-computational operation except for 2294 // the conversions described in 5.12. 
2295 return opInvalidOp; 2296 } else { 2297 // [IEEE Std 754-2008 6.2]: 2298 // For an operation with quiet NaN inputs, other than maximum and minimum 2299 // operations, if a floating-point result is to be delivered the result 2300 // shall be a quiet NaN which should be one of the input NaNs. 2301 // ... 2302 // Every general-computational and quiet-computational operation involving 2303 // one or more input NaNs, none of them signaling, shall signal no 2304 // exception, except fusedMultiplyAdd might signal the invalid operation 2305 // exception(see 7.2). 2306 return opOK; 2307 } 2308 } 2309 2310 if (isZero()) { 2311 // [IEEE Std 754-2008 6.3]: 2312 // ... the sign of the result of conversions, the quantize operation, the 2313 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2314 // the sign of the first or only operand. 2315 return opOK; 2316 } 2317 2318 // If the exponent is large enough, we know that this value is already 2319 // integral, and the arithmetic below would potentially cause it to saturate 2320 // to +/-Inf. Bail out early instead. 2321 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2322 return opOK; 2323 2324 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2325 // precision of our format, and then subtract it back off again. The choice 2326 // of rounding modes for the addition/subtraction determines the rounding mode 2327 // for our integral rounding as well. 2328 // NOTE: When the input value is negative, we do subtraction followed by 2329 // addition instead. 2330 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2331 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2332 IEEEFloat MagicConstant(*semantics); 2333 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2334 rmNearestTiesToEven); 2335 assert(fs == opOK); 2336 MagicConstant.sign = sign; 2337 2338 // Preserve the input sign so that we can handle the case of zero result 2339 // correctly. 
2340 bool inputSign = isNegative(); 2341 2342 fs = add(MagicConstant, rounding_mode); 2343 2344 // Current value and 'MagicConstant' are both integers, so the result of the 2345 // subtraction is always exact according to Sterbenz' lemma. 2346 subtract(MagicConstant, rounding_mode); 2347 2348 // Restore the input sign. 2349 if (inputSign != isNegative()) 2350 changeSign(); 2351 2352 return fs; 2353 } 2354 2355 2356 /* Comparison requires normalized numbers. */ 2357 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2358 cmpResult result; 2359 2360 assert(semantics == rhs.semantics); 2361 2362 switch (PackCategoriesIntoKey(category, rhs.category)) { 2363 default: 2364 llvm_unreachable(nullptr); 2365 2366 case PackCategoriesIntoKey(fcNaN, fcZero): 2367 case PackCategoriesIntoKey(fcNaN, fcNormal): 2368 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2369 case PackCategoriesIntoKey(fcNaN, fcNaN): 2370 case PackCategoriesIntoKey(fcZero, fcNaN): 2371 case PackCategoriesIntoKey(fcNormal, fcNaN): 2372 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2373 return cmpUnordered; 2374 2375 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2376 case PackCategoriesIntoKey(fcInfinity, fcZero): 2377 case PackCategoriesIntoKey(fcNormal, fcZero): 2378 if (sign) 2379 return cmpLessThan; 2380 else 2381 return cmpGreaterThan; 2382 2383 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2384 case PackCategoriesIntoKey(fcZero, fcInfinity): 2385 case PackCategoriesIntoKey(fcZero, fcNormal): 2386 if (rhs.sign) 2387 return cmpGreaterThan; 2388 else 2389 return cmpLessThan; 2390 2391 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2392 if (sign == rhs.sign) 2393 return cmpEqual; 2394 else if (sign) 2395 return cmpLessThan; 2396 else 2397 return cmpGreaterThan; 2398 2399 case PackCategoriesIntoKey(fcZero, fcZero): 2400 return cmpEqual; 2401 2402 case PackCategoriesIntoKey(fcNormal, fcNormal): 2403 break; 2404 } 2405 2406 /* Two normal numbers. Do they have the same sign? 
*/ 2407 if (sign != rhs.sign) { 2408 if (sign) 2409 result = cmpLessThan; 2410 else 2411 result = cmpGreaterThan; 2412 } else { 2413 /* Compare absolute values; invert result if negative. */ 2414 result = compareAbsoluteValue(rhs); 2415 2416 if (sign) { 2417 if (result == cmpLessThan) 2418 result = cmpGreaterThan; 2419 else if (result == cmpGreaterThan) 2420 result = cmpLessThan; 2421 } 2422 } 2423 2424 return result; 2425 } 2426 2427 /// IEEEFloat::convert - convert a value of one floating point type to another. 2428 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2429 /// records whether the transformation lost information, i.e. whether 2430 /// converting the result back to the original type will produce the 2431 /// original value (this is almost the same as return value==fsOK, but there 2432 /// are edge cases where this is not so). 2433 2434 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2435 roundingMode rounding_mode, 2436 bool *losesInfo) { 2437 lostFraction lostFraction; 2438 unsigned int newPartCount, oldPartCount; 2439 opStatus fs; 2440 int shift; 2441 const fltSemantics &fromSemantics = *semantics; 2442 bool is_signaling = isSignaling(); 2443 2444 lostFraction = lfExactlyZero; 2445 newPartCount = partCountForBits(toSemantics.precision + 1); 2446 oldPartCount = partCount(); 2447 shift = toSemantics.precision - fromSemantics.precision; 2448 2449 bool X86SpecialNan = false; 2450 if (&fromSemantics == &semX87DoubleExtended && 2451 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2452 (!(*significandParts() & 0x8000000000000000ULL) || 2453 !(*significandParts() & 0x4000000000000000ULL))) { 2454 // x86 has some unusual NaNs which cannot be represented in any other 2455 // format; note them here. 
2456 X86SpecialNan = true; 2457 } 2458 2459 // If this is a truncation of a denormal number, and the target semantics 2460 // has larger exponent range than the source semantics (this can happen 2461 // when truncating from PowerPC double-double to double format), the 2462 // right shift could lose result mantissa bits. Adjust exponent instead 2463 // of performing excessive shift. 2464 // Also do a similar trick in case shifting denormal would produce zero 2465 // significand as this case isn't handled correctly by normalize. 2466 if (shift < 0 && isFiniteNonZero()) { 2467 int omsb = significandMSB() + 1; 2468 int exponentChange = omsb - fromSemantics.precision; 2469 if (exponent + exponentChange < toSemantics.minExponent) 2470 exponentChange = toSemantics.minExponent - exponent; 2471 if (exponentChange < shift) 2472 exponentChange = shift; 2473 if (exponentChange < 0) { 2474 shift -= exponentChange; 2475 exponent += exponentChange; 2476 } else if (omsb <= -shift) { 2477 exponentChange = omsb + shift - 1; // leave at least one bit set 2478 shift -= exponentChange; 2479 exponent += exponentChange; 2480 } 2481 } 2482 2483 // If this is a truncation, perform the shift before we narrow the storage. 2484 if (shift < 0 && (isFiniteNonZero() || 2485 (category == fcNaN && semantics->nonFiniteBehavior != 2486 fltNonfiniteBehavior::NanOnly))) 2487 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2488 2489 // Fix the storage so it can hold to new value. 2490 if (newPartCount > oldPartCount) { 2491 // The new type requires more storage; make it available. 2492 integerPart *newParts; 2493 newParts = new integerPart[newPartCount]; 2494 APInt::tcSet(newParts, 0, newPartCount); 2495 if (isFiniteNonZero() || category==fcNaN) 2496 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2497 freeSignificand(); 2498 significand.parts = newParts; 2499 } else if (newPartCount == 1 && oldPartCount != 1) { 2500 // Switch to built-in storage for a single part. 
2501 integerPart newPart = 0; 2502 if (isFiniteNonZero() || category==fcNaN) 2503 newPart = significandParts()[0]; 2504 freeSignificand(); 2505 significand.part = newPart; 2506 } 2507 2508 // Now that we have the right storage, switch the semantics. 2509 semantics = &toSemantics; 2510 2511 // If this is an extension, perform the shift now that the storage is 2512 // available. 2513 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2514 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2515 2516 if (isFiniteNonZero()) { 2517 fs = normalize(rounding_mode, lostFraction); 2518 *losesInfo = (fs != opOK); 2519 } else if (category == fcNaN) { 2520 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2521 *losesInfo = 2522 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly; 2523 makeNaN(false, sign); 2524 return is_signaling ? opInvalidOp : opOK; 2525 } 2526 2527 // If NaN is negative zero, we need to create a new NaN to avoid converting 2528 // NaN to -Inf. 2529 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && 2530 semantics->nanEncoding != fltNanEncoding::NegativeZero) 2531 makeNaN(false, false); 2532 2533 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2534 2535 // For x87 extended precision, we want to make a NaN, not a special NaN if 2536 // the input wasn't special either. 2537 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2538 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2539 2540 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2541 // This also guarantees that a sNaN does not become Inf on a truncation 2542 // that loses all payload bits. 
2543 if (is_signaling) { 2544 makeQuiet(); 2545 fs = opInvalidOp; 2546 } else { 2547 fs = opOK; 2548 } 2549 } else if (category == fcInfinity && 2550 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2551 makeNaN(false, sign); 2552 *losesInfo = true; 2553 fs = opInexact; 2554 } else if (category == fcZero && 2555 semantics->nanEncoding == fltNanEncoding::NegativeZero) { 2556 // Negative zero loses info, but positive zero doesn't. 2557 *losesInfo = 2558 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; 2559 fs = *losesInfo ? opInexact : opOK; 2560 // NaN is negative zero means -0 -> +0, which can lose information 2561 sign = false; 2562 } else { 2563 *losesInfo = false; 2564 fs = opOK; 2565 } 2566 2567 return fs; 2568 } 2569 2570 /* Convert a floating point number to an integer according to the 2571 rounding mode. If the rounded integer value is out of range this 2572 returns an invalid operation exception and the contents of the 2573 destination parts are unspecified. If the rounded value is in 2574 range but the floating point number is not the exact integer, the C 2575 standard doesn't require an inexact exception to be raised. IEEE 2576 854 does require it so we do that. 2577 2578 Note that for conversions to integer type the C standard requires 2579 round-to-zero to always be used. */ 2580 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2581 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2582 roundingMode rounding_mode, bool *isExact) const { 2583 lostFraction lost_fraction; 2584 const integerPart *src; 2585 unsigned int dstPartsCount, truncatedBits; 2586 2587 *isExact = false; 2588 2589 /* Handle the three special cases first. 
*/ 2590 if (category == fcInfinity || category == fcNaN) 2591 return opInvalidOp; 2592 2593 dstPartsCount = partCountForBits(width); 2594 assert(dstPartsCount <= parts.size() && "Integer too big"); 2595 2596 if (category == fcZero) { 2597 APInt::tcSet(parts.data(), 0, dstPartsCount); 2598 // Negative zero can't be represented as an int. 2599 *isExact = !sign; 2600 return opOK; 2601 } 2602 2603 src = significandParts(); 2604 2605 /* Step 1: place our absolute value, with any fraction truncated, in 2606 the destination. */ 2607 if (exponent < 0) { 2608 /* Our absolute value is less than one; truncate everything. */ 2609 APInt::tcSet(parts.data(), 0, dstPartsCount); 2610 /* For exponent -1 the integer bit represents .5, look at that. 2611 For smaller exponents leftmost truncated bit is 0. */ 2612 truncatedBits = semantics->precision -1U - exponent; 2613 } else { 2614 /* We want the most significant (exponent + 1) bits; the rest are 2615 truncated. */ 2616 unsigned int bits = exponent + 1U; 2617 2618 /* Hopelessly large in magnitude? */ 2619 if (bits > width) 2620 return opInvalidOp; 2621 2622 if (bits < semantics->precision) { 2623 /* We truncate (semantics->precision - bits) bits. */ 2624 truncatedBits = semantics->precision - bits; 2625 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2626 } else { 2627 /* We want at least as many bits as are available. */ 2628 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2629 0); 2630 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2631 bits - semantics->precision); 2632 truncatedBits = 0; 2633 } 2634 } 2635 2636 /* Step 2: work out any lost fraction, and increment the absolute 2637 value if we would round away from zero. 
*/ 2638 if (truncatedBits) { 2639 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2640 truncatedBits); 2641 if (lost_fraction != lfExactlyZero && 2642 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2643 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2644 return opInvalidOp; /* Overflow. */ 2645 } 2646 } else { 2647 lost_fraction = lfExactlyZero; 2648 } 2649 2650 /* Step 3: check if we fit in the destination. */ 2651 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2652 2653 if (sign) { 2654 if (!isSigned) { 2655 /* Negative numbers cannot be represented as unsigned. */ 2656 if (omsb != 0) 2657 return opInvalidOp; 2658 } else { 2659 /* It takes omsb bits to represent the unsigned integer value. 2660 We lose a bit for the sign, but care is needed as the 2661 maximally negative integer is a special case. */ 2662 if (omsb == width && 2663 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2664 return opInvalidOp; 2665 2666 /* This case can happen because of rounding. */ 2667 if (omsb > width) 2668 return opInvalidOp; 2669 } 2670 2671 APInt::tcNegate (parts.data(), dstPartsCount); 2672 } else { 2673 if (omsb >= width + !isSigned) 2674 return opInvalidOp; 2675 } 2676 2677 if (lost_fraction == lfExactlyZero) { 2678 *isExact = true; 2679 return opOK; 2680 } else 2681 return opInexact; 2682 } 2683 2684 /* Same as convertToSignExtendedInteger, except we provide 2685 deterministic values in case of an invalid operation exception, 2686 namely zero for NaNs and the minimal or maximal value respectively 2687 for underflow or overflow. 2688 The *isExact output tells whether the result is exact, in the sense 2689 that converting it back to the original floating point type produces 2690 the original value. This is almost equivalent to result==opOK, 2691 except for negative zeroes. 
2692 */ 2693 IEEEFloat::opStatus 2694 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2695 unsigned int width, bool isSigned, 2696 roundingMode rounding_mode, bool *isExact) const { 2697 opStatus fs; 2698 2699 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2700 isExact); 2701 2702 if (fs == opInvalidOp) { 2703 unsigned int bits, dstPartsCount; 2704 2705 dstPartsCount = partCountForBits(width); 2706 assert(dstPartsCount <= parts.size() && "Integer too big"); 2707 2708 if (category == fcNaN) 2709 bits = 0; 2710 else if (sign) 2711 bits = isSigned; 2712 else 2713 bits = width - isSigned; 2714 2715 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2716 if (sign && isSigned) 2717 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2718 } 2719 2720 return fs; 2721 } 2722 2723 /* Convert an unsigned integer SRC to a floating point number, 2724 rounding according to ROUNDING_MODE. The sign of the floating 2725 point number is not modified. */ 2726 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2727 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2728 unsigned int omsb, precision, dstCount; 2729 integerPart *dst; 2730 lostFraction lost_fraction; 2731 2732 category = fcNormal; 2733 omsb = APInt::tcMSB(src, srcCount) + 1; 2734 dst = significandParts(); 2735 dstCount = partCount(); 2736 precision = semantics->precision; 2737 2738 /* We want the most significant PRECISION bits of SRC. There may not 2739 be that many; extract what we can. 
*/ 2740 if (precision <= omsb) { 2741 exponent = omsb - 1; 2742 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2743 omsb - precision); 2744 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2745 } else { 2746 exponent = precision - 1; 2747 lost_fraction = lfExactlyZero; 2748 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2749 } 2750 2751 return normalize(rounding_mode, lost_fraction); 2752 } 2753 2754 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2755 roundingMode rounding_mode) { 2756 unsigned int partCount = Val.getNumWords(); 2757 APInt api = Val; 2758 2759 sign = false; 2760 if (isSigned && api.isNegative()) { 2761 sign = true; 2762 api = -api; 2763 } 2764 2765 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2766 } 2767 2768 /* Convert a two's complement integer SRC to a floating point number, 2769 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2770 integer is signed, in which case it must be sign-extended. */ 2771 IEEEFloat::opStatus 2772 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2773 unsigned int srcCount, bool isSigned, 2774 roundingMode rounding_mode) { 2775 opStatus status; 2776 2777 if (isSigned && 2778 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2779 integerPart *copy; 2780 2781 /* If we're signed and negative negate a copy. */ 2782 sign = true; 2783 copy = new integerPart[srcCount]; 2784 APInt::tcAssign(copy, src, srcCount); 2785 APInt::tcNegate(copy, srcCount); 2786 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2787 delete [] copy; 2788 } else { 2789 sign = false; 2790 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2791 } 2792 2793 return status; 2794 } 2795 2796 /* FIXME: should this just take a const APInt reference? 
/* FIXME: should this just take a const APInt reference?  */
/* Convert the WIDTH-bit integer held in PARTS to a floating point
   number, rounding according to ROUNDING_MODE.  When ISSIGNED is true
   and bit WIDTH - 1 is set, the value is negated and the sign is set.  */
IEEEFloat::opStatus
IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
                                          unsigned int width, bool isSigned,
                                          roundingMode rounding_mode) {
  unsigned int partCount = partCountForBits(width);
  APInt api = APInt(width, ArrayRef(parts, partCount));

  sign = false;
  if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
    sign = true;
    api = -api;
  }

  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}

/* Parse the hexadecimal significand and the exponent in S into this
   value, rounding according to ROUNDING_MODE.  Returns the status of
   the final normalization, or an Error when the text is malformed.
   NOTE(review): S presumably starts after any sign and "0x" prefix,
   since neither is handled here -- confirm against the caller.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromHexadecimalString(StringRef s,
                                        roundingMode rounding_mode) {
  lostFraction lost_fraction = lfExactlyZero;

  category = fcNormal;
  zeroSignificand();
  exponent = 0;

  integerPart *significand = significandParts();
  unsigned partsCount = partCount();
  /* Digits are stored from the most significant nibble downwards.  */
  unsigned bitPos = partsCount * integerPartWidth;
  bool computedTrailingFraction = false;

  // Skip leading zeroes and any (hexa)decimal point.
  StringRef::iterator begin = s.begin();
  StringRef::iterator end = s.end();
  StringRef::iterator dot;
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;
  StringRef::iterator firstSignificantDigit = p;

  while (p != end) {
    integerPart hex_value;

    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      continue;
    }

    /* The first character that is not a hex digit ends the significand.  */
    hex_value = hexDigitValue(*p);
    if (hex_value == UINT_MAX)
      break;

    p++;

    // Store the number while we have space.
    if (bitPos) {
      bitPos -= 4;
      hex_value <<= bitPos % integerPartWidth;
      significand[bitPos / integerPartWidth] |= hex_value;
    } else if (!computedTrailingFraction) {
      /* Out of space: fold the remaining digits into a lost fraction,
         once.  */
      auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
      if (!FractOrErr)
        return FractOrErr.takeError();
      lost_fraction = *FractOrErr;
      computedTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  if (p == end)
    return createError("Hex strings require an exponent");
  if (*p != 'p' && *p != 'P')
    return createError("Invalid character in significand");
  if (p == begin)
    return createError("Significand has no digits");
  if (dot != end && p - begin == 1)
    return createError("Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    int expAdjustment;

    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= partsCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    exponent = *ExpOrErr;
  }

  return normalize(rounding_mode, lost_fraction);
}

/* Set this value to the integer DECSIGPARTS (SIGPARTCOUNT parts)
   scaled by 10^EXP, rounded according to ROUNDING_MODE.  The product
   or quotient is computed in a wider working format whose size is
   doubled until the accumulated error bound shows that truncating
   rounds correctly.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  /* The precision field is filled in on every loop iteration below.  */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  /* Retry with twice as many parts until the result is known to round
     correctly.  */
  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}
zero, and then exponents that are obviously too large or too 3004 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 3005 definitely overflows if 3006 3007 (exp - 1) * L >= maxExponent 3008 3009 and definitely underflows to zero where 3010 3011 (exp + 1) * L <= minExponent - precision 3012 3013 With integer arithmetic the tightest bounds for L are 3014 3015 93/28 < L < 196/59 [ numerator <= 256 ] 3016 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 3017 */ 3018 3019 // Test if we have a zero number allowing for strings with no null terminators 3020 // and zero decimals with non-zero exponents. 3021 // 3022 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 3023 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 3024 // be at most one dot. On the other hand, if we have a zero with a non-zero 3025 // exponent, then we know that D.firstSigDigit will be non-numeric. 3026 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 3027 category = fcZero; 3028 fs = opOK; 3029 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 3030 sign = false; 3031 3032 /* Check whether the normalized exponent is high enough to overflow 3033 max during the log-rebasing in the max-exponent check below. */ 3034 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 3035 fs = handleOverflow(rounding_mode); 3036 3037 /* If it wasn't, then it also wasn't high enough to overflow max 3038 during the log-rebasing in the min-exponent check. Check that it 3039 won't overflow min in either check, then perform the min-exponent 3040 check. */ 3041 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 3042 (D.normalizedExponent + 1) * 28738 <= 3043 8651 * (semantics->minExponent - (int) semantics->precision)) { 3044 /* Underflow to zero and round. 
*/ 3045 category = fcNormal; 3046 zeroSignificand(); 3047 fs = normalize(rounding_mode, lfLessThanHalf); 3048 3049 /* We can finally safely perform the max-exponent check. */ 3050 } else if ((D.normalizedExponent - 1) * 42039 3051 >= 12655 * semantics->maxExponent) { 3052 /* Overflow and round. */ 3053 fs = handleOverflow(rounding_mode); 3054 } else { 3055 integerPart *decSignificand; 3056 unsigned int partCount; 3057 3058 /* A tight upper bound on number of bits required to hold an 3059 N-digit decimal integer is N * 196 / 59. Allocate enough space 3060 to hold the full significand, and an extra part required by 3061 tcMultiplyPart. */ 3062 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 3063 partCount = partCountForBits(1 + 196 * partCount / 59); 3064 decSignificand = new integerPart[partCount + 1]; 3065 partCount = 0; 3066 3067 /* Convert to binary efficiently - we do almost all multiplication 3068 in an integerPart. When this would overflow do we do a single 3069 bignum multiplication, and then revert again to multiplication 3070 in an integerPart. */ 3071 do { 3072 integerPart decValue, val, multiplier; 3073 3074 val = 0; 3075 multiplier = 1; 3076 3077 do { 3078 if (*p == '.') { 3079 p++; 3080 if (p == str.end()) { 3081 break; 3082 } 3083 } 3084 decValue = decDigitValue(*p++); 3085 if (decValue >= 10U) { 3086 delete[] decSignificand; 3087 return createError("Invalid character in significand"); 3088 } 3089 multiplier *= 10; 3090 val = val * 10 + decValue; 3091 /* The maximum number that can be multiplied by ten with any 3092 digit added without overflowing an integerPart. */ 3093 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 3094 3095 /* Multiply out the current part. */ 3096 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 3097 partCount, partCount + 1, false); 3098 3099 /* If we used another part (likely but not guaranteed), increase 3100 the count. 
*/ 3101 if (decSignificand[partCount]) 3102 partCount++; 3103 } while (p <= D.lastSigDigit); 3104 3105 category = fcNormal; 3106 fs = roundSignificandWithExponent(decSignificand, partCount, 3107 D.exponent, rounding_mode); 3108 3109 delete [] decSignificand; 3110 } 3111 3112 return fs; 3113 } 3114 3115 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 3116 const size_t MIN_NAME_SIZE = 3; 3117 3118 if (str.size() < MIN_NAME_SIZE) 3119 return false; 3120 3121 if (str == "inf" || str == "INFINITY" || str == "+Inf") { 3122 makeInf(false); 3123 return true; 3124 } 3125 3126 bool IsNegative = str.front() == '-'; 3127 if (IsNegative) { 3128 str = str.drop_front(); 3129 if (str.size() < MIN_NAME_SIZE) 3130 return false; 3131 3132 if (str == "inf" || str == "INFINITY" || str == "Inf") { 3133 makeInf(true); 3134 return true; 3135 } 3136 } 3137 3138 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 3139 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 3140 if (IsSignaling) { 3141 str = str.drop_front(); 3142 if (str.size() < MIN_NAME_SIZE) 3143 return false; 3144 } 3145 3146 if (str.starts_with("nan") || str.starts_with("NaN")) { 3147 str = str.drop_front(3); 3148 3149 // A NaN without payload. 3150 if (str.empty()) { 3151 makeNaN(IsSignaling, IsNegative); 3152 return true; 3153 } 3154 3155 // Allow the payload to be inside parentheses. 3156 if (str.front() == '(') { 3157 // Parentheses should be balanced (and not empty). 3158 if (str.size() <= 2 || str.back() != ')') 3159 return false; 3160 3161 str = str.slice(1, str.size() - 1); 3162 } 3163 3164 // Determine the payload number's radix. 3165 unsigned Radix = 10; 3166 if (str[0] == '0') { 3167 if (str.size() > 1 && tolower(str[1]) == 'x') { 3168 str = str.drop_front(2); 3169 Radix = 16; 3170 } else 3171 Radix = 8; 3172 } 3173 3174 // Parse the payload and make the NaN. 
3175 APInt Payload; 3176 if (!str.getAsInteger(Radix, Payload)) { 3177 makeNaN(IsSignaling, IsNegative, &Payload); 3178 return true; 3179 } 3180 } 3181 3182 return false; 3183 } 3184 3185 Expected<IEEEFloat::opStatus> 3186 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3187 if (str.empty()) 3188 return createError("Invalid string length"); 3189 3190 // Handle special cases. 3191 if (convertFromStringSpecials(str)) 3192 return opOK; 3193 3194 /* Handle a leading minus sign. */ 3195 StringRef::iterator p = str.begin(); 3196 size_t slen = str.size(); 3197 sign = *p == '-' ? 1 : 0; 3198 if (*p == '-' || *p == '+') { 3199 p++; 3200 slen--; 3201 if (!slen) 3202 return createError("String has no digits"); 3203 } 3204 3205 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 3206 if (slen == 2) 3207 return createError("Invalid string"); 3208 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 3209 rounding_mode); 3210 } 3211 3212 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 3213 } 3214 3215 /* Write out a hexadecimal representation of the floating point value 3216 to DST, which must be of sufficient size, in the C99 form 3217 [-]0xh.hhhhp[+-]d. Return the number of characters written, 3218 excluding the terminating NUL. 3219 3220 If UPPERCASE, the output is in upper case, otherwise in lower case. 3221 3222 HEXDIGITS digits appear altogether, rounding the value if 3223 necessary. If HEXDIGITS is 0, the minimal precision to display the 3224 number precisely is used instead. If nothing would appear after 3225 the decimal point it is suppressed. 3226 3227 The decimal exponent is always printed and has at least one digit. 3228 Zero values display an exponent of zero. Infinities and NaNs 3229 appear as "infinity" or "nan" respectively. 3230 3231 The above rules are as specified by C99. There is ambiguity about 3232 what the leading hexadecimal digit should be. 
This implementation 3233 uses whatever is necessary so that the exponent is displayed as 3234 stored. This implies the exponent will fall within the IEEE format 3235 range, and the leading hexadecimal digit will be 0 (for denormals), 3236 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 3237 any other digits zero). 3238 */ 3239 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 3240 bool upperCase, 3241 roundingMode rounding_mode) const { 3242 char *p; 3243 3244 p = dst; 3245 if (sign) 3246 *dst++ = '-'; 3247 3248 switch (category) { 3249 case fcInfinity: 3250 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 3251 dst += sizeof infinityL - 1; 3252 break; 3253 3254 case fcNaN: 3255 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 3256 dst += sizeof NaNU - 1; 3257 break; 3258 3259 case fcZero: 3260 *dst++ = '0'; 3261 *dst++ = upperCase ? 'X': 'x'; 3262 *dst++ = '0'; 3263 if (hexDigits > 1) { 3264 *dst++ = '.'; 3265 memset (dst, '0', hexDigits - 1); 3266 dst += hexDigits - 1; 3267 } 3268 *dst++ = upperCase ? 'P': 'p'; 3269 *dst++ = '0'; 3270 break; 3271 3272 case fcNormal: 3273 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 3274 break; 3275 } 3276 3277 *dst = 0; 3278 3279 return static_cast<unsigned int>(dst - p); 3280 } 3281 3282 /* Does the hard work of outputting the correctly rounded hexadecimal 3283 form of a normal floating point number with the specified number of 3284 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3285 digits necessary to print the value precisely is output. */ 3286 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3287 bool upperCase, 3288 roundingMode rounding_mode) const { 3289 unsigned int count, valueBits, shift, partsCount, outputDigits; 3290 const char *hexDigitChars; 3291 const integerPart *significand; 3292 char *p; 3293 bool roundUp; 3294 3295 *dst++ = '0'; 3296 *dst++ = upperCase ? 
'X': 'x'; 3297 3298 roundUp = false; 3299 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3300 3301 significand = significandParts(); 3302 partsCount = partCount(); 3303 3304 /* +3 because the first digit only uses the single integer bit, so 3305 we have 3 virtual zero most-significant-bits. */ 3306 valueBits = semantics->precision + 3; 3307 shift = integerPartWidth - valueBits % integerPartWidth; 3308 3309 /* The natural number of digits required ignoring trailing 3310 insignificant zeroes. */ 3311 outputDigits = (valueBits - significandLSB () + 3) / 4; 3312 3313 /* hexDigits of zero means use the required number for the 3314 precision. Otherwise, see if we are truncating. If we are, 3315 find out if we need to round away from zero. */ 3316 if (hexDigits) { 3317 if (hexDigits < outputDigits) { 3318 /* We are dropping non-zero bits, so need to check how to round. 3319 "bits" is the number of dropped bits. */ 3320 unsigned int bits; 3321 lostFraction fraction; 3322 3323 bits = valueBits - hexDigits * 4; 3324 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3325 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3326 } 3327 outputDigits = hexDigits; 3328 } 3329 3330 /* Write the digits consecutively, and start writing in the location 3331 of the hexadecimal point. We move the most significant digit 3332 left and add the hexadecimal point later. */ 3333 p = ++dst; 3334 3335 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3336 3337 while (outputDigits && count) { 3338 integerPart part; 3339 3340 /* Put the most significant integerPartWidth bits in "part". */ 3341 if (--count == partsCount) 3342 part = 0; /* An imaginary higher zero part. */ 3343 else 3344 part = significand[count] << shift; 3345 3346 if (count && shift) 3347 part |= significand[count - 1] >> (integerPartWidth - shift); 3348 3349 /* Convert as much of "part" to hexdigits as we can. 
*/ 3350 unsigned int curDigits = integerPartWidth / 4; 3351 3352 if (curDigits > outputDigits) 3353 curDigits = outputDigits; 3354 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3355 outputDigits -= curDigits; 3356 } 3357 3358 if (roundUp) { 3359 char *q = dst; 3360 3361 /* Note that hexDigitChars has a trailing '0'. */ 3362 do { 3363 q--; 3364 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3365 } while (*q == '0'); 3366 assert(q >= p); 3367 } else { 3368 /* Add trailing zeroes. */ 3369 memset (dst, '0', outputDigits); 3370 dst += outputDigits; 3371 } 3372 3373 /* Move the most significant digit to before the point, and if there 3374 is something after the decimal point add it. This must come 3375 after rounding above. */ 3376 p[-1] = p[0]; 3377 if (dst -1 == p) 3378 dst--; 3379 else 3380 p[0] = '.'; 3381 3382 /* Finally output the exponent. */ 3383 *dst++ = upperCase ? 'P': 'p'; 3384 3385 return writeSignedDecimal (dst, exponent); 3386 } 3387 3388 hash_code hash_value(const IEEEFloat &Arg) { 3389 if (!Arg.isFiniteNonZero()) 3390 return hash_combine((uint8_t)Arg.category, 3391 // NaN has no sign, fix it at zero. 3392 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3393 Arg.semantics->precision); 3394 3395 // Normal floats need their exponent and significand hashed. 3396 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3397 Arg.semantics->precision, Arg.exponent, 3398 hash_combine_range( 3399 Arg.significandParts(), 3400 Arg.significandParts() + Arg.partCount())); 3401 } 3402 3403 // Conversion from APFloat to/from host float/double. It may eventually be 3404 // possible to eliminate these and have everybody deal with APFloats, but that 3405 // will take a while. This approach will not easily extend to long double. 3406 // Current implementation requires integerPartWidth==64, which is correct at 3407 // the moment but could be made more general. 
3408 3409 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3410 // the actual IEEE respresentations. We compensate for that here. 3411 3412 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3413 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3414 assert(partCount()==2); 3415 3416 uint64_t myexponent, mysignificand; 3417 3418 if (isFiniteNonZero()) { 3419 myexponent = exponent+16383; //bias 3420 mysignificand = significandParts()[0]; 3421 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3422 myexponent = 0; // denormal 3423 } else if (category==fcZero) { 3424 myexponent = 0; 3425 mysignificand = 0; 3426 } else if (category==fcInfinity) { 3427 myexponent = 0x7fff; 3428 mysignificand = 0x8000000000000000ULL; 3429 } else { 3430 assert(category == fcNaN && "Unknown category"); 3431 myexponent = 0x7fff; 3432 mysignificand = significandParts()[0]; 3433 } 3434 3435 uint64_t words[2]; 3436 words[0] = mysignificand; 3437 words[1] = ((uint64_t)(sign & 1) << 15) | 3438 (myexponent & 0x7fffLL); 3439 return APInt(80, words); 3440 } 3441 3442 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3443 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3444 assert(partCount()==2); 3445 3446 uint64_t words[2]; 3447 opStatus fs; 3448 bool losesInfo; 3449 3450 // Convert number to double. To avoid spurious underflows, we re- 3451 // normalize against the "double" minExponent first, and only *then* 3452 // truncate the mantissa. The result of that second conversion 3453 // may be inexact, but should never underflow. 3454 // Declare fltSemantics before APFloat that uses it (and 3455 // saves pointer to it) to ensure correct destruction order. 
3456 fltSemantics extendedSemantics = *semantics; 3457 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3458 IEEEFloat extended(*this); 3459 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3460 assert(fs == opOK && !losesInfo); 3461 (void)fs; 3462 3463 IEEEFloat u(extended); 3464 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3465 assert(fs == opOK || fs == opInexact); 3466 (void)fs; 3467 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3468 3469 // If conversion was exact or resulted in a special case, we're done; 3470 // just set the second double to zero. Otherwise, re-convert back to 3471 // the extended format and compute the difference. This now should 3472 // convert exactly to double. 3473 if (u.isFiniteNonZero() && losesInfo) { 3474 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3475 assert(fs == opOK && !losesInfo); 3476 (void)fs; 3477 3478 IEEEFloat v(extended); 3479 v.subtract(u, rmNearestTiesToEven); 3480 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3481 assert(fs == opOK && !losesInfo); 3482 (void)fs; 3483 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3484 } else { 3485 words[1] = 0; 3486 } 3487 3488 return APInt(128, words); 3489 } 3490 3491 template <const fltSemantics &S> 3492 APInt IEEEFloat::convertIEEEFloatToAPInt() const { 3493 assert(semantics == &S); 3494 3495 constexpr int bias = -(S.minExponent - 1); 3496 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3497 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; 3498 constexpr integerPart integer_bit = 3499 integerPart{1} << (trailing_significand_bits % integerPartWidth); 3500 constexpr uint64_t significand_mask = integer_bit - 1; 3501 constexpr unsigned int exponent_bits = 3502 S.sizeInBits - 1 - trailing_significand_bits; 3503 static_assert(exponent_bits < 64); 3504 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 
1; 3505 3506 uint64_t myexponent; 3507 std::array<integerPart, partCountForBits(trailing_significand_bits)> 3508 mysignificand; 3509 3510 if (isFiniteNonZero()) { 3511 myexponent = exponent + bias; 3512 std::copy_n(significandParts(), mysignificand.size(), 3513 mysignificand.begin()); 3514 if (myexponent == 1 && 3515 !(significandParts()[integer_bit_part] & integer_bit)) 3516 myexponent = 0; // denormal 3517 } else if (category == fcZero) { 3518 myexponent = ::exponentZero(S) + bias; 3519 mysignificand.fill(0); 3520 } else if (category == fcInfinity) { 3521 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 3522 llvm_unreachable("semantics don't support inf!"); 3523 } 3524 myexponent = ::exponentInf(S) + bias; 3525 mysignificand.fill(0); 3526 } else { 3527 assert(category == fcNaN && "Unknown category!"); 3528 myexponent = ::exponentNaN(S) + bias; 3529 std::copy_n(significandParts(), mysignificand.size(), 3530 mysignificand.begin()); 3531 } 3532 std::array<uint64_t, (S.sizeInBits + 63) / 64> words; 3533 auto words_iter = 3534 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); 3535 if constexpr (significand_mask != 0) { 3536 // Clear the integer bit. 
3537 words[mysignificand.size() - 1] &= significand_mask; 3538 } 3539 std::fill(words_iter, words.end(), uint64_t{0}); 3540 constexpr size_t last_word = words.size() - 1; 3541 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1) 3542 << ((S.sizeInBits - 1) % 64); 3543 words[last_word] |= shifted_sign; 3544 uint64_t shifted_exponent = (myexponent & exponent_mask) 3545 << (trailing_significand_bits % 64); 3546 words[last_word] |= shifted_exponent; 3547 if constexpr (last_word == 0) { 3548 return APInt(S.sizeInBits, words[0]); 3549 } 3550 return APInt(S.sizeInBits, words); 3551 } 3552 3553 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3554 assert(partCount() == 2); 3555 return convertIEEEFloatToAPInt<semIEEEquad>(); 3556 } 3557 3558 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3559 assert(partCount()==1); 3560 return convertIEEEFloatToAPInt<semIEEEdouble>(); 3561 } 3562 3563 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3564 assert(partCount()==1); 3565 return convertIEEEFloatToAPInt<semIEEEsingle>(); 3566 } 3567 3568 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3569 assert(partCount() == 1); 3570 return convertIEEEFloatToAPInt<semBFloat>(); 3571 } 3572 3573 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3574 assert(partCount()==1); 3575 return convertIEEEFloatToAPInt<semIEEEhalf>(); 3576 } 3577 3578 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { 3579 assert(partCount() == 1); 3580 return convertIEEEFloatToAPInt<semFloat8E5M2>(); 3581 } 3582 3583 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { 3584 assert(partCount() == 1); 3585 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); 3586 } 3587 3588 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { 3589 assert(partCount() == 1); 3590 return convertIEEEFloatToAPInt<semFloat8E4M3FN>(); 3591 } 3592 3593 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { 3594 assert(partCount() == 1); 3595 return 
convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); 3596 } 3597 3598 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { 3599 assert(partCount() == 1); 3600 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); 3601 } 3602 3603 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { 3604 assert(partCount() == 1); 3605 return convertIEEEFloatToAPInt<semFloatTF32>(); 3606 } 3607 3608 // This function creates an APInt that is just a bit map of the floating 3609 // point constant as it would appear in memory. It is not a conversion, 3610 // and treating the result as a normal integer is unlikely to be useful. 3611 3612 APInt IEEEFloat::bitcastToAPInt() const { 3613 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3614 return convertHalfAPFloatToAPInt(); 3615 3616 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3617 return convertBFloatAPFloatToAPInt(); 3618 3619 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3620 return convertFloatAPFloatToAPInt(); 3621 3622 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3623 return convertDoubleAPFloatToAPInt(); 3624 3625 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3626 return convertQuadrupleAPFloatToAPInt(); 3627 3628 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3629 return convertPPCDoubleDoubleAPFloatToAPInt(); 3630 3631 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3632 return convertFloat8E5M2APFloatToAPInt(); 3633 3634 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3635 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3636 3637 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3638 return convertFloat8E4M3FNAPFloatToAPInt(); 3639 3640 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3641 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3642 3643 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3644 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3645 3646 
if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3647 return convertFloatTF32APFloatToAPInt(); 3648 3649 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3650 "unknown format!"); 3651 return convertF80LongDoubleAPFloatToAPInt(); 3652 } 3653 3654 float IEEEFloat::convertToFloat() const { 3655 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3656 "Float semantics are not IEEEsingle"); 3657 APInt api = bitcastToAPInt(); 3658 return api.bitsToFloat(); 3659 } 3660 3661 double IEEEFloat::convertToDouble() const { 3662 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3663 "Float semantics are not IEEEdouble"); 3664 APInt api = bitcastToAPInt(); 3665 return api.bitsToDouble(); 3666 } 3667 3668 #ifdef HAS_IEE754_FLOAT128 3669 float128 IEEEFloat::convertToQuad() const { 3670 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && 3671 "Float semantics are not IEEEquads"); 3672 APInt api = bitcastToAPInt(); 3673 return api.bitsToQuad(); 3674 } 3675 #endif 3676 3677 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3678 /// does not support these bit patterns: 3679 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3680 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3681 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3682 /// exponent = 0, integer bit 1 ("pseudodenormal") 3683 /// At the moment, the first three are treated as NaNs, the last one as Normal. 
3684 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3685 uint64_t i1 = api.getRawData()[0]; 3686 uint64_t i2 = api.getRawData()[1]; 3687 uint64_t myexponent = (i2 & 0x7fff); 3688 uint64_t mysignificand = i1; 3689 uint8_t myintegerbit = mysignificand >> 63; 3690 3691 initialize(&semX87DoubleExtended); 3692 assert(partCount()==2); 3693 3694 sign = static_cast<unsigned int>(i2>>15); 3695 if (myexponent == 0 && mysignificand == 0) { 3696 makeZero(sign); 3697 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3698 makeInf(sign); 3699 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3700 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3701 category = fcNaN; 3702 exponent = exponentNaN(); 3703 significandParts()[0] = mysignificand; 3704 significandParts()[1] = 0; 3705 } else { 3706 category = fcNormal; 3707 exponent = myexponent - 16383; 3708 significandParts()[0] = mysignificand; 3709 significandParts()[1] = 0; 3710 if (myexponent==0) // denormal 3711 exponent = -16382; 3712 } 3713 } 3714 3715 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3716 uint64_t i1 = api.getRawData()[0]; 3717 uint64_t i2 = api.getRawData()[1]; 3718 opStatus fs; 3719 bool losesInfo; 3720 3721 // Get the first double and convert to our format. 3722 initFromDoubleAPInt(APInt(64, i1)); 3723 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3724 assert(fs == opOK && !losesInfo); 3725 (void)fs; 3726 3727 // Unless we have a special case, add in second double. 
3728 if (isFiniteNonZero()) { 3729 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3730 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3731 assert(fs == opOK && !losesInfo); 3732 (void)fs; 3733 3734 add(v, rmNearestTiesToEven); 3735 } 3736 } 3737 3738 template <const fltSemantics &S> 3739 void IEEEFloat::initFromIEEEAPInt(const APInt &api) { 3740 assert(api.getBitWidth() == S.sizeInBits); 3741 constexpr integerPart integer_bit = integerPart{1} 3742 << ((S.precision - 1) % integerPartWidth); 3743 constexpr uint64_t significand_mask = integer_bit - 1; 3744 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3745 constexpr unsigned int stored_significand_parts = 3746 partCountForBits(trailing_significand_bits); 3747 constexpr unsigned int exponent_bits = 3748 S.sizeInBits - 1 - trailing_significand_bits; 3749 static_assert(exponent_bits < 64); 3750 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; 3751 constexpr int bias = -(S.minExponent - 1); 3752 3753 // Copy the bits of the significand. We need to clear out the exponent and 3754 // sign bit in the last word. 3755 std::array<integerPart, stored_significand_parts> mysignificand; 3756 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin()); 3757 if constexpr (significand_mask != 0) { 3758 mysignificand[mysignificand.size() - 1] &= significand_mask; 3759 } 3760 3761 // We assume the last word holds the sign bit, the exponent, and potentially 3762 // some of the trailing significand field. 
3763 uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; 3764 uint64_t myexponent = 3765 (last_word >> (trailing_significand_bits % 64)) & exponent_mask; 3766 3767 initialize(&S); 3768 assert(partCount() == mysignificand.size()); 3769 3770 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); 3771 3772 bool all_zero_significand = 3773 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; }); 3774 3775 bool is_zero = myexponent == 0 && all_zero_significand; 3776 3777 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) { 3778 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { 3779 makeInf(sign); 3780 return; 3781 } 3782 } 3783 3784 bool is_nan = false; 3785 3786 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) { 3787 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; 3788 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) { 3789 bool all_ones_significand = 3790 std::all_of(mysignificand.begin(), mysignificand.end() - 1, 3791 [](integerPart bits) { return bits == ~integerPart{0}; }) && 3792 (!significand_mask || 3793 mysignificand[mysignificand.size() - 1] == significand_mask); 3794 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; 3795 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) { 3796 is_nan = is_zero && sign; 3797 } 3798 3799 if (is_nan) { 3800 category = fcNaN; 3801 exponent = ::exponentNaN(S); 3802 std::copy_n(mysignificand.begin(), mysignificand.size(), 3803 significandParts()); 3804 return; 3805 } 3806 3807 if (is_zero) { 3808 makeZero(sign); 3809 return; 3810 } 3811 3812 category = fcNormal; 3813 exponent = myexponent - bias; 3814 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts()); 3815 if (myexponent == 0) // denormal 3816 exponent = S.minExponent; 3817 else 3818 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit 3819 } 3820 3821 void 
IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3822 initFromIEEEAPInt<semIEEEquad>(api); 3823 } 3824 3825 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3826 initFromIEEEAPInt<semIEEEdouble>(api); 3827 } 3828 3829 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3830 initFromIEEEAPInt<semIEEEsingle>(api); 3831 } 3832 3833 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 3834 initFromIEEEAPInt<semBFloat>(api); 3835 } 3836 3837 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3838 initFromIEEEAPInt<semIEEEhalf>(api); 3839 } 3840 3841 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { 3842 initFromIEEEAPInt<semFloat8E5M2>(api); 3843 } 3844 3845 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { 3846 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); 3847 } 3848 3849 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { 3850 initFromIEEEAPInt<semFloat8E4M3FN>(api); 3851 } 3852 3853 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { 3854 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); 3855 } 3856 3857 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { 3858 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); 3859 } 3860 3861 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { 3862 initFromIEEEAPInt<semFloatTF32>(api); 3863 } 3864 3865 /// Treat api as containing the bits of a floating point number. 
3866 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3867 assert(api.getBitWidth() == Sem->sizeInBits); 3868 if (Sem == &semIEEEhalf) 3869 return initFromHalfAPInt(api); 3870 if (Sem == &semBFloat) 3871 return initFromBFloatAPInt(api); 3872 if (Sem == &semIEEEsingle) 3873 return initFromFloatAPInt(api); 3874 if (Sem == &semIEEEdouble) 3875 return initFromDoubleAPInt(api); 3876 if (Sem == &semX87DoubleExtended) 3877 return initFromF80LongDoubleAPInt(api); 3878 if (Sem == &semIEEEquad) 3879 return initFromQuadrupleAPInt(api); 3880 if (Sem == &semPPCDoubleDoubleLegacy) 3881 return initFromPPCDoubleDoubleAPInt(api); 3882 if (Sem == &semFloat8E5M2) 3883 return initFromFloat8E5M2APInt(api); 3884 if (Sem == &semFloat8E5M2FNUZ) 3885 return initFromFloat8E5M2FNUZAPInt(api); 3886 if (Sem == &semFloat8E4M3FN) 3887 return initFromFloat8E4M3FNAPInt(api); 3888 if (Sem == &semFloat8E4M3FNUZ) 3889 return initFromFloat8E4M3FNUZAPInt(api); 3890 if (Sem == &semFloat8E4M3B11FNUZ) 3891 return initFromFloat8E4M3B11FNUZAPInt(api); 3892 if (Sem == &semFloatTF32) 3893 return initFromFloatTF32APInt(api); 3894 3895 llvm_unreachable(nullptr); 3896 } 3897 3898 /// Make this number the largest magnitude normal number in the given 3899 /// semantics. 3900 void IEEEFloat::makeLargest(bool Negative) { 3901 // We want (in interchange format): 3902 // sign = {Negative} 3903 // exponent = 1..10 3904 // significand = 1..1 3905 category = fcNormal; 3906 sign = Negative; 3907 exponent = semantics->maxExponent; 3908 3909 // Use memset to set all but the highest integerPart to all ones. 3910 integerPart *significand = significandParts(); 3911 unsigned PartCount = partCount(); 3912 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3913 3914 // Set the high integerPart especially setting all unused top bits for 3915 // internal consistency. 
3916 const unsigned NumUnusedHighBits = 3917 PartCount*integerPartWidth - semantics->precision; 3918 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3919 ? (~integerPart(0) >> NumUnusedHighBits) 3920 : 0; 3921 3922 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 3923 semantics->nanEncoding == fltNanEncoding::AllOnes) 3924 significand[0] &= ~integerPart(1); 3925 } 3926 3927 /// Make this number the smallest magnitude denormal number in the given 3928 /// semantics. 3929 void IEEEFloat::makeSmallest(bool Negative) { 3930 // We want (in interchange format): 3931 // sign = {Negative} 3932 // exponent = 0..0 3933 // significand = 0..01 3934 category = fcNormal; 3935 sign = Negative; 3936 exponent = semantics->minExponent; 3937 APInt::tcSet(significandParts(), 1, partCount()); 3938 } 3939 3940 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3941 // We want (in interchange format): 3942 // sign = {Negative} 3943 // exponent = 0..0 3944 // significand = 10..0 3945 3946 category = fcNormal; 3947 zeroSignificand(); 3948 sign = Negative; 3949 exponent = semantics->minExponent; 3950 APInt::tcSetBit(significandParts(), semantics->precision - 1); 3951 } 3952 3953 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3954 initFromAPInt(&Sem, API); 3955 } 3956 3957 IEEEFloat::IEEEFloat(float f) { 3958 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3959 } 3960 3961 IEEEFloat::IEEEFloat(double d) { 3962 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3963 } 3964 3965 namespace { 3966 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3967 Buffer.append(Str.begin(), Str.end()); 3968 } 3969 3970 /// Removes data from the given significand until it is no more 3971 /// precise than is required for the desired precision. 
void AdjustToPrecision(APInt &significand,
                       int &exp, unsigned FormatPrecision) {
  unsigned bits = significand.getActiveBits();

  // 196/59 is a very slight overestimate of lg_2(10).
  unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;

  // Already narrow enough for the requested number of decimal digits.
  if (bits <= bitsRequired) return;

  // Number of whole decimal digits that the surplus bits are worth.
  unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
  if (!tensRemovable) return;

  // Dividing by 10^tensRemovable below is compensated in the decimal exponent.
  exp += tensRemovable;

  // divisor = 10^tensRemovable, built by square-and-multiply.
  APInt divisor(significand.getBitWidth(), 1);
  APInt powten(significand.getBitWidth(), 10);
  while (true) {
    if (tensRemovable & 1)
      divisor *= powten;
    tensRemovable >>= 1;
    if (!tensRemovable) break;
    powten *= powten;
  }

  significand = significand.udiv(divisor);

  // Truncate the significand down to its active bit count.
  significand = significand.trunc(significand.getActiveBits());
}


/// Reduces a decimal digit string to at most \p FormatPrecision digits.
///
/// \p buffer holds the digits least-significant first. Dropped low-order
/// digits are accounted for by increasing \p exp. Rounding is decided by the
/// first dropped digit alone (>= '5' rounds up).
void AdjustToPrecision(SmallVectorImpl<char> &buffer,
                       int &exp, unsigned FormatPrecision) {
  unsigned N = buffer.size();
  if (N <= FormatPrecision) return;

  // The most significant figures are the last ones in the buffer.
  unsigned FirstSignificant = N - FormatPrecision;

  // Round.
  // FIXME: this probably shouldn't use 'round half up'.

  // Rounding down is just a truncation, except we also want to drop
  // trailing zeros from the new result.
  if (buffer[FirstSignificant - 1] < '5') {
    while (FirstSignificant < N && buffer[FirstSignificant] == '0')
      FirstSignificant++;

    exp += FirstSignificant;
    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
    return;
  }

  // Rounding up requires a decimal add-with-carry.  If we continue
  // the carry, the newly-introduced zeros will just be truncated.
  for (unsigned I = FirstSignificant; I != N; ++I) {
    if (buffer[I] == '9') {
      FirstSignificant++;
    } else {
      buffer[I]++;
      break;
    }
  }

  // If we carried through, we have exactly one digit of precision.
  if (FirstSignificant == N) {
    exp += FirstSignificant;
    buffer.clear();
    buffer.push_back('1');
    return;
  }

  exp += FirstSignificant;
  buffer.erase(&buffer[0], &buffer[FirstSignificant]);
}
} // namespace

/// Appends a decimal rendering of this value to \p Str.
///
/// \param FormatPrecision  Upper bound on the number of significant decimal
///        digits; 0 selects 2 + precision * 59 / 196 digits.
/// \param FormatMaxPadding  0 forces scientific notation. Otherwise plain
///        notation is used unless it would need more than this many padding
///        zeros (or, for integers, more digits than FormatPrecision).
/// \param TruncateZero  When true the scientific form uses 'E', is not
///        zero-padded and prints the exponent with as few digits as needed;
///        when false it uses 'e', pads the fraction with zeros and prints at
///        least two exponent digits.
///
/// Infinities print as "+Inf"/"-Inf" and NaNs as "NaN".
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    if (!FormatMaxPadding) {
      // Scientific notation was requested, so spell zero that way too.
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  if (isNegative())
    Str.push_back('-');

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  // Set FormatPrecision if zero.  We want to do this before we
  // truncate trailing zeros, as those are part of the precision.
  if (!FormatPrecision) {
    // We use enough digits so the number can be round-tripped back to an
    // APFloat. The formula comes from "How to Print Floating-Point Numbers
    // Accurately" by Steele and White.
    // FIXME: Using a formula based purely on the precision is conservative;
    // we can print fewer digits depending on the actual value being printed.

    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
    FormatPrecision = 2 + semantics->precision * 59 / 196;
  }

  // Ignore trailing binary zeros.
  int trailingZeros = significand.countr_zero();
  exp += trailingZeros;
  significand.lshrInPlace(trailingZeros);

  // Change the exponent from 2^e to 10^e.
  if (exp == 0) {
    // Nothing to do.
  } else if (exp > 0) {
    // Just shift left.
    significand = significand.zext(semantics->precision + exp);
    significand <<= exp;
    exp = 0;
  } else { /* exp < 0 */
    int texp = -exp;

    // We transform this using the identity:
    //   (N)(2^-e) == (N)(5^e)(10^-e)
    // This means we have to multiply N (the significand) by 5^e.
    // To avoid overflow, we have to operate on numbers large
    // enough to store N * 5^e:
    //   log2(N * 5^e) == log2(N) + e * log2(5)
    //                 <= semantics->precision + e * 137 / 59
    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

    unsigned precision = semantics->precision + (137 * texp + 136) / 59;

    // Multiply significand by 5^e.
    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
    significand = significand.zext(precision);
    APInt five_to_the_i(precision, 5);
    while (true) {
      if (texp & 1) significand *= five_to_the_i;

      texp >>= 1;
      if (!texp) break;
      five_to_the_i *= five_to_the_i;
    }
  }

  // Cheaply discard decimal digits that are far beyond FormatPrecision before
  // doing the digit-by-digit extraction.
  AdjustToPrecision(significand, exp, FormatPrecision);

  // Decimal digits, least significant first.
  SmallVector<char, 256> buffer;

  // Fill the buffer.
  unsigned precision = significand.getBitWidth();
  if (precision < 4) {
    // We need enough precision to store the value 10.
    precision = 4;
    significand = significand.zext(precision);
  }
  APInt ten(precision, 10);
  APInt digit(precision, 0);

  bool inTrail = true;
  while (significand != 0) {
    // digit <- significand % 10
    // significand <- significand / 10
    APInt::udivrem(significand, ten, significand, digit);

    unsigned d = digit.getZExtValue();

    // Drop trailing zeros.
    if (inTrail && !d) exp++;
    else {
      buffer.push_back((char) ('0' + d));
      inTrail = false;
    }
  }

  assert(!buffer.empty() && "no characters in buffer!");

  // Drop down to FormatPrecision.
  // TODO: don't do more precise calculations above than are required.
  AdjustToPrecision(buffer, exp, FormatPrecision);

  unsigned NDigits = buffer.size();

  // Check whether we should use scientific notation.
  bool FormatScientific;
  if (!FormatMaxPadding)
    FormatScientific = true;
  else {
    if (exp >= 0) {
      // 765e3 --> 765000
      //              ^^^
      // But we shouldn't make the number look more precise than it is.
      FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                          NDigits + (unsigned) exp > FormatPrecision);
    } else {
      // Power of the most significant digit.
      int MSD = exp + (int) (NDigits - 1);
      if (MSD >= 0) {
        // 765e-2 == 7.65
        FormatScientific = false;
      } else {
        // 765e-5 == 0.00765
        //           ^ ^^
        FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
      }
    }
  }

  // Scientific formatting is pretty straightforward.
  if (FormatScientific) {
    // Re-express the exponent relative to the leading digit (d.ddd form).
    exp += (NDigits - 1);

    Str.push_back(buffer[NDigits-1]);
    Str.push_back('.');
    if (NDigits == 1 && TruncateZero)
      Str.push_back('0');
    else
      for (unsigned I = 1; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
    // Fill with zeros up to FormatPrecision.
    if (!TruncateZero && FormatPrecision > NDigits - 1)
      Str.append(FormatPrecision - NDigits + 1, '0');
    // For !TruncateZero we use lower 'e'.
    Str.push_back(TruncateZero ? 'E' : 'e');

    Str.push_back(exp >= 0 ? '+' : '-');
    if (exp < 0) exp = -exp;
    // Exponent digits are produced least significant first, then reversed.
    SmallVector<char, 6> expbuf;
    do {
      expbuf.push_back((char) ('0' + (exp % 10)));
      exp /= 10;
    } while (exp);
    // Exponent always at least two digits if we do not truncate zeros.
    if (!TruncateZero && expbuf.size() < 2)
      expbuf.push_back('0');
    for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
      Str.push_back(expbuf[E-1-I]);
    return;
  }

  // Non-scientific, positive exponents.
  if (exp >= 0) {
    for (unsigned I = 0; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-1-I]);
    for (unsigned I = 0; I != (unsigned) exp; ++I)
      Str.push_back('0');
    return;
  }

  // Non-scientific, negative exponents.

  // The number of digits to the left of the decimal point.
  int NWholeDigits = exp + (int) NDigits;

  unsigned I = 0;
  if (NWholeDigits > 0) {
    for (; I != (unsigned) NWholeDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);
    Str.push_back('.');
  } else {
    // Value is below 1: emit "0." followed by the leading fractional zeros.
    unsigned NZeros = 1 + (unsigned) -NWholeDigits;

    Str.push_back('0');
    Str.push_back('.');
    for (unsigned Z = 1; Z != NZeros; ++Z)
      Str.push_back('0');
  }

  // Remaining (fractional) digits.
  for (; I != NDigits; ++I)
    Str.push_back(buffer[NDigits-I-1]);
}

/// Reports whether 1/x is exactly representable as a non-denormal value of
/// the same semantics, which here requires x to be a normalized power of two.
///
/// \param inv  If non-null and the function returns true, receives the
///        reciprocal.
/// \returns false for zero, infinities, NaNs, values with more than the
///        integer bit set, inexact divisions and denormal reciprocals.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  if (significandLSB() != semantics->precision - 1)
    return false;

  // Get the inverse.
  IEEEFloat reciprocal(*semantics, 1ULL);
  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (reciprocal.isDenormal())
    return false;

  assert(reciprocal.isFiniteNonZero() &&
         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(reciprocal, *semantics);

  return true;
}

/// Returns log2(|x|) when |x| is an exact power of two, INT_MIN otherwise.
///
/// INT_MIN is returned for zero, infinities, NaNs and any value whose
/// significand has more than one bit set.
int IEEEFloat::getExactLog2Abs() const {
  if (!isFinite() || isZero())
    return INT_MIN;

  const integerPart *Parts = significandParts();
  const int PartCount = partCountForBits(semantics->precision);

  // A power of two has exactly one significand bit set; bail out as soon as a
  // second one is seen.
  int PopCount = 0;
  for (int i = 0; i < PartCount; ++i) {
    PopCount += llvm::popcount(Parts[i]);
    if (PopCount > 1)
      return INT_MIN;
  }

  // Above the minimum exponent the single set bit is taken to be the integer
  // bit, so the stored exponent is the answer.
  if (exponent != semantics->minExponent)
    return exponent;

  // At the minimum exponent the value may be denormal: derive the result from
  // the position of the set bit within the significand.
  int CountrParts = 0;
  for (int i = 0; i < PartCount;
       ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
    if (Parts[i] != 0) {
      return exponent - semantics->precision + CountrParts +
             llvm::countr_zero(Parts[i]) + 1;
    }
  }

  llvm_unreachable("didn't find the set bit");
}

/// Returns true only for NaNs whose quiet bit (significand bit
/// precision - 2) is clear; formats with NanOnly non-finite behaviour never
/// report a signaling NaN.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
4343 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 4344 // If we are performing nextDown, swap sign so we have -x. 4345 if (nextDown) 4346 changeSign(); 4347 4348 // Compute nextUp(x) 4349 opStatus result = opOK; 4350 4351 // Handle each float category separately. 4352 switch (category) { 4353 case fcInfinity: 4354 // nextUp(+inf) = +inf 4355 if (!isNegative()) 4356 break; 4357 // nextUp(-inf) = -getLargest() 4358 makeLargest(true); 4359 break; 4360 case fcNaN: 4361 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 4362 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 4363 // change the payload. 4364 if (isSignaling()) { 4365 result = opInvalidOp; 4366 // For consistency, propagate the sign of the sNaN to the qNaN. 4367 makeNaN(false, isNegative(), nullptr); 4368 } 4369 break; 4370 case fcZero: 4371 // nextUp(pm 0) = +getSmallest() 4372 makeSmallest(false); 4373 break; 4374 case fcNormal: 4375 // nextUp(-getSmallest()) = -0 4376 if (isSmallest() && isNegative()) { 4377 APInt::tcSet(significandParts(), 0, partCount()); 4378 category = fcZero; 4379 exponent = 0; 4380 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 4381 sign = false; 4382 break; 4383 } 4384 4385 if (isLargest() && !isNegative()) { 4386 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 4387 // nextUp(getLargest()) == NAN 4388 makeNaN(); 4389 break; 4390 } else { 4391 // nextUp(getLargest()) == INFINITY 4392 APInt::tcSet(significandParts(), 0, partCount()); 4393 category = fcInfinity; 4394 exponent = semantics->maxExponent + 1; 4395 break; 4396 } 4397 } 4398 4399 // nextUp(normal) == normal + inc. 4400 if (isNegative()) { 4401 // If we are negative, we need to decrement the significand. 4402 4403 // We only cross a binade boundary that requires adjusting the exponent 4404 // if: 4405 // 1. exponent != semantics->minExponent. This implies we are not in the 4406 // smallest binade or are dealing with denormals. 4407 // 2. 
Our significand excluding the integral bit is all zeros. 4408 bool WillCrossBinadeBoundary = 4409 exponent != semantics->minExponent && isSignificandAllZeros(); 4410 4411 // Decrement the significand. 4412 // 4413 // We always do this since: 4414 // 1. If we are dealing with a non-binade decrement, by definition we 4415 // just decrement the significand. 4416 // 2. If we are dealing with a normal -> normal binade decrement, since 4417 // we have an explicit integral bit the fact that all bits but the 4418 // integral bit are zero implies that subtracting one will yield a 4419 // significand with 0 integral bit and 1 in all other spots. Thus we 4420 // must just adjust the exponent and set the integral bit to 1. 4421 // 3. If we are dealing with a normal -> denormal binade decrement, 4422 // since we set the integral bit to 0 when we represent denormals, we 4423 // just decrement the significand. 4424 integerPart *Parts = significandParts(); 4425 APInt::tcDecrement(Parts, partCount()); 4426 4427 if (WillCrossBinadeBoundary) { 4428 // Our result is a normal number. Do the following: 4429 // 1. Set the integral bit to 1. 4430 // 2. Decrement the exponent. 4431 APInt::tcSetBit(Parts, semantics->precision - 1); 4432 exponent--; 4433 } 4434 } else { 4435 // If we are positive, we need to increment the significand. 4436 4437 // We only cross a binade boundary that requires adjusting the exponent if 4438 // the input is not a denormal and all of said input's significand bits 4439 // are set. If all of said conditions are true: clear the significand, set 4440 // the integral bit to 1, and increment the exponent. If we have a 4441 // denormal always increment since moving denormals and the numbers in the 4442 // smallest normal binade have the same exponent in our representation. 
4443 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 4444 4445 if (WillCrossBinadeBoundary) { 4446 integerPart *Parts = significandParts(); 4447 APInt::tcSet(Parts, 0, partCount()); 4448 APInt::tcSetBit(Parts, semantics->precision - 1); 4449 assert(exponent != semantics->maxExponent && 4450 "We can not increment an exponent beyond the maxExponent allowed" 4451 " by the given floating point semantics."); 4452 exponent++; 4453 } else { 4454 incrementSignificand(); 4455 } 4456 } 4457 break; 4458 } 4459 4460 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4461 if (nextDown) 4462 changeSign(); 4463 4464 return result; 4465 } 4466 4467 APFloatBase::ExponentType IEEEFloat::exponentNaN() const { 4468 return ::exponentNaN(*semantics); 4469 } 4470 4471 APFloatBase::ExponentType IEEEFloat::exponentInf() const { 4472 return ::exponentInf(*semantics); 4473 } 4474 4475 APFloatBase::ExponentType IEEEFloat::exponentZero() const { 4476 return ::exponentZero(*semantics); 4477 } 4478 4479 void IEEEFloat::makeInf(bool Negative) { 4480 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 4481 // There is no Inf, so make NaN instead. 
4482 makeNaN(false, Negative); 4483 return; 4484 } 4485 category = fcInfinity; 4486 sign = Negative; 4487 exponent = exponentInf(); 4488 APInt::tcSet(significandParts(), 0, partCount()); 4489 } 4490 4491 void IEEEFloat::makeZero(bool Negative) { 4492 category = fcZero; 4493 sign = Negative; 4494 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 4495 // Merge negative zero to positive because 0b10000...000 is used for NaN 4496 sign = false; 4497 } 4498 exponent = exponentZero(); 4499 APInt::tcSet(significandParts(), 0, partCount()); 4500 } 4501 4502 void IEEEFloat::makeQuiet() { 4503 assert(isNaN()); 4504 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly) 4505 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4506 } 4507 4508 int ilogb(const IEEEFloat &Arg) { 4509 if (Arg.isNaN()) 4510 return IEEEFloat::IEK_NaN; 4511 if (Arg.isZero()) 4512 return IEEEFloat::IEK_Zero; 4513 if (Arg.isInfinity()) 4514 return IEEEFloat::IEK_Inf; 4515 if (!Arg.isDenormal()) 4516 return Arg.exponent; 4517 4518 IEEEFloat Normalized(Arg); 4519 int SignificandBits = Arg.getSemantics().precision - 1; 4520 4521 Normalized.exponent += SignificandBits; 4522 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4523 return Normalized.exponent - SignificandBits; 4524 } 4525 4526 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4527 auto MaxExp = X.getSemantics().maxExponent; 4528 auto MinExp = X.getSemantics().minExponent; 4529 4530 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4531 // overflow; clamp it to a safe range before adding, but ensure that the range 4532 // is large enough that the clamp does not change the result. The range we 4533 // need to support is the difference between the largest possible exponent and 4534 // the normalized exponent of half the smallest denormal. 
4535 4536 int SignificandBits = X.getSemantics().precision - 1; 4537 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4538 4539 // Clamp to one past the range ends to let normalize handle overlflow. 4540 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); 4541 X.normalize(RoundingMode, lfExactlyZero); 4542 if (X.isNaN()) 4543 X.makeQuiet(); 4544 return X; 4545 } 4546 4547 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4548 Exp = ilogb(Val); 4549 4550 // Quiet signalling nans. 4551 if (Exp == IEEEFloat::IEK_NaN) { 4552 IEEEFloat Quiet(Val); 4553 Quiet.makeQuiet(); 4554 return Quiet; 4555 } 4556 4557 if (Exp == IEEEFloat::IEK_Inf) 4558 return Val; 4559 4560 // 1 is added because frexp is defined to return a normalized fraction in 4561 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4562 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1; 4563 return scalbn(Val, -Exp, RM); 4564 } 4565 4566 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4567 : Semantics(&S), 4568 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4569 assert(Semantics == &semPPCDoubleDouble); 4570 } 4571 4572 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4573 : Semantics(&S), 4574 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4575 APFloat(semIEEEdouble, uninitialized)}) { 4576 assert(Semantics == &semPPCDoubleDouble); 4577 } 4578 4579 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4580 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4581 APFloat(semIEEEdouble)}) { 4582 assert(Semantics == &semPPCDoubleDouble); 4583 } 4584 4585 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4586 : Semantics(&S), 4587 Floats(new APFloat[2]{ 4588 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4589 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4590 assert(Semantics == &semPPCDoubleDouble); 4591 } 4592 4593 
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4594 APFloat &&Second) 4595 : Semantics(&S), 4596 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4597 assert(Semantics == &semPPCDoubleDouble); 4598 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4599 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4600 } 4601 4602 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4603 : Semantics(RHS.Semantics), 4604 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4605 APFloat(RHS.Floats[1])} 4606 : nullptr) { 4607 assert(Semantics == &semPPCDoubleDouble); 4608 } 4609 4610 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4611 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4612 RHS.Semantics = &semBogus; 4613 assert(Semantics == &semPPCDoubleDouble); 4614 } 4615 4616 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4617 if (Semantics == RHS.Semantics && RHS.Floats) { 4618 Floats[0] = RHS.Floats[0]; 4619 Floats[1] = RHS.Floats[1]; 4620 } else if (this != &RHS) { 4621 this->~DoubleAPFloat(); 4622 new (this) DoubleAPFloat(RHS); 4623 } 4624 return *this; 4625 } 4626 4627 // Implement addition, subtraction, multiplication and division based on: 4628 // "Software for Doubled-Precision Floating-Point Computations", 4629 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 
4630 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4631 const APFloat &c, const APFloat &cc, 4632 roundingMode RM) { 4633 int Status = opOK; 4634 APFloat z = a; 4635 Status |= z.add(c, RM); 4636 if (!z.isFinite()) { 4637 if (!z.isInfinity()) { 4638 Floats[0] = std::move(z); 4639 Floats[1].makeZero(/* Neg = */ false); 4640 return (opStatus)Status; 4641 } 4642 Status = opOK; 4643 auto AComparedToC = a.compareAbsoluteValue(c); 4644 z = cc; 4645 Status |= z.add(aa, RM); 4646 if (AComparedToC == APFloat::cmpGreaterThan) { 4647 // z = cc + aa + c + a; 4648 Status |= z.add(c, RM); 4649 Status |= z.add(a, RM); 4650 } else { 4651 // z = cc + aa + a + c; 4652 Status |= z.add(a, RM); 4653 Status |= z.add(c, RM); 4654 } 4655 if (!z.isFinite()) { 4656 Floats[0] = std::move(z); 4657 Floats[1].makeZero(/* Neg = */ false); 4658 return (opStatus)Status; 4659 } 4660 Floats[0] = z; 4661 APFloat zz = aa; 4662 Status |= zz.add(cc, RM); 4663 if (AComparedToC == APFloat::cmpGreaterThan) { 4664 // Floats[1] = a - z + c + zz; 4665 Floats[1] = a; 4666 Status |= Floats[1].subtract(z, RM); 4667 Status |= Floats[1].add(c, RM); 4668 Status |= Floats[1].add(zz, RM); 4669 } else { 4670 // Floats[1] = c - z + a + zz; 4671 Floats[1] = c; 4672 Status |= Floats[1].subtract(z, RM); 4673 Status |= Floats[1].add(a, RM); 4674 Status |= Floats[1].add(zz, RM); 4675 } 4676 } else { 4677 // q = a - z; 4678 APFloat q = a; 4679 Status |= q.subtract(z, RM); 4680 4681 // zz = q + c + (a - (q + z)) + aa + cc; 4682 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 
4683 auto zz = q; 4684 Status |= zz.add(c, RM); 4685 Status |= q.add(z, RM); 4686 Status |= q.subtract(a, RM); 4687 q.changeSign(); 4688 Status |= zz.add(q, RM); 4689 Status |= zz.add(aa, RM); 4690 Status |= zz.add(cc, RM); 4691 if (zz.isZero() && !zz.isNegative()) { 4692 Floats[0] = std::move(z); 4693 Floats[1].makeZero(/* Neg = */ false); 4694 return opOK; 4695 } 4696 Floats[0] = z; 4697 Status |= Floats[0].add(zz, RM); 4698 if (!Floats[0].isFinite()) { 4699 Floats[1].makeZero(/* Neg = */ false); 4700 return (opStatus)Status; 4701 } 4702 Floats[1] = std::move(z); 4703 Status |= Floats[1].subtract(Floats[0], RM); 4704 Status |= Floats[1].add(zz, RM); 4705 } 4706 return (opStatus)Status; 4707 } 4708 4709 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4710 const DoubleAPFloat &RHS, 4711 DoubleAPFloat &Out, 4712 roundingMode RM) { 4713 if (LHS.getCategory() == fcNaN) { 4714 Out = LHS; 4715 return opOK; 4716 } 4717 if (RHS.getCategory() == fcNaN) { 4718 Out = RHS; 4719 return opOK; 4720 } 4721 if (LHS.getCategory() == fcZero) { 4722 Out = RHS; 4723 return opOK; 4724 } 4725 if (RHS.getCategory() == fcZero) { 4726 Out = LHS; 4727 return opOK; 4728 } 4729 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4730 LHS.isNegative() != RHS.isNegative()) { 4731 Out.makeNaN(false, Out.isNegative(), nullptr); 4732 return opInvalidOp; 4733 } 4734 if (LHS.getCategory() == fcInfinity) { 4735 Out = LHS; 4736 return opOK; 4737 } 4738 if (RHS.getCategory() == fcInfinity) { 4739 Out = RHS; 4740 return opOK; 4741 } 4742 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4743 4744 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4745 CC(RHS.Floats[1]); 4746 assert(&A.getSemantics() == &semIEEEdouble); 4747 assert(&AA.getSemantics() == &semIEEEdouble); 4748 assert(&C.getSemantics() == &semIEEEdouble); 4749 assert(&CC.getSemantics() == &semIEEEdouble); 4750 assert(&Out.Floats[0].getSemantics() == 
&semIEEEdouble); 4751 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4752 return Out.addImpl(A, AA, C, CC, RM); 4753 } 4754 4755 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4756 roundingMode RM) { 4757 return addWithSpecial(*this, RHS, *this, RM); 4758 } 4759 4760 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4761 roundingMode RM) { 4762 changeSign(); 4763 auto Ret = add(RHS, RM); 4764 changeSign(); 4765 return Ret; 4766 } 4767 4768 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4769 APFloat::roundingMode RM) { 4770 const auto &LHS = *this; 4771 auto &Out = *this; 4772 /* Interesting observation: For special categories, finding the lowest 4773 common ancestor of the following layered graph gives the correct 4774 return category: 4775 4776 NaN 4777 / \ 4778 Zero Inf 4779 \ / 4780 Normal 4781 4782 e.g. NaN * NaN = NaN 4783 Zero * Inf = NaN 4784 Normal * Zero = Zero 4785 Normal * Inf = Inf 4786 */ 4787 if (LHS.getCategory() == fcNaN) { 4788 Out = LHS; 4789 return opOK; 4790 } 4791 if (RHS.getCategory() == fcNaN) { 4792 Out = RHS; 4793 return opOK; 4794 } 4795 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4796 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4797 Out.makeNaN(false, false, nullptr); 4798 return opOK; 4799 } 4800 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4801 Out = LHS; 4802 return opOK; 4803 } 4804 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4805 Out = RHS; 4806 return opOK; 4807 } 4808 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4809 "Special cases not handled exhaustively"); 4810 4811 int Status = opOK; 4812 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4813 // t = a * c 4814 APFloat T = A; 4815 Status |= T.multiply(C, RM); 4816 if (!T.isFiniteNonZero()) { 4817 Floats[0] = T; 4818 Floats[1].makeZero(/* Neg = */ false); 4819 
return (opStatus)Status; 4820 } 4821 4822 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4823 APFloat Tau = A; 4824 T.changeSign(); 4825 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4826 T.changeSign(); 4827 { 4828 // v = a * d 4829 APFloat V = A; 4830 Status |= V.multiply(D, RM); 4831 // w = b * c 4832 APFloat W = B; 4833 Status |= W.multiply(C, RM); 4834 Status |= V.add(W, RM); 4835 // tau += v + w 4836 Status |= Tau.add(V, RM); 4837 } 4838 // u = t + tau 4839 APFloat U = T; 4840 Status |= U.add(Tau, RM); 4841 4842 Floats[0] = U; 4843 if (!U.isFinite()) { 4844 Floats[1].makeZero(/* Neg = */ false); 4845 } else { 4846 // Floats[1] = (t - u) + tau 4847 Status |= T.subtract(U, RM); 4848 Status |= T.add(Tau, RM); 4849 Floats[1] = T; 4850 } 4851 return (opStatus)Status; 4852 } 4853 4854 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4855 APFloat::roundingMode RM) { 4856 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4857 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4858 auto Ret = 4859 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4860 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4861 return Ret; 4862 } 4863 4864 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4865 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4866 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4867 auto Ret = 4868 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4869 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4870 return Ret; 4871 } 4872 4873 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4874 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4875 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4876 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4877 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4878 return Ret; 
4879 } 4880 4881 APFloat::opStatus 4882 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4883 const DoubleAPFloat &Addend, 4884 APFloat::roundingMode RM) { 4885 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4886 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4887 auto Ret = Tmp.fusedMultiplyAdd( 4888 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4889 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4890 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4891 return Ret; 4892 } 4893 4894 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4895 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4896 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4897 auto Ret = Tmp.roundToIntegral(RM); 4898 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4899 return Ret; 4900 } 4901 4902 void DoubleAPFloat::changeSign() { 4903 Floats[0].changeSign(); 4904 Floats[1].changeSign(); 4905 } 4906 4907 APFloat::cmpResult 4908 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4909 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4910 if (Result != cmpEqual) 4911 return Result; 4912 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4913 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4914 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4915 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4916 if (Against && !RHSAgainst) 4917 return cmpLessThan; 4918 if (!Against && RHSAgainst) 4919 return cmpGreaterThan; 4920 if (!Against && !RHSAgainst) 4921 return Result; 4922 if (Against && RHSAgainst) 4923 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4924 } 4925 return Result; 4926 } 4927 4928 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4929 return Floats[0].getCategory(); 4930 } 4931 4932 bool DoubleAPFloat::isNegative() 
const { return Floats[0].isNegative(); } 4933 4934 void DoubleAPFloat::makeInf(bool Neg) { 4935 Floats[0].makeInf(Neg); 4936 Floats[1].makeZero(/* Neg = */ false); 4937 } 4938 4939 void DoubleAPFloat::makeZero(bool Neg) { 4940 Floats[0].makeZero(Neg); 4941 Floats[1].makeZero(/* Neg = */ false); 4942 } 4943 4944 void DoubleAPFloat::makeLargest(bool Neg) { 4945 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4946 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4947 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4948 if (Neg) 4949 changeSign(); 4950 } 4951 4952 void DoubleAPFloat::makeSmallest(bool Neg) { 4953 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4954 Floats[0].makeSmallest(Neg); 4955 Floats[1].makeZero(/* Neg = */ false); 4956 } 4957 4958 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4959 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4960 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4961 if (Neg) 4962 Floats[0].changeSign(); 4963 Floats[1].makeZero(/* Neg = */ false); 4964 } 4965 4966 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4967 Floats[0].makeNaN(SNaN, Neg, fill); 4968 Floats[1].makeZero(/* Neg = */ false); 4969 } 4970 4971 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4972 auto Result = Floats[0].compare(RHS.Floats[0]); 4973 // |Float[0]| > |Float[1]| 4974 if (Result == APFloat::cmpEqual) 4975 return Floats[1].compare(RHS.Floats[1]); 4976 return Result; 4977 } 4978 4979 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4980 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4981 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4982 } 4983 4984 hash_code hash_value(const DoubleAPFloat &Arg) { 4985 if (Arg.Floats) 4986 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4987 return hash_combine(Arg.Semantics); 4988 } 4989 4990 
APInt DoubleAPFloat::bitcastToAPInt() const { 4991 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4992 uint64_t Data[] = { 4993 Floats[0].bitcastToAPInt().getRawData()[0], 4994 Floats[1].bitcastToAPInt().getRawData()[0], 4995 }; 4996 return APInt(128, 2, Data); 4997 } 4998 4999 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 5000 roundingMode RM) { 5001 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5002 APFloat Tmp(semPPCDoubleDoubleLegacy); 5003 auto Ret = Tmp.convertFromString(S, RM); 5004 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5005 return Ret; 5006 } 5007 5008 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5009 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5010 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5011 auto Ret = Tmp.next(nextDown); 5012 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5013 return Ret; 5014 } 5015 5016 APFloat::opStatus 5017 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5018 unsigned int Width, bool IsSigned, 5019 roundingMode RM, bool *IsExact) const { 5020 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5021 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5022 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5023 } 5024 5025 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5026 bool IsSigned, 5027 roundingMode RM) { 5028 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5029 APFloat Tmp(semPPCDoubleDoubleLegacy); 5030 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5031 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5032 return Ret; 5033 } 5034 5035 APFloat::opStatus 5036 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5037 unsigned int InputSize, 5038 bool IsSigned, roundingMode RM) { 5039 assert(Semantics == &semPPCDoubleDouble && 
"Unexpected Semantics"); 5040 APFloat Tmp(semPPCDoubleDoubleLegacy); 5041 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5042 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5043 return Ret; 5044 } 5045 5046 APFloat::opStatus 5047 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5048 unsigned int InputSize, 5049 bool IsSigned, roundingMode RM) { 5050 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5051 APFloat Tmp(semPPCDoubleDoubleLegacy); 5052 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5053 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5054 return Ret; 5055 } 5056 5057 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5058 unsigned int HexDigits, 5059 bool UpperCase, 5060 roundingMode RM) const { 5061 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5062 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5063 .convertToHexString(DST, HexDigits, UpperCase, RM); 5064 } 5065 5066 bool DoubleAPFloat::isDenormal() const { 5067 return getCategory() == fcNormal && 5068 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5069 // (double)(Hi + Lo) == Hi defines a normal number. 
5070 Floats[0] != Floats[0] + Floats[1]); 5071 } 5072 5073 bool DoubleAPFloat::isSmallest() const { 5074 if (getCategory() != fcNormal) 5075 return false; 5076 DoubleAPFloat Tmp(*this); 5077 Tmp.makeSmallest(this->isNegative()); 5078 return Tmp.compare(*this) == cmpEqual; 5079 } 5080 5081 bool DoubleAPFloat::isSmallestNormalized() const { 5082 if (getCategory() != fcNormal) 5083 return false; 5084 5085 DoubleAPFloat Tmp(*this); 5086 Tmp.makeSmallestNormalized(this->isNegative()); 5087 return Tmp.compare(*this) == cmpEqual; 5088 } 5089 5090 bool DoubleAPFloat::isLargest() const { 5091 if (getCategory() != fcNormal) 5092 return false; 5093 DoubleAPFloat Tmp(*this); 5094 Tmp.makeLargest(this->isNegative()); 5095 return Tmp.compare(*this) == cmpEqual; 5096 } 5097 5098 bool DoubleAPFloat::isInteger() const { 5099 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5100 return Floats[0].isInteger() && Floats[1].isInteger(); 5101 } 5102 5103 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5104 unsigned FormatPrecision, 5105 unsigned FormatMaxPadding, 5106 bool TruncateZero) const { 5107 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5108 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5109 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5110 } 5111 5112 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5113 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5114 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5115 if (!inv) 5116 return Tmp.getExactInverse(nullptr); 5117 APFloat Inv(semPPCDoubleDoubleLegacy); 5118 auto Ret = Tmp.getExactInverse(&Inv); 5119 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5120 return Ret; 5121 } 5122 5123 int DoubleAPFloat::getExactLog2() const { 5124 // TODO: Implement me 5125 return INT_MIN; 5126 } 5127 5128 int DoubleAPFloat::getExactLog2Abs() const { 5129 // TODO: Implement me 5130 return INT_MIN; 5131 } 5132 5133 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5134 APFloat::roundingMode RM) { 5135 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5136 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5137 scalbn(Arg.Floats[1], Exp, RM)); 5138 } 5139 5140 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5141 APFloat::roundingMode RM) { 5142 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5143 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5144 APFloat Second = Arg.Floats[1]; 5145 if (Arg.getCategory() == APFloat::fcNormal) 5146 Second = scalbn(Second, -Exp, RM); 5147 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5148 } 5149 5150 } // namespace detail 5151 5152 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5153 if (usesLayout<IEEEFloat>(Semantics)) { 5154 new (&IEEE) IEEEFloat(std::move(F)); 5155 return; 5156 } 5157 if (usesLayout<DoubleAPFloat>(Semantics)) { 5158 const fltSemantics& S = F.getSemantics(); 5159 new (&Double) 5160 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5161 APFloat(semIEEEdouble)); 5162 return; 5163 } 5164 llvm_unreachable("Unexpected semantics"); 5165 } 5166 5167 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5168 roundingMode RM) { 5169 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5170 } 5171 5172 hash_code hash_value(const APFloat &Arg) { 5173 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5174 return hash_value(Arg.U.IEEE); 5175 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5176 return hash_value(Arg.U.Double); 5177 llvm_unreachable("Unexpected semantics"); 5178 } 5179 5180 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5181 : APFloat(Semantics) { 5182 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5183 assert(StatusOrErr && "Invalid floating point representation"); 5184 consumeError(StatusOrErr.takeError()); 5185 } 
5186 5187 FPClassTest APFloat::classify() const { 5188 if (isZero()) 5189 return isNegative() ? fcNegZero : fcPosZero; 5190 if (isNormal()) 5191 return isNegative() ? fcNegNormal : fcPosNormal; 5192 if (isDenormal()) 5193 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5194 if (isInfinity()) 5195 return isNegative() ? fcNegInf : fcPosInf; 5196 assert(isNaN() && "Other class of FP constant"); 5197 return isSignaling() ? fcSNan : fcQNan; 5198 } 5199 5200 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5201 roundingMode RM, bool *losesInfo) { 5202 if (&getSemantics() == &ToSemantics) { 5203 *losesInfo = false; 5204 return opOK; 5205 } 5206 if (usesLayout<IEEEFloat>(getSemantics()) && 5207 usesLayout<IEEEFloat>(ToSemantics)) 5208 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5209 if (usesLayout<IEEEFloat>(getSemantics()) && 5210 usesLayout<DoubleAPFloat>(ToSemantics)) { 5211 assert(&ToSemantics == &semPPCDoubleDouble); 5212 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5213 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5214 return Ret; 5215 } 5216 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5217 usesLayout<IEEEFloat>(ToSemantics)) { 5218 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5219 *this = APFloat(std::move(getIEEE()), ToSemantics); 5220 return Ret; 5221 } 5222 llvm_unreachable("Unexpected semantics"); 5223 } 5224 5225 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5226 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5227 } 5228 5229 void APFloat::print(raw_ostream &OS) const { 5230 SmallVector<char, 16> Buffer; 5231 toString(Buffer); 5232 OS << Buffer << "\n"; 5233 } 5234 5235 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5236 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 5237 #endif 5238 5239 void APFloat::Profile(FoldingSetNodeID &NID) const { 5240 NID.Add(bitcastToAPInt()); 5241 } 5242 5243 /* Same as 
convertToInteger(integerPart*, ...), except the result is returned in 5244 an APSInt, whose initial bit-width and signed-ness are used to determine the 5245 precision of the conversion. 5246 */ 5247 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5248 roundingMode rounding_mode, 5249 bool *isExact) const { 5250 unsigned bitWidth = result.getBitWidth(); 5251 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5252 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5253 rounding_mode, isExact); 5254 // Keeps the original signed-ness. 5255 result = APInt(bitWidth, parts); 5256 return status; 5257 } 5258 5259 double APFloat::convertToDouble() const { 5260 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5261 return getIEEE().convertToDouble(); 5262 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5263 "Float semantics is not representable by IEEEdouble"); 5264 APFloat Temp = *this; 5265 bool LosesInfo; 5266 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5267 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5268 (void)St; 5269 return Temp.getIEEE().convertToDouble(); 5270 } 5271 5272 #ifdef HAS_IEE754_FLOAT128 5273 float128 APFloat::convertToQuad() const { 5274 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) 5275 return getIEEE().convertToQuad(); 5276 assert(getSemantics().isRepresentableBy(semIEEEquad) && 5277 "Float semantics is not representable by IEEEquad"); 5278 APFloat Temp = *this; 5279 bool LosesInfo; 5280 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); 5281 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5282 (void)St; 5283 return Temp.getIEEE().convertToQuad(); 5284 } 5285 #endif 5286 5287 float APFloat::convertToFloat() const { 5288 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5289 return getIEEE().convertToFloat(); 5290 
assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5291 "Float semantics is not representable by IEEEsingle"); 5292 APFloat Temp = *this; 5293 bool LosesInfo; 5294 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5295 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5296 (void)St; 5297 return Temp.getIEEE().convertToFloat(); 5298 } 5299 5300 } // namespace llvm 5301 5302 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5303