//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class to represent arbitrary precision floating
// point values and provide a variety of arithmetic operations on them.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <limits.h>

/// Expands to a `return` of METHOD_CALL invoked on whichever member of the
/// union `U` matches the current semantics: `U.IEEE` when the semantics use
/// the IEEEFloat layout, `U.Double` when they use the DoubleAPFloat layout.
/// Any other layout hits llvm_unreachable. Because the expansion contains
/// `return` statements, the macro must be the tail of the enclosing function.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)

using namespace llvm;

/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// TODO: This file already uses constexpr elsewhere; consider changing this
/// into a static constexpr function.
// The key is _lhs * 4 + _rhs, so it is unique per pair only while every
// category value is in the range [0, 3].
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");

namespace llvm {

// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise.
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,
};

// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};

/* Represents floating point arithmetic semantics.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* How Inf and NaN are represented; defaults to the IEEE 754 scheme.  */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* Which bit pattern denotes NaN; defaults to the IEEE scheme.  */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Does not take into account
  // the value of fltNonfiniteBehavior.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};

// Initializers below follow the field order of fltSemantics:
//   {maxExponent, minExponent, precision, sizeInBits
//    [, nonFiniteBehavior, nanEncoding]}
// Entries that omit the last two fields get the IEEE defaults.
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static constexpr fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.
*/
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.
*/ 180 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 181 53 + 53, 128}; 182 183 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 184 switch (S) { 185 case S_IEEEhalf: 186 return IEEEhalf(); 187 case S_BFloat: 188 return BFloat(); 189 case S_IEEEsingle: 190 return IEEEsingle(); 191 case S_IEEEdouble: 192 return IEEEdouble(); 193 case S_IEEEquad: 194 return IEEEquad(); 195 case S_PPCDoubleDouble: 196 return PPCDoubleDouble(); 197 case S_Float8E5M2: 198 return Float8E5M2(); 199 case S_Float8E5M2FNUZ: 200 return Float8E5M2FNUZ(); 201 case S_Float8E4M3FN: 202 return Float8E4M3FN(); 203 case S_Float8E4M3FNUZ: 204 return Float8E4M3FNUZ(); 205 case S_Float8E4M3B11FNUZ: 206 return Float8E4M3B11FNUZ(); 207 case S_FloatTF32: 208 return FloatTF32(); 209 case S_x87DoubleExtended: 210 return x87DoubleExtended(); 211 } 212 llvm_unreachable("Unrecognised floating semantics"); 213 } 214 215 APFloatBase::Semantics 216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 217 if (&Sem == &llvm::APFloat::IEEEhalf()) 218 return S_IEEEhalf; 219 else if (&Sem == &llvm::APFloat::BFloat()) 220 return S_BFloat; 221 else if (&Sem == &llvm::APFloat::IEEEsingle()) 222 return S_IEEEsingle; 223 else if (&Sem == &llvm::APFloat::IEEEdouble()) 224 return S_IEEEdouble; 225 else if (&Sem == &llvm::APFloat::IEEEquad()) 226 return S_IEEEquad; 227 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 228 return S_PPCDoubleDouble; 229 else if (&Sem == &llvm::APFloat::Float8E5M2()) 230 return S_Float8E5M2; 231 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 232 return S_Float8E5M2FNUZ; 233 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 234 return S_Float8E4M3FN; 235 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 236 return S_Float8E4M3FNUZ; 237 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 238 return S_Float8E4M3B11FNUZ; 239 else if (&Sem == &llvm::APFloat::FloatTF32()) 240 return S_FloatTF32; 241 else if (&Sem == 
&llvm::APFloat::x87DoubleExtended()) 242 return S_x87DoubleExtended; 243 else 244 llvm_unreachable("Unknown floating semantics"); 245 } 246 247 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 248 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 249 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 250 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 251 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 252 const fltSemantics &APFloatBase::PPCDoubleDouble() { 253 return semPPCDoubleDouble; 254 } 255 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 256 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 257 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 258 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 259 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 260 return semFloat8E4M3B11FNUZ; 261 } 262 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } 263 const fltSemantics &APFloatBase::x87DoubleExtended() { 264 return semX87DoubleExtended; 265 } 266 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 267 268 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 269 constexpr RoundingMode APFloatBase::rmTowardPositive; 270 constexpr RoundingMode APFloatBase::rmTowardNegative; 271 constexpr RoundingMode APFloatBase::rmTowardZero; 272 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 273 274 /* A tight upper bound on number of parts required to hold the value 275 pow(5, power) is 276 277 power * 815 / (351 * integerPartWidth) + 1 278 279 However, whilst the result may require only this many parts, 280 because we are multiplying two values to get it, the 281 multiplication may require an extra part with the excess part 282 being zero (consider the trivial case of 1 * 1, tcFullMultiply 283 requires two parts to hold the 
single-part result). So we add an 284 extra one to guarantee enough space whilst multiplying. */ 285 const unsigned int maxExponent = 16383; 286 const unsigned int maxPrecision = 113; 287 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 288 const unsigned int maxPowerOfFiveParts = 289 2 + 290 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 291 292 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 293 return semantics.precision; 294 } 295 APFloatBase::ExponentType 296 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 297 return semantics.maxExponent; 298 } 299 APFloatBase::ExponentType 300 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 301 return semantics.minExponent; 302 } 303 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 304 return semantics.sizeInBits; 305 } 306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 307 bool isSigned) { 308 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 309 // at least one more bit than the MaxExponent to hold the max FP value. 310 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 311 // Extra sign bit needed. 312 if (isSigned) 313 ++MinBitWidth; 314 return MinBitWidth; 315 } 316 317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 318 const fltSemantics &Dst) { 319 // Exponent range must be larger. 320 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 321 return false; 322 323 // If the mantissa is long enough, the result value could still be denormal 324 // with a larger exponent range. 325 // 326 // FIXME: This condition is probably not accurate but also shouldn't be a 327 // practical concern with existing types. 
328 return Dst.precision >= Src.precision; 329 } 330 331 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 332 return Sem.sizeInBits; 333 } 334 335 static constexpr APFloatBase::ExponentType 336 exponentZero(const fltSemantics &semantics) { 337 return semantics.minExponent - 1; 338 } 339 340 static constexpr APFloatBase::ExponentType 341 exponentInf(const fltSemantics &semantics) { 342 return semantics.maxExponent + 1; 343 } 344 345 static constexpr APFloatBase::ExponentType 346 exponentNaN(const fltSemantics &semantics) { 347 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 348 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 349 return exponentZero(semantics); 350 return semantics.maxExponent; 351 } 352 return semantics.maxExponent + 1; 353 } 354 355 /* A bunch of private, handy routines. */ 356 357 static inline Error createError(const Twine &Err) { 358 return make_error<StringError>(Err, inconvertibleErrorCode()); 359 } 360 361 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 362 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 363 } 364 365 /* Returns 0U-9U. Return values >= 10U are not digits. */ 366 static inline unsigned int 367 decDigitValue(unsigned int c) 368 { 369 return c - '0'; 370 } 371 372 /* Return the value of a decimal exponent of the form 373 [+-]ddddddd. 374 375 If the exponent overflows, returns a large exponent with the 376 appropriate sign. */ 377 static Expected<int> readExponent(StringRef::iterator begin, 378 StringRef::iterator end) { 379 bool isNegative; 380 unsigned int absExponent; 381 const unsigned int overlargeExponent = 24000; /* FIXME. 
*/ 382 StringRef::iterator p = begin; 383 384 // Treat no exponent as 0 to match binutils 385 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 386 return 0; 387 } 388 389 isNegative = (*p == '-'); 390 if (*p == '-' || *p == '+') { 391 p++; 392 if (p == end) 393 return createError("Exponent has no digits"); 394 } 395 396 absExponent = decDigitValue(*p++); 397 if (absExponent >= 10U) 398 return createError("Invalid character in exponent"); 399 400 for (; p != end; ++p) { 401 unsigned int value; 402 403 value = decDigitValue(*p); 404 if (value >= 10U) 405 return createError("Invalid character in exponent"); 406 407 absExponent = absExponent * 10U + value; 408 if (absExponent >= overlargeExponent) { 409 absExponent = overlargeExponent; 410 break; 411 } 412 } 413 414 if (isNegative) 415 return -(int) absExponent; 416 else 417 return (int) absExponent; 418 } 419 420 /* This is ugly and needs cleaning up, but I don't immediately see 421 how whilst remaining safe. */ 422 static Expected<int> totalExponent(StringRef::iterator p, 423 StringRef::iterator end, 424 int exponentAdjustment) { 425 int unsignedExponent; 426 bool negative, overflow; 427 int exponent = 0; 428 429 if (p == end) 430 return createError("Exponent has no digits"); 431 432 negative = *p == '-'; 433 if (*p == '-' || *p == '+') { 434 p++; 435 if (p == end) 436 return createError("Exponent has no digits"); 437 } 438 439 unsignedExponent = 0; 440 overflow = false; 441 for (; p != end; ++p) { 442 unsigned int value; 443 444 value = decDigitValue(*p); 445 if (value >= 10U) 446 return createError("Invalid character in exponent"); 447 448 unsignedExponent = unsignedExponent * 10 + value; 449 if (unsignedExponent > 32767) { 450 overflow = true; 451 break; 452 } 453 } 454 455 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 456 overflow = true; 457 458 if (!overflow) { 459 exponent = unsignedExponent; 460 if (negative) 461 exponent = -exponent; 462 exponent += exponentAdjustment; 463 if 
(exponent > 32767 || exponent < -32768) 464 overflow = true; 465 } 466 467 if (overflow) 468 exponent = negative ? -32768: 32767; 469 470 return exponent; 471 } 472 473 static Expected<StringRef::iterator> 474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 475 StringRef::iterator *dot) { 476 StringRef::iterator p = begin; 477 *dot = end; 478 while (p != end && *p == '0') 479 p++; 480 481 if (p != end && *p == '.') { 482 *dot = p++; 483 484 if (end - begin == 1) 485 return createError("Significand has no digits"); 486 487 while (p != end && *p == '0') 488 p++; 489 } 490 491 return p; 492 } 493 494 /* Given a normal decimal floating point number of the form 495 496 dddd.dddd[eE][+-]ddd 497 498 where the decimal point and exponent are optional, fill out the 499 structure D. Exponent is appropriate if the significand is 500 treated as an integer, and normalizedExponent if the significand 501 is taken to have the decimal point after a single leading 502 non-zero digit. 503 504 If the value is zero, V->firstSigDigit points to a non-digit, and 505 the return exponent is zero. 
*/
struct decimalInfo {
  // First character after any leading zeroes and leading dot.
  const char *firstSigDigit;
  // Last character kept once trailing zeroes and a trailing dot are dropped.
  const char *lastSigDigit;
  // Decimal exponent when the significand is read as an integer.
  int exponent;
  // Decimal exponent with the point placed after the first significant digit.
  int normalizedExponent;
};

// Parse the decimal number in [begin, end) and fill in *D. Returns an error
// for malformed input: a second '.', a character other than 'e'/'E' ending
// the digits, a significand without digits, or any error reported by
// skipLeadingZeroesAndAnyDot / readExponent.
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  // Advance p over the digits, recording the position of a single dot.
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  // Anything left over must be an exponent introduced by 'e' or 'E'.
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      // Inner loop steps back over '0's; the outer loop repeats that once
      // more if it stopped on the decimal point.
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    // The boolean terms subtract one for the dot character itself when it
    // lies inside the measured distance.
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}

/* Return the trailing fraction of a hexadecimal number.
581 DIGITVALUE is the first hex digit of the fraction, P points to 582 the next digit. */ 583 static Expected<lostFraction> 584 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 585 unsigned int digitValue) { 586 unsigned int hexDigit; 587 588 /* If the first trailing digit isn't 0 or 8 we can work out the 589 fraction immediately. */ 590 if (digitValue > 8) 591 return lfMoreThanHalf; 592 else if (digitValue < 8 && digitValue > 0) 593 return lfLessThanHalf; 594 595 // Otherwise we need to find the first non-zero digit. 596 while (p != end && (*p == '0' || *p == '.')) 597 p++; 598 599 if (p == end) 600 return createError("Invalid trailing hexadecimal fraction!"); 601 602 hexDigit = hexDigitValue(*p); 603 604 /* If we ran off the end it is exactly zero or one-half, otherwise 605 a little more. */ 606 if (hexDigit == UINT_MAX) 607 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 608 else 609 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 610 } 611 612 /* Return the fraction lost were a bignum truncated losing the least 613 significant BITS bits. */ 614 static lostFraction 615 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 616 unsigned int partCount, 617 unsigned int bits) 618 { 619 unsigned int lsb; 620 621 lsb = APInt::tcLSB(parts, partCount); 622 623 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ 624 if (bits <= lsb) 625 return lfExactlyZero; 626 if (bits == lsb + 1) 627 return lfExactlyHalf; 628 if (bits <= partCount * APFloatBase::integerPartWidth && 629 APInt::tcExtractBit(parts, bits - 1)) 630 return lfMoreThanHalf; 631 632 return lfLessThanHalf; 633 } 634 635 /* Shift DST right BITS bits noting lost fraction. 
*/ 636 static lostFraction 637 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 638 { 639 lostFraction lost_fraction; 640 641 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 642 643 APInt::tcShiftRight(dst, parts, bits); 644 645 return lost_fraction; 646 } 647 648 /* Combine the effect of two lost fractions. */ 649 static lostFraction 650 combineLostFractions(lostFraction moreSignificant, 651 lostFraction lessSignificant) 652 { 653 if (lessSignificant != lfExactlyZero) { 654 if (moreSignificant == lfExactlyZero) 655 moreSignificant = lfLessThanHalf; 656 else if (moreSignificant == lfExactlyHalf) 657 moreSignificant = lfMoreThanHalf; 658 } 659 660 return moreSignificant; 661 } 662 663 /* The error from the true value, in half-ulps, on multiplying two 664 floating point numbers, which differ from the value they 665 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 666 than the returned value. 667 668 See "How to Read Floating Point Numbers Accurately" by William D 669 Clinger. */ 670 static unsigned int 671 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 672 { 673 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 674 675 if (HUerr1 + HUerr2 == 0) 676 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 677 else 678 return inexactMultiply + 2 * (HUerr1 + HUerr2); 679 } 680 681 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 682 when the least significant BITS are truncated. BITS cannot be 683 zero. 
*/
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  // Locate the part holding the topmost truncated bit (index bits - 1) and
  // how many bits of that part are being truncated.
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  // Keep only the truncated bits of that part.
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  // Single-part case: return the smaller of the two unsigned (wrapping)
  // differences, i.e. the distance between PART and BOUNDARY.
  if (count == 0) {
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  // Multi-part case: the distance is small only if the top part sits on the
  // boundary (or just below it) and every part in between is all zeros (or
  // all ones); the lowest part then gives the distance.
  if (part == boundary) {
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}

/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  // Holds 5^8, 5^16, 5^32, ... back to back; each entry is the square of the
  // one before it.
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  // Number of parts occupied by the entry of pow5s currently in use.
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  // p1 holds the running product and p2 receives the next product; the two
  // trade places after every multiplication.
  p1 = dst;
  p2 = scratch;

  // The low three bits of POWER come straight from the table; the remaining
  // bits are handled by the squaring loop below.
  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      // pow5 was advanced past the previous entry at the end of the last
      // iteration, so that entry starts at pow5 - partsCount.
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product now sits in p2 with RESULT parts; swap so that p1
         holds it again and p2 becomes the scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    pow5 += partsCount;
  }

  // The product may have ended up in the scratch buffer; copy it back.
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}

/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";

/* Write out an integerPart in hexadecimal, starting with the most
   significant nibble.  Write out exactly COUNT hexdigits, return
   COUNT.  */
static unsigned int
partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
           const char *hexDigitChars)
{
  unsigned int result = count;

  assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);

  // Drop the low nibbles that are not wanted, then fill DST from its last
  // character backwards so the most significant nibble lands in dst[0].
  part >>= (APFloatBase::integerPartWidth - 4 * count);
  while (count--) {
    dst[count] = hexDigitChars[part & 0xf];
    part >>= 4;
  }

  return result;
}

/* Write out an unsigned decimal integer.
*/ 813 static char * 814 writeUnsignedDecimal (char *dst, unsigned int n) 815 { 816 char buff[40], *p; 817 818 p = buff; 819 do 820 *p++ = '0' + n % 10; 821 while (n /= 10); 822 823 do 824 *dst++ = *--p; 825 while (p != buff); 826 827 return dst; 828 } 829 830 /* Write out a signed decimal integer. */ 831 static char * 832 writeSignedDecimal (char *dst, int value) 833 { 834 if (value < 0) { 835 *dst++ = '-'; 836 dst = writeUnsignedDecimal(dst, -(unsigned) value); 837 } else 838 dst = writeUnsignedDecimal(dst, value); 839 840 return dst; 841 } 842 843 namespace detail { 844 /* Constructors. */ 845 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 846 unsigned int count; 847 848 semantics = ourSemantics; 849 count = partCount(); 850 if (count > 1) 851 significand.parts = new integerPart[count]; 852 } 853 854 void IEEEFloat::freeSignificand() { 855 if (needsCleanup()) 856 delete [] significand.parts; 857 } 858 859 void IEEEFloat::assign(const IEEEFloat &rhs) { 860 assert(semantics == rhs.semantics); 861 862 sign = rhs.sign; 863 category = rhs.category; 864 exponent = rhs.exponent; 865 if (isFiniteNonZero() || category == fcNaN) 866 copySignificand(rhs); 867 } 868 869 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 870 assert(isFiniteNonZero() || category == fcNaN); 871 assert(rhs.partCount() >= partCount()); 872 873 APInt::tcAssign(significandParts(), rhs.significandParts(), 874 partCount()); 875 } 876 877 /* Make this number a NaN, with an arbitrary but deterministic value 878 for the significand. If double or longer, this is a signalling NaN, 879 which may not be ideal. If float, this is QNaN(0). 
*/ 880 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 881 category = fcNaN; 882 sign = Negative; 883 exponent = exponentNaN(); 884 885 integerPart *significand = significandParts(); 886 unsigned numParts = partCount(); 887 888 APInt fill_storage; 889 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 890 // Finite-only types do not distinguish signalling and quiet NaN, so 891 // make them all signalling. 892 SNaN = false; 893 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 894 sign = true; 895 fill_storage = APInt::getZero(semantics->precision - 1); 896 } else { 897 fill_storage = APInt::getAllOnes(semantics->precision - 1); 898 } 899 fill = &fill_storage; 900 } 901 902 // Set the significand bits to the fill. 903 if (!fill || fill->getNumWords() < numParts) 904 APInt::tcSet(significand, 0, numParts); 905 if (fill) { 906 APInt::tcAssign(significand, fill->getRawData(), 907 std::min(fill->getNumWords(), numParts)); 908 909 // Zero out the excess bits of the significand. 910 unsigned bitsToPreserve = semantics->precision - 1; 911 unsigned part = bitsToPreserve / 64; 912 bitsToPreserve %= 64; 913 significand[part] &= ((1ULL << bitsToPreserve) - 1); 914 for (part++; part != numParts; ++part) 915 significand[part] = 0; 916 } 917 918 unsigned QNaNBit = semantics->precision - 2; 919 920 if (SNaN) { 921 // We always have to clear the QNaN bit to make it an SNaN. 922 APInt::tcClearBit(significand, QNaNBit); 923 924 // If there are no bits set in the payload, we have to set 925 // *something* to make it a NaN instead of an infinity; 926 // conventionally, this is the next bit down from the QNaN bit. 927 if (APInt::tcIsZero(significand, numParts)) 928 APInt::tcSetBit(significand, QNaNBit - 1); 929 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 930 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 931 // Do nothing. 
932 } else { 933 // We always have to set the QNaN bit to make it a QNaN. 934 APInt::tcSetBit(significand, QNaNBit); 935 } 936 937 // For x87 extended precision, we want to make a NaN, not a 938 // pseudo-NaN. Maybe we should expose the ability to make 939 // pseudo-NaNs? 940 if (semantics == &semX87DoubleExtended) 941 APInt::tcSetBit(significand, QNaNBit + 1); 942 } 943 944 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 945 if (this != &rhs) { 946 if (semantics != rhs.semantics) { 947 freeSignificand(); 948 initialize(rhs.semantics); 949 } 950 assign(rhs); 951 } 952 953 return *this; 954 } 955 956 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 957 freeSignificand(); 958 959 semantics = rhs.semantics; 960 significand = rhs.significand; 961 exponent = rhs.exponent; 962 category = rhs.category; 963 sign = rhs.sign; 964 965 rhs.semantics = &semBogus; 966 return *this; 967 } 968 969 bool IEEEFloat::isDenormal() const { 970 return isFiniteNonZero() && (exponent == semantics->minExponent) && 971 (APInt::tcExtractBit(significandParts(), 972 semantics->precision - 1) == 0); 973 } 974 975 bool IEEEFloat::isSmallest() const { 976 // The smallest number by magnitude in our format will be the smallest 977 // denormal, i.e. the floating point number with exponent being minimum 978 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 979 return isFiniteNonZero() && exponent == semantics->minExponent && 980 significandMSB() == 0; 981 } 982 983 bool IEEEFloat::isSmallestNormalized() const { 984 return getCategory() == fcNormal && exponent == semantics->minExponent && 985 isSignificandAllZerosExceptMSB(); 986 } 987 988 bool IEEEFloat::isSignificandAllOnes() const { 989 // Test if the significand excluding the integral bit is all ones. This allows 990 // us to test for binade boundaries. 
991 const integerPart *Parts = significandParts(); 992 const unsigned PartCount = partCountForBits(semantics->precision); 993 for (unsigned i = 0; i < PartCount - 1; i++) 994 if (~Parts[i]) 995 return false; 996 997 // Set the unused high bits to all ones when we compare. 998 const unsigned NumHighBits = 999 PartCount*integerPartWidth - semantics->precision + 1; 1000 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1001 "Can not have more high bits to fill than integerPartWidth"); 1002 const integerPart HighBitFill = 1003 ~integerPart(0) << (integerPartWidth - NumHighBits); 1004 if (~(Parts[PartCount - 1] | HighBitFill)) 1005 return false; 1006 1007 return true; 1008 } 1009 1010 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1011 // Test if the significand excluding the integral bit is all ones except for 1012 // the least significant bit. 1013 const integerPart *Parts = significandParts(); 1014 1015 if (Parts[0] & 1) 1016 return false; 1017 1018 const unsigned PartCount = partCountForBits(semantics->precision); 1019 for (unsigned i = 0; i < PartCount - 1; i++) { 1020 if (~Parts[i] & ~unsigned{!i}) 1021 return false; 1022 } 1023 1024 // Set the unused high bits to all ones when we compare. 1025 const unsigned NumHighBits = 1026 PartCount * integerPartWidth - semantics->precision + 1; 1027 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1028 "Can not have more high bits to fill than integerPartWidth"); 1029 const integerPart HighBitFill = ~integerPart(0) 1030 << (integerPartWidth - NumHighBits); 1031 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1032 return false; 1033 1034 return true; 1035 } 1036 1037 bool IEEEFloat::isSignificandAllZeros() const { 1038 // Test if the significand excluding the integral bit is all zeros. This 1039 // allows us to test for binade boundaries. 
1040 const integerPart *Parts = significandParts(); 1041 const unsigned PartCount = partCountForBits(semantics->precision); 1042 1043 for (unsigned i = 0; i < PartCount - 1; i++) 1044 if (Parts[i]) 1045 return false; 1046 1047 // Compute how many bits are used in the final word. 1048 const unsigned NumHighBits = 1049 PartCount*integerPartWidth - semantics->precision + 1; 1050 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1051 "clear than integerPartWidth"); 1052 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1053 1054 if (Parts[PartCount - 1] & HighBitMask) 1055 return false; 1056 1057 return true; 1058 } 1059 1060 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1061 const integerPart *Parts = significandParts(); 1062 const unsigned PartCount = partCountForBits(semantics->precision); 1063 1064 for (unsigned i = 0; i < PartCount - 1; i++) { 1065 if (Parts[i]) 1066 return false; 1067 } 1068 1069 const unsigned NumHighBits = 1070 PartCount * integerPartWidth - semantics->precision + 1; 1071 return Parts[PartCount - 1] == integerPart(1) 1072 << (integerPartWidth - NumHighBits); 1073 } 1074 1075 bool IEEEFloat::isLargest() const { 1076 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1077 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1078 // The largest number by magnitude in our format will be the floating point 1079 // number with maximum exponent and with significand that is all ones except 1080 // the LSB. 1081 return isFiniteNonZero() && exponent == semantics->maxExponent && 1082 isSignificandAllOnesExceptLSB(); 1083 } else { 1084 // The largest number by magnitude in our format will be the floating point 1085 // number with maximum exponent and with significand that is all ones. 
1086 return isFiniteNonZero() && exponent == semantics->maxExponent && 1087 isSignificandAllOnes(); 1088 } 1089 } 1090 1091 bool IEEEFloat::isInteger() const { 1092 // This could be made more efficient; I'm going for obviously correct. 1093 if (!isFinite()) return false; 1094 IEEEFloat truncated = *this; 1095 truncated.roundToIntegral(rmTowardZero); 1096 return compare(truncated) == cmpEqual; 1097 } 1098 1099 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1100 if (this == &rhs) 1101 return true; 1102 if (semantics != rhs.semantics || 1103 category != rhs.category || 1104 sign != rhs.sign) 1105 return false; 1106 if (category==fcZero || category==fcInfinity) 1107 return true; 1108 1109 if (isFiniteNonZero() && exponent != rhs.exponent) 1110 return false; 1111 1112 return std::equal(significandParts(), significandParts() + partCount(), 1113 rhs.significandParts()); 1114 } 1115 1116 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1117 initialize(&ourSemantics); 1118 sign = 0; 1119 category = fcNormal; 1120 zeroSignificand(); 1121 exponent = ourSemantics.precision - 1; 1122 significandParts()[0] = value; 1123 normalize(rmNearestTiesToEven, lfExactlyZero); 1124 } 1125 1126 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1127 initialize(&ourSemantics); 1128 makeZero(false); 1129 } 1130 1131 // Delegate to the previous constructor, because later copy constructor may 1132 // actually inspects category, which can't be garbage. 
1133 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1134 : IEEEFloat(ourSemantics) {} 1135 1136 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1137 initialize(rhs.semantics); 1138 assign(rhs); 1139 } 1140 1141 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1142 *this = std::move(rhs); 1143 } 1144 1145 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1146 1147 unsigned int IEEEFloat::partCount() const { 1148 return partCountForBits(semantics->precision + 1); 1149 } 1150 1151 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 1152 return const_cast<IEEEFloat *>(this)->significandParts(); 1153 } 1154 1155 IEEEFloat::integerPart *IEEEFloat::significandParts() { 1156 if (partCount() > 1) 1157 return significand.parts; 1158 else 1159 return &significand.part; 1160 } 1161 1162 void IEEEFloat::zeroSignificand() { 1163 APInt::tcSet(significandParts(), 0, partCount()); 1164 } 1165 1166 /* Increment an fcNormal floating point number's significand. */ 1167 void IEEEFloat::incrementSignificand() { 1168 integerPart carry; 1169 1170 carry = APInt::tcIncrement(significandParts(), partCount()); 1171 1172 /* Our callers should never cause us to overflow. */ 1173 assert(carry == 0); 1174 (void)carry; 1175 } 1176 1177 /* Add the significand of the RHS. Returns the carry flag. */ 1178 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1179 integerPart *parts; 1180 1181 parts = significandParts(); 1182 1183 assert(semantics == rhs.semantics); 1184 assert(exponent == rhs.exponent); 1185 1186 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1187 } 1188 1189 /* Subtract the significand of the RHS with a borrow flag. Returns 1190 the borrow flag. 
*/
IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
                                                      integerPart borrow) {
  integerPart *parts;

  parts = significandParts();

  assert(semantics == rhs.semantics);
  assert(exponent == rhs.exponent);

  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
                           partCount());
}

/* Multiply our significand by the significand of the RHS.  If ADDEND is
   non-zero, add it on to the full-precision result of the multiplication.
   The exponent is updated to match, but the result is not necessarily
   normalized (see the note near the end).  Returns the lost fraction.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Small products live in the on-stack scratch buffer; larger ones are heap
  // allocated and released at the end of this function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Temporarily make *this an extended-precision value whose significand is
    // the full product, so the addend can be added with the regular helper.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}

// Plain multiplication: forwards to the two-argument overload with a
// default-constructed addend of the same semantics.
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  return multiplySignificand(rhs, IEEEFloat(*semantics));
}

/* Divide our significand by the significand of the RHS using bitwise long
   division.  The quotient replaces our significand and the exponent is
   adjusted to match.  Returns the lost fraction.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  // One buffer holds both working copies: the dividend in the first
  // partsCount parts and the divisor in the next partsCount parts.
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.  Our own
     significand is cleared so quotient bits can be set into it below.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division.  One quotient bit is produced per iteration, from the
     integer bit downwards.  */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction by comparing the remainder (already shifted
     left once more by the loop) against the divisor.  */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}

// Result of APInt::tcMSB over the significand parts.  Callers in this file
// add one to it and treat a sum of zero as "no bits set".
unsigned int IEEEFloat::significandMSB() const {
  return APInt::tcMSB(significandParts(), partCount());
}

// Result of APInt::tcLSB over the significand parts.
unsigned int IEEEFloat::significandLSB() const {
  return APInt::tcLSB(significandParts(), partCount());
}

/* Shift the significand right BITS bits and add BITS to the exponent.
   Returns the fraction lost by the shift.
   Note that a zero result is NOT normalized to fcZero.  */
lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
  /* Our exponent should not overflow.  */
  assert((ExponentType) (exponent + bits) >= exponent);

  exponent += bits;

  return shiftRight(significandParts(), partCount(), bits);
}

/* Shift the significand left BITS bits, subtract BITS from its exponent.  */
void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
  assert(bits < semantics->precision);

  if (bits) {
    unsigned int partsCount = partCount();

    APInt::tcShiftLeft(significandParts(), partsCount, bits);
    exponent -= bits;

    assert(!APInt::tcIsZero(significandParts(), partsCount));
  }
}

// Compares the magnitudes of two finite non-zero values with the same
// semantics, ignoring their signs: exponents are compared first, and the
// significands only when the exponents are equal.
IEEEFloat::cmpResult
IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
  int compare;

  assert(semantics == rhs.semantics);
  assert(isFiniteNonZero());
  assert(rhs.isFiniteNonZero());

  compare = exponent - rhs.exponent;

  /* If exponents are equal, do an unsigned bignum comparison of the
     significands.  */
  if (compare == 0)
    compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
                               partCount());

  if (compare > 0)
    return cmpGreaterThan;
  else if (compare < 0)
    return cmpLessThan;
  else
    return cmpEqual;
}

/* Set the least significant BITS bits of a bignum, clear the
   rest.  DST holds PARTS words.  */
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
                                      unsigned bits) {
  unsigned i = 0;
  // Fill whole words while more than one word's worth of bits remains.
  while (bits > APInt::APINT_BITS_PER_WORD) {
    dst[i++] = ~(APInt::WordType)0;
    bits -= APInt::APINT_BITS_PER_WORD;
  }

  // Partially (or exactly) filled final word.
  if (bits)
    dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);

  while (i < parts)
    dst[i++] = 0;
}

/* Handle overflow.  Sign is preserved.  We either become infinity (NaN for
   NanOnly semantics, which have no infinity) or the largest finite number.  */
IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
  /* Infinity?  */
  if (rounding_mode == rmNearestTiesToEven ||
      rounding_mode == rmNearestTiesToAway ||
      (rounding_mode == rmTowardPositive && !sign) ||
      (rounding_mode == rmTowardNegative && sign)) {
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return (opStatus) (opOverflow | opInexact);
  }

  /* Otherwise we become the largest finite number.  */
  category = fcNormal;
  exponent = semantics->maxExponent;
  tcSetLeastSignificantBits(significandParts(), partCount(),
                            semantics->precision);
  // With the all-ones NaN encoding the largest finite value has its LSB
  // clear (see isLargest).
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    APInt::tcClearBit(significandParts(), 0);

  return opInexact;
}

/* Returns TRUE if, when truncating the current number, with BIT the
   new LSB, with the given lost fraction and rounding mode, the result
   would need to be rounded away from zero (i.e., by increasing the
   significand).  This routine must work for fcZero of both signs, and
   fcNormal numbers.  */
bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
                                  lostFraction lost_fraction,
                                  unsigned int bit) const {
  /* NaNs and infinities should not have lost fractions.  */
  assert(isFiniteNonZero() || category == fcZero);

  /* Current callers never pass this so we don't handle it.  */
  assert(lost_fraction != lfExactlyZero);

  switch (rounding_mode) {
  case rmNearestTiesToAway:
    return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;

  case rmNearestTiesToEven:
    if (lost_fraction == lfMoreThanHalf)
      return true;

    /* Our zeroes don't have a significand to test.  On an exact tie, round
       away only when the new LSB is set, so the result becomes even.  */
    if (lost_fraction == lfExactlyHalf && category != fcZero)
      return APInt::tcExtractBit(significandParts(), bit);

    return false;

  case rmTowardZero:
    return false;

  case rmTowardPositive:
    return !sign;

  case rmTowardNegative:
    return sign;

  default:
    break;
  }
  llvm_unreachable("Invalid rounding mode found");
}

// Brings a finite non-zero value into canonical form: positions the MSB of
// the significand at the integer bit where the exponent range allows it,
// rounds according to rounding_mode using lost_fraction, and reports
// overflow, underflow and inexactness through the returned status.  Values
// of any other category are returned untouched with opOK.
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones values is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  */
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones values is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a denormal, or a value that rounded all the way down to zero.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}

// Handles every add/subtract operand pairing in which at least one operand
// is zero, infinity or NaN.  For two fcNormal operands it returns opDivByZero
// as a marker that the caller (addOrSubtract) must do the real arithmetic.
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    // Only the RHS is a NaN: the result takes its value.
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    // A signalling NaN operand yields opInvalidOp; the result is quieted.
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted.  */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    // Fake status, indicating this is not a special case.
    return opDivByZero;
  }
}

/* Add or subtract two normal numbers.  The operand with the smaller exponent
   is shifted right to align the two; the fraction lost by that shift is
   returned.  */
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
                                                 bool subtract) {
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction.  */
  subtract ^= static_cast<bool>(sign ^ rhs.sign);

  /* Are we bigger exponent-wise than the RHS?  */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect.  */
  if (subtract) {
    IEEEFloat temp_rhs(rhs);

    // Align with one extra low-order bit: the smaller operand is shifted
    // right by one bit less than the exponent gap and the larger operand is
    // shifted left by one.
    if (bits == 0)
      lost_fraction = lfExactlyZero;
    else if (bits > 0) {
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
    } else {
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
    }

    // Should we reverse the subtraction.
    if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted.  */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary.  */
    assert(!carry);
    (void)carry;
  } else {
    if (bits > 0) {
      IEEEFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen.  */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}

// Handles the multiply operand pairings that involve zero, infinity or NaN.
// The caller (multiply) has already XORed rhs.sign into sign.  Two fcNormal
// operands return opOK with the category left unchanged.
IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    // Cleared so that the XOR below leaves the result with rhs's sign.
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    category = fcInfinity;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcZero, fcZero):
    category = fcZero;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Handles the divide operand pairings that involve zero, infinity or NaN.
// The caller (divide) has already XORed rhs.sign into sign.  Two fcNormal
// operands return opOK with the category left unchanged.
IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    // Cleared so that the XOR below leaves the result with rhs's sign.
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
    // Division by zero: infinity, or NaN for formats without an infinity.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return opDivByZero;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Handles the mod operand pairings that involve zero, infinity or NaN.  Two
// fcNormal operands return opOK with the value left unchanged.
IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Handles the remainder operand pairings that involve zero, infinity or NaN.
// The cases match modSpecials except that two fcNormal operands return
// opDivByZero as a marker that the caller must do the real arithmetic.
IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero; // fake status, indicating this is not a special case
  }
}

/* Change sign.
*/ 1991 void IEEEFloat::changeSign() { 1992 // With NaN-as-negative-zero, neither NaN or negative zero can change 1993 // their signs. 1994 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 1995 (isZero() || isNaN())) 1996 return; 1997 /* Look mummy, this one's easy. */ 1998 sign = !sign; 1999 } 2000 2001 /* Normalized addition or subtraction. */ 2002 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2003 roundingMode rounding_mode, 2004 bool subtract) { 2005 opStatus fs; 2006 2007 fs = addOrSubtractSpecials(rhs, subtract); 2008 2009 /* This return code means it was not a simple case. */ 2010 if (fs == opDivByZero) { 2011 lostFraction lost_fraction; 2012 2013 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2014 fs = normalize(rounding_mode, lost_fraction); 2015 2016 /* Can only be zero if we lost no fraction. */ 2017 assert(category != fcZero || lost_fraction == lfExactlyZero); 2018 } 2019 2020 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2021 positive zero unless rounding to minus infinity, except that 2022 adding two like-signed zeroes gives that zero. */ 2023 if (category == fcZero) { 2024 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2025 sign = (rounding_mode == rmTowardNegative); 2026 // NaN-in-negative-zero means zeros need to be normalized to +0. 2027 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2028 sign = false; 2029 } 2030 2031 return fs; 2032 } 2033 2034 /* Normalized addition. */ 2035 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2036 roundingMode rounding_mode) { 2037 return addOrSubtract(rhs, rounding_mode, false); 2038 } 2039 2040 /* Normalized subtraction. */ 2041 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2042 roundingMode rounding_mode) { 2043 return addOrSubtract(rhs, rounding_mode, true); 2044 } 2045 2046 /* Normalized multiply. 
*/ 2047 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 2048 roundingMode rounding_mode) { 2049 opStatus fs; 2050 2051 sign ^= rhs.sign; 2052 fs = multiplySpecials(rhs); 2053 2054 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2055 sign = false; 2056 if (isFiniteNonZero()) { 2057 lostFraction lost_fraction = multiplySignificand(rhs); 2058 fs = normalize(rounding_mode, lost_fraction); 2059 if (lost_fraction != lfExactlyZero) 2060 fs = (opStatus) (fs | opInexact); 2061 } 2062 2063 return fs; 2064 } 2065 2066 /* Normalized divide. */ 2067 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 2068 roundingMode rounding_mode) { 2069 opStatus fs; 2070 2071 sign ^= rhs.sign; 2072 fs = divideSpecials(rhs); 2073 2074 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2075 sign = false; 2076 if (isFiniteNonZero()) { 2077 lostFraction lost_fraction = divideSignificand(rhs); 2078 fs = normalize(rounding_mode, lost_fraction); 2079 if (lost_fraction != lfExactlyZero) 2080 fs = (opStatus) (fs | opInexact); 2081 } 2082 2083 return fs; 2084 } 2085 2086 /* Normalized remainder. */ 2087 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 2088 opStatus fs; 2089 unsigned int origSign = sign; 2090 2091 // First handle the special cases. 2092 fs = remainderSpecials(rhs); 2093 if (fs != opDivByZero) 2094 return fs; 2095 2096 fs = opOK; 2097 2098 // Make sure the current value is less than twice the denom. If the addition 2099 // did not succeed (an overflow has happened), which means that the finite 2100 // value we currently posses must be less than twice the denom (as we are 2101 // using the same semantics). 2102 IEEEFloat P2 = rhs; 2103 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 2104 fs = mod(P2); 2105 assert(fs == opOK); 2106 } 2107 2108 // Lets work with absolute numbers. 
2109 IEEEFloat P = rhs; 2110 P.sign = false; 2111 sign = false; 2112 2113 // 2114 // To calculate the remainder we use the following scheme. 2115 // 2116 // The remainder is defained as follows: 2117 // 2118 // remainder = numer - rquot * denom = x - r * p 2119 // 2120 // Where r is the result of: x/p, rounded toward the nearest integral value 2121 // (with halfway cases rounded toward the even number). 2122 // 2123 // Currently, (after x mod 2p): 2124 // r is the number of 2p's present inside x, which is inherently, an even 2125 // number of p's. 2126 // 2127 // We may split the remaining calculation into 4 options: 2128 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2129 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2130 // are done as well. 2131 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2132 // to subtract 1p at least once. 2133 // - if x >= p then we must subtract p at least once, as x must be a 2134 // remainder. 2135 // 2136 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2137 // 2138 // We can now split the remaining calculation to the following 3 options: 2139 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2140 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2141 // must round up to the next even number. so we must subtract p once more. 2142 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2143 // integral, and subtract p once more. 2144 // 2145 2146 // Extend the semantics to prevent an overflow/underflow or inexact result. 
2147 bool losesInfo; 2148 fltSemantics extendedSemantics = *semantics; 2149 extendedSemantics.maxExponent++; 2150 extendedSemantics.minExponent--; 2151 extendedSemantics.precision += 2; 2152 2153 IEEEFloat VEx = *this; 2154 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2155 assert(fs == opOK && !losesInfo); 2156 IEEEFloat PEx = P; 2157 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2158 assert(fs == opOK && !losesInfo); 2159 2160 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2161 // any fraction. 2162 fs = VEx.add(VEx, rmNearestTiesToEven); 2163 assert(fs == opOK); 2164 2165 if (VEx.compare(PEx) == cmpGreaterThan) { 2166 fs = subtract(P, rmNearestTiesToEven); 2167 assert(fs == opOK); 2168 2169 // Make VEx = this.add(this), but because we have different semantics, we do 2170 // not want to `convert` again, so we just subtract PEx twice (which equals 2171 // to the desired value). 2172 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2173 assert(fs == opOK); 2174 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2175 assert(fs == opOK); 2176 2177 cmpResult result = VEx.compare(PEx); 2178 if (result == cmpGreaterThan || result == cmpEqual) { 2179 fs = subtract(P, rmNearestTiesToEven); 2180 assert(fs == opOK); 2181 } 2182 } 2183 2184 if (isZero()) { 2185 sign = origSign; // IEEE754 requires this 2186 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2187 // But some 8-bit floats only have positive 0. 2188 sign = false; 2189 } 2190 2191 else 2192 sign ^= origSign; 2193 return fs; 2194 } 2195 2196 /* Normalized llvm frem (C fmod). 
*/ 2197 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 2198 opStatus fs; 2199 fs = modSpecials(rhs); 2200 unsigned int origSign = sign; 2201 2202 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 2203 compareAbsoluteValue(rhs) != cmpLessThan) { 2204 int Exp = ilogb(*this) - ilogb(rhs); 2205 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven); 2206 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly 2207 // check for it. 2208 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan) 2209 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); 2210 V.sign = sign; 2211 2212 fs = subtract(V, rmNearestTiesToEven); 2213 assert(fs==opOK); 2214 } 2215 if (isZero()) { 2216 sign = origSign; // fmod requires this 2217 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2218 sign = false; 2219 } 2220 return fs; 2221 } 2222 2223 /* Normalized fused-multiply-add. */ 2224 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 2225 const IEEEFloat &addend, 2226 roundingMode rounding_mode) { 2227 opStatus fs; 2228 2229 /* Post-multiplication sign, before addition. */ 2230 sign ^= multiplicand.sign; 2231 2232 /* If and only if all arguments are normal do we need to do an 2233 extended-precision calculation. */ 2234 if (isFiniteNonZero() && 2235 multiplicand.isFiniteNonZero() && 2236 addend.isFinite()) { 2237 lostFraction lost_fraction; 2238 2239 lost_fraction = multiplySignificand(multiplicand, addend); 2240 fs = normalize(rounding_mode, lost_fraction); 2241 if (lost_fraction != lfExactlyZero) 2242 fs = (opStatus) (fs | opInexact); 2243 2244 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2245 positive zero unless rounding to minus infinity, except that 2246 adding two like-signed zeroes gives that zero. 
*/ 2247 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { 2248 sign = (rounding_mode == rmTowardNegative); 2249 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2250 sign = false; 2251 } 2252 } else { 2253 fs = multiplySpecials(multiplicand); 2254 2255 /* FS can only be opOK or opInvalidOp. There is no more work 2256 to do in the latter case. The IEEE-754R standard says it is 2257 implementation-defined in this case whether, if ADDEND is a 2258 quiet NaN, we raise invalid op; this implementation does so. 2259 2260 If we need to do the addition we can do so with normal 2261 precision. */ 2262 if (fs == opOK) 2263 fs = addOrSubtract(addend, rounding_mode, false); 2264 } 2265 2266 return fs; 2267 } 2268 2269 /* Rounding-mode correct round to integral value. */ 2270 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2271 opStatus fs; 2272 2273 if (isInfinity()) 2274 // [IEEE Std 754-2008 6.1]: 2275 // The behavior of infinity in floating-point arithmetic is derived from the 2276 // limiting cases of real arithmetic with operands of arbitrarily 2277 // large magnitude, when such a limit exists. 2278 // ... 2279 // Operations on infinite operands are usually exact and therefore signal no 2280 // exceptions ... 2281 return opOK; 2282 2283 if (isNaN()) { 2284 if (isSignaling()) { 2285 // [IEEE Std 754-2008 6.2]: 2286 // Under default exception handling, any operation signaling an invalid 2287 // operation exception and for which a floating-point result is to be 2288 // delivered shall deliver a quiet NaN. 2289 makeQuiet(); 2290 // [IEEE Std 754-2008 6.2]: 2291 // Signaling NaNs shall be reserved operands that, under default exception 2292 // handling, signal the invalid operation exception(see 7.2) for every 2293 // general-computational and signaling-computational operation except for 2294 // the conversions described in 5.12. 
2295 return opInvalidOp; 2296 } else { 2297 // [IEEE Std 754-2008 6.2]: 2298 // For an operation with quiet NaN inputs, other than maximum and minimum 2299 // operations, if a floating-point result is to be delivered the result 2300 // shall be a quiet NaN which should be one of the input NaNs. 2301 // ... 2302 // Every general-computational and quiet-computational operation involving 2303 // one or more input NaNs, none of them signaling, shall signal no 2304 // exception, except fusedMultiplyAdd might signal the invalid operation 2305 // exception(see 7.2). 2306 return opOK; 2307 } 2308 } 2309 2310 if (isZero()) { 2311 // [IEEE Std 754-2008 6.3]: 2312 // ... the sign of the result of conversions, the quantize operation, the 2313 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2314 // the sign of the first or only operand. 2315 return opOK; 2316 } 2317 2318 // If the exponent is large enough, we know that this value is already 2319 // integral, and the arithmetic below would potentially cause it to saturate 2320 // to +/-Inf. Bail out early instead. 2321 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2322 return opOK; 2323 2324 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2325 // precision of our format, and then subtract it back off again. The choice 2326 // of rounding modes for the addition/subtraction determines the rounding mode 2327 // for our integral rounding as well. 2328 // NOTE: When the input value is negative, we do subtraction followed by 2329 // addition instead. 2330 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2331 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2332 IEEEFloat MagicConstant(*semantics); 2333 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2334 rmNearestTiesToEven); 2335 assert(fs == opOK); 2336 MagicConstant.sign = sign; 2337 2338 // Preserve the input sign so that we can handle the case of zero result 2339 // correctly. 
2340 bool inputSign = isNegative(); 2341 2342 fs = add(MagicConstant, rounding_mode); 2343 2344 // Current value and 'MagicConstant' are both integers, so the result of the 2345 // subtraction is always exact according to Sterbenz' lemma. 2346 subtract(MagicConstant, rounding_mode); 2347 2348 // Restore the input sign. 2349 if (inputSign != isNegative()) 2350 changeSign(); 2351 2352 return fs; 2353 } 2354 2355 2356 /* Comparison requires normalized numbers. */ 2357 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2358 cmpResult result; 2359 2360 assert(semantics == rhs.semantics); 2361 2362 switch (PackCategoriesIntoKey(category, rhs.category)) { 2363 default: 2364 llvm_unreachable(nullptr); 2365 2366 case PackCategoriesIntoKey(fcNaN, fcZero): 2367 case PackCategoriesIntoKey(fcNaN, fcNormal): 2368 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2369 case PackCategoriesIntoKey(fcNaN, fcNaN): 2370 case PackCategoriesIntoKey(fcZero, fcNaN): 2371 case PackCategoriesIntoKey(fcNormal, fcNaN): 2372 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2373 return cmpUnordered; 2374 2375 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2376 case PackCategoriesIntoKey(fcInfinity, fcZero): 2377 case PackCategoriesIntoKey(fcNormal, fcZero): 2378 if (sign) 2379 return cmpLessThan; 2380 else 2381 return cmpGreaterThan; 2382 2383 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2384 case PackCategoriesIntoKey(fcZero, fcInfinity): 2385 case PackCategoriesIntoKey(fcZero, fcNormal): 2386 if (rhs.sign) 2387 return cmpGreaterThan; 2388 else 2389 return cmpLessThan; 2390 2391 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2392 if (sign == rhs.sign) 2393 return cmpEqual; 2394 else if (sign) 2395 return cmpLessThan; 2396 else 2397 return cmpGreaterThan; 2398 2399 case PackCategoriesIntoKey(fcZero, fcZero): 2400 return cmpEqual; 2401 2402 case PackCategoriesIntoKey(fcNormal, fcNormal): 2403 break; 2404 } 2405 2406 /* Two normal numbers. Do they have the same sign? 
*/ 2407 if (sign != rhs.sign) { 2408 if (sign) 2409 result = cmpLessThan; 2410 else 2411 result = cmpGreaterThan; 2412 } else { 2413 /* Compare absolute values; invert result if negative. */ 2414 result = compareAbsoluteValue(rhs); 2415 2416 if (sign) { 2417 if (result == cmpLessThan) 2418 result = cmpGreaterThan; 2419 else if (result == cmpGreaterThan) 2420 result = cmpLessThan; 2421 } 2422 } 2423 2424 return result; 2425 } 2426 2427 /// IEEEFloat::convert - convert a value of one floating point type to another. 2428 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2429 /// records whether the transformation lost information, i.e. whether 2430 /// converting the result back to the original type will produce the 2431 /// original value (this is almost the same as return value==fsOK, but there 2432 /// are edge cases where this is not so). 2433 2434 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2435 roundingMode rounding_mode, 2436 bool *losesInfo) { 2437 lostFraction lostFraction; 2438 unsigned int newPartCount, oldPartCount; 2439 opStatus fs; 2440 int shift; 2441 const fltSemantics &fromSemantics = *semantics; 2442 bool is_signaling = isSignaling(); 2443 2444 lostFraction = lfExactlyZero; 2445 newPartCount = partCountForBits(toSemantics.precision + 1); 2446 oldPartCount = partCount(); 2447 shift = toSemantics.precision - fromSemantics.precision; 2448 2449 bool X86SpecialNan = false; 2450 if (&fromSemantics == &semX87DoubleExtended && 2451 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2452 (!(*significandParts() & 0x8000000000000000ULL) || 2453 !(*significandParts() & 0x4000000000000000ULL))) { 2454 // x86 has some unusual NaNs which cannot be represented in any other 2455 // format; note them here. 
2456 X86SpecialNan = true; 2457 } 2458 2459 // If this is a truncation of a denormal number, and the target semantics 2460 // has larger exponent range than the source semantics (this can happen 2461 // when truncating from PowerPC double-double to double format), the 2462 // right shift could lose result mantissa bits. Adjust exponent instead 2463 // of performing excessive shift. 2464 // Also do a similar trick in case shifting denormal would produce zero 2465 // significand as this case isn't handled correctly by normalize. 2466 if (shift < 0 && isFiniteNonZero()) { 2467 int omsb = significandMSB() + 1; 2468 int exponentChange = omsb - fromSemantics.precision; 2469 if (exponent + exponentChange < toSemantics.minExponent) 2470 exponentChange = toSemantics.minExponent - exponent; 2471 if (exponentChange < shift) 2472 exponentChange = shift; 2473 if (exponentChange < 0) { 2474 shift -= exponentChange; 2475 exponent += exponentChange; 2476 } else if (omsb <= -shift) { 2477 exponentChange = omsb + shift - 1; // leave at least one bit set 2478 shift -= exponentChange; 2479 exponent += exponentChange; 2480 } 2481 } 2482 2483 // If this is a truncation, perform the shift before we narrow the storage. 2484 if (shift < 0 && (isFiniteNonZero() || 2485 (category == fcNaN && semantics->nonFiniteBehavior != 2486 fltNonfiniteBehavior::NanOnly))) 2487 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2488 2489 // Fix the storage so it can hold to new value. 2490 if (newPartCount > oldPartCount) { 2491 // The new type requires more storage; make it available. 2492 integerPart *newParts; 2493 newParts = new integerPart[newPartCount]; 2494 APInt::tcSet(newParts, 0, newPartCount); 2495 if (isFiniteNonZero() || category==fcNaN) 2496 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2497 freeSignificand(); 2498 significand.parts = newParts; 2499 } else if (newPartCount == 1 && oldPartCount != 1) { 2500 // Switch to built-in storage for a single part. 
2501 integerPart newPart = 0; 2502 if (isFiniteNonZero() || category==fcNaN) 2503 newPart = significandParts()[0]; 2504 freeSignificand(); 2505 significand.part = newPart; 2506 } 2507 2508 // Now that we have the right storage, switch the semantics. 2509 semantics = &toSemantics; 2510 2511 // If this is an extension, perform the shift now that the storage is 2512 // available. 2513 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2514 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2515 2516 if (isFiniteNonZero()) { 2517 fs = normalize(rounding_mode, lostFraction); 2518 *losesInfo = (fs != opOK); 2519 } else if (category == fcNaN) { 2520 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2521 *losesInfo = 2522 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly; 2523 makeNaN(false, sign); 2524 return is_signaling ? opInvalidOp : opOK; 2525 } 2526 2527 // If NaN is negative zero, we need to create a new NaN to avoid converting 2528 // NaN to -Inf. 2529 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && 2530 semantics->nanEncoding != fltNanEncoding::NegativeZero) 2531 makeNaN(false, false); 2532 2533 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2534 2535 // For x87 extended precision, we want to make a NaN, not a special NaN if 2536 // the input wasn't special either. 2537 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2538 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2539 2540 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2541 // This also guarantees that a sNaN does not become Inf on a truncation 2542 // that loses all payload bits. 
2543 if (is_signaling) { 2544 makeQuiet(); 2545 fs = opInvalidOp; 2546 } else { 2547 fs = opOK; 2548 } 2549 } else if (category == fcInfinity && 2550 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2551 makeNaN(false, sign); 2552 *losesInfo = true; 2553 fs = opInexact; 2554 } else if (category == fcZero && 2555 semantics->nanEncoding == fltNanEncoding::NegativeZero) { 2556 // Negative zero loses info, but positive zero doesn't. 2557 *losesInfo = 2558 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; 2559 fs = *losesInfo ? opInexact : opOK; 2560 // NaN is negative zero means -0 -> +0, which can lose information 2561 sign = false; 2562 } else { 2563 *losesInfo = false; 2564 fs = opOK; 2565 } 2566 2567 return fs; 2568 } 2569 2570 /* Convert a floating point number to an integer according to the 2571 rounding mode. If the rounded integer value is out of range this 2572 returns an invalid operation exception and the contents of the 2573 destination parts are unspecified. If the rounded value is in 2574 range but the floating point number is not the exact integer, the C 2575 standard doesn't require an inexact exception to be raised. IEEE 2576 854 does require it so we do that. 2577 2578 Note that for conversions to integer type the C standard requires 2579 round-to-zero to always be used. */ 2580 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2581 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2582 roundingMode rounding_mode, bool *isExact) const { 2583 lostFraction lost_fraction; 2584 const integerPart *src; 2585 unsigned int dstPartsCount, truncatedBits; 2586 2587 *isExact = false; 2588 2589 /* Handle the three special cases first. 
*/ 2590 if (category == fcInfinity || category == fcNaN) 2591 return opInvalidOp; 2592 2593 dstPartsCount = partCountForBits(width); 2594 assert(dstPartsCount <= parts.size() && "Integer too big"); 2595 2596 if (category == fcZero) { 2597 APInt::tcSet(parts.data(), 0, dstPartsCount); 2598 // Negative zero can't be represented as an int. 2599 *isExact = !sign; 2600 return opOK; 2601 } 2602 2603 src = significandParts(); 2604 2605 /* Step 1: place our absolute value, with any fraction truncated, in 2606 the destination. */ 2607 if (exponent < 0) { 2608 /* Our absolute value is less than one; truncate everything. */ 2609 APInt::tcSet(parts.data(), 0, dstPartsCount); 2610 /* For exponent -1 the integer bit represents .5, look at that. 2611 For smaller exponents leftmost truncated bit is 0. */ 2612 truncatedBits = semantics->precision -1U - exponent; 2613 } else { 2614 /* We want the most significant (exponent + 1) bits; the rest are 2615 truncated. */ 2616 unsigned int bits = exponent + 1U; 2617 2618 /* Hopelessly large in magnitude? */ 2619 if (bits > width) 2620 return opInvalidOp; 2621 2622 if (bits < semantics->precision) { 2623 /* We truncate (semantics->precision - bits) bits. */ 2624 truncatedBits = semantics->precision - bits; 2625 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2626 } else { 2627 /* We want at least as many bits as are available. */ 2628 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2629 0); 2630 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2631 bits - semantics->precision); 2632 truncatedBits = 0; 2633 } 2634 } 2635 2636 /* Step 2: work out any lost fraction, and increment the absolute 2637 value if we would round away from zero. 
*/ 2638 if (truncatedBits) { 2639 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2640 truncatedBits); 2641 if (lost_fraction != lfExactlyZero && 2642 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2643 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2644 return opInvalidOp; /* Overflow. */ 2645 } 2646 } else { 2647 lost_fraction = lfExactlyZero; 2648 } 2649 2650 /* Step 3: check if we fit in the destination. */ 2651 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2652 2653 if (sign) { 2654 if (!isSigned) { 2655 /* Negative numbers cannot be represented as unsigned. */ 2656 if (omsb != 0) 2657 return opInvalidOp; 2658 } else { 2659 /* It takes omsb bits to represent the unsigned integer value. 2660 We lose a bit for the sign, but care is needed as the 2661 maximally negative integer is a special case. */ 2662 if (omsb == width && 2663 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2664 return opInvalidOp; 2665 2666 /* This case can happen because of rounding. */ 2667 if (omsb > width) 2668 return opInvalidOp; 2669 } 2670 2671 APInt::tcNegate (parts.data(), dstPartsCount); 2672 } else { 2673 if (omsb >= width + !isSigned) 2674 return opInvalidOp; 2675 } 2676 2677 if (lost_fraction == lfExactlyZero) { 2678 *isExact = true; 2679 return opOK; 2680 } else 2681 return opInexact; 2682 } 2683 2684 /* Same as convertToSignExtendedInteger, except we provide 2685 deterministic values in case of an invalid operation exception, 2686 namely zero for NaNs and the minimal or maximal value respectively 2687 for underflow or overflow. 2688 The *isExact output tells whether the result is exact, in the sense 2689 that converting it back to the original floating point type produces 2690 the original value. This is almost equivalent to result==opOK, 2691 except for negative zeroes. 
2692 */ 2693 IEEEFloat::opStatus 2694 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2695 unsigned int width, bool isSigned, 2696 roundingMode rounding_mode, bool *isExact) const { 2697 opStatus fs; 2698 2699 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2700 isExact); 2701 2702 if (fs == opInvalidOp) { 2703 unsigned int bits, dstPartsCount; 2704 2705 dstPartsCount = partCountForBits(width); 2706 assert(dstPartsCount <= parts.size() && "Integer too big"); 2707 2708 if (category == fcNaN) 2709 bits = 0; 2710 else if (sign) 2711 bits = isSigned; 2712 else 2713 bits = width - isSigned; 2714 2715 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2716 if (sign && isSigned) 2717 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2718 } 2719 2720 return fs; 2721 } 2722 2723 /* Convert an unsigned integer SRC to a floating point number, 2724 rounding according to ROUNDING_MODE. The sign of the floating 2725 point number is not modified. */ 2726 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2727 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2728 unsigned int omsb, precision, dstCount; 2729 integerPart *dst; 2730 lostFraction lost_fraction; 2731 2732 category = fcNormal; 2733 omsb = APInt::tcMSB(src, srcCount) + 1; 2734 dst = significandParts(); 2735 dstCount = partCount(); 2736 precision = semantics->precision; 2737 2738 /* We want the most significant PRECISION bits of SRC. There may not 2739 be that many; extract what we can. 
*/ 2740 if (precision <= omsb) { 2741 exponent = omsb - 1; 2742 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2743 omsb - precision); 2744 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2745 } else { 2746 exponent = precision - 1; 2747 lost_fraction = lfExactlyZero; 2748 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2749 } 2750 2751 return normalize(rounding_mode, lost_fraction); 2752 } 2753 2754 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2755 roundingMode rounding_mode) { 2756 unsigned int partCount = Val.getNumWords(); 2757 APInt api = Val; 2758 2759 sign = false; 2760 if (isSigned && api.isNegative()) { 2761 sign = true; 2762 api = -api; 2763 } 2764 2765 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2766 } 2767 2768 /* Convert a two's complement integer SRC to a floating point number, 2769 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2770 integer is signed, in which case it must be sign-extended. */ 2771 IEEEFloat::opStatus 2772 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2773 unsigned int srcCount, bool isSigned, 2774 roundingMode rounding_mode) { 2775 opStatus status; 2776 2777 if (isSigned && 2778 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2779 integerPart *copy; 2780 2781 /* If we're signed and negative negate a copy. */ 2782 sign = true; 2783 copy = new integerPart[srcCount]; 2784 APInt::tcAssign(copy, src, srcCount); 2785 APInt::tcNegate(copy, srcCount); 2786 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2787 delete [] copy; 2788 } else { 2789 sign = false; 2790 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2791 } 2792 2793 return status; 2794 } 2795 2796 /* FIXME: should this just take a const APInt reference? 
*/ 2797 IEEEFloat::opStatus 2798 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2799 unsigned int width, bool isSigned, 2800 roundingMode rounding_mode) { 2801 unsigned int partCount = partCountForBits(width); 2802 APInt api = APInt(width, ArrayRef(parts, partCount)); 2803 2804 sign = false; 2805 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2806 sign = true; 2807 api = -api; 2808 } 2809 2810 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2811 } 2812 2813 Expected<IEEEFloat::opStatus> 2814 IEEEFloat::convertFromHexadecimalString(StringRef s, 2815 roundingMode rounding_mode) { 2816 lostFraction lost_fraction = lfExactlyZero; 2817 2818 category = fcNormal; 2819 zeroSignificand(); 2820 exponent = 0; 2821 2822 integerPart *significand = significandParts(); 2823 unsigned partsCount = partCount(); 2824 unsigned bitPos = partsCount * integerPartWidth; 2825 bool computedTrailingFraction = false; 2826 2827 // Skip leading zeroes and any (hexa)decimal point. 2828 StringRef::iterator begin = s.begin(); 2829 StringRef::iterator end = s.end(); 2830 StringRef::iterator dot; 2831 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2832 if (!PtrOrErr) 2833 return PtrOrErr.takeError(); 2834 StringRef::iterator p = *PtrOrErr; 2835 StringRef::iterator firstSignificantDigit = p; 2836 2837 while (p != end) { 2838 integerPart hex_value; 2839 2840 if (*p == '.') { 2841 if (dot != end) 2842 return createError("String contains multiple dots"); 2843 dot = p++; 2844 continue; 2845 } 2846 2847 hex_value = hexDigitValue(*p); 2848 if (hex_value == UINT_MAX) 2849 break; 2850 2851 p++; 2852 2853 // Store the number while we have space. 
2854 if (bitPos) { 2855 bitPos -= 4; 2856 hex_value <<= bitPos % integerPartWidth; 2857 significand[bitPos / integerPartWidth] |= hex_value; 2858 } else if (!computedTrailingFraction) { 2859 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2860 if (!FractOrErr) 2861 return FractOrErr.takeError(); 2862 lost_fraction = *FractOrErr; 2863 computedTrailingFraction = true; 2864 } 2865 } 2866 2867 /* Hex floats require an exponent but not a hexadecimal point. */ 2868 if (p == end) 2869 return createError("Hex strings require an exponent"); 2870 if (*p != 'p' && *p != 'P') 2871 return createError("Invalid character in significand"); 2872 if (p == begin) 2873 return createError("Significand has no digits"); 2874 if (dot != end && p - begin == 1) 2875 return createError("Significand has no digits"); 2876 2877 /* Ignore the exponent if we are zero. */ 2878 if (p != firstSignificantDigit) { 2879 int expAdjustment; 2880 2881 /* Implicit hexadecimal point? */ 2882 if (dot == end) 2883 dot = p; 2884 2885 /* Calculate the exponent adjustment implicit in the number of 2886 significant digits. */ 2887 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2888 if (expAdjustment < 0) 2889 expAdjustment++; 2890 expAdjustment = expAdjustment * 4 - 1; 2891 2892 /* Adjust for writing the significand starting at the most 2893 significant nibble. */ 2894 expAdjustment += semantics->precision; 2895 expAdjustment -= partsCount * integerPartWidth; 2896 2897 /* Adjust for the given exponent. 
*/ 2898 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 2899 if (!ExpOrErr) 2900 return ExpOrErr.takeError(); 2901 exponent = *ExpOrErr; 2902 } 2903 2904 return normalize(rounding_mode, lost_fraction); 2905 } 2906 2907 IEEEFloat::opStatus 2908 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2909 unsigned sigPartCount, int exp, 2910 roundingMode rounding_mode) { 2911 unsigned int parts, pow5PartCount; 2912 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 2913 integerPart pow5Parts[maxPowerOfFiveParts]; 2914 bool isNearest; 2915 2916 isNearest = (rounding_mode == rmNearestTiesToEven || 2917 rounding_mode == rmNearestTiesToAway); 2918 2919 parts = partCountForBits(semantics->precision + 11); 2920 2921 /* Calculate pow(5, abs(exp)). */ 2922 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2923 2924 for (;; parts *= 2) { 2925 opStatus sigStatus, powStatus; 2926 unsigned int excessPrecision, truncatedBits; 2927 2928 calcSemantics.precision = parts * integerPartWidth - 1; 2929 excessPrecision = calcSemantics.precision - semantics->precision; 2930 truncatedBits = excessPrecision; 2931 2932 IEEEFloat decSig(calcSemantics, uninitialized); 2933 decSig.makeZero(sign); 2934 IEEEFloat pow5(calcSemantics); 2935 2936 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2937 rmNearestTiesToEven); 2938 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2939 rmNearestTiesToEven); 2940 /* Add exp, as 10^n = 5^n * 2^n. */ 2941 decSig.exponent += exp; 2942 2943 lostFraction calcLostFraction; 2944 integerPart HUerr, HUdistance; 2945 unsigned int powHUerr; 2946 2947 if (exp >= 0) { 2948 /* multiplySignificand leaves the precision-th bit set to 1. */ 2949 calcLostFraction = decSig.multiplySignificand(pow5); 2950 powHUerr = powStatus != opOK; 2951 } else { 2952 calcLostFraction = decSig.divideSignificand(pow5); 2953 /* Denormal numbers have less precision. 
*/ 2954 if (decSig.exponent < semantics->minExponent) { 2955 excessPrecision += (semantics->minExponent - decSig.exponent); 2956 truncatedBits = excessPrecision; 2957 if (excessPrecision > calcSemantics.precision) 2958 excessPrecision = calcSemantics.precision; 2959 } 2960 /* Extra half-ulp lost in reciprocal of exponent. */ 2961 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2962 } 2963 2964 /* Both multiplySignificand and divideSignificand return the 2965 result with the integer bit set. */ 2966 assert(APInt::tcExtractBit 2967 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2968 2969 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2970 powHUerr); 2971 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2972 excessPrecision, isNearest); 2973 2974 /* Are we guaranteed to round correctly if we truncate? */ 2975 if (HUdistance >= HUerr) { 2976 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2977 calcSemantics.precision - excessPrecision, 2978 excessPrecision); 2979 /* Take the exponent of decSig. If we tcExtract-ed less bits 2980 above we must adjust our exponent to compensate for the 2981 implicit right shift. */ 2982 exponent = (decSig.exponent + semantics->precision 2983 - (calcSemantics.precision - excessPrecision)); 2984 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2985 decSig.partCount(), 2986 truncatedBits); 2987 return normalize(rounding_mode, calcLostFraction); 2988 } 2989 } 2990 } 2991 2992 Expected<IEEEFloat::opStatus> 2993 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 2994 decimalInfo D; 2995 opStatus fs; 2996 2997 /* Scan the text. */ 2998 StringRef::iterator p = str.begin(); 2999 if (Error Err = interpretDecimal(p, str.end(), &D)) 3000 return std::move(Err); 3001 3002 /* Handle the quick cases. First the case of no significant digits, 3003 i.e. 
zero, and then exponents that are obviously too large or too 3004 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 3005 definitely overflows if 3006 3007 (exp - 1) * L >= maxExponent 3008 3009 and definitely underflows to zero where 3010 3011 (exp + 1) * L <= minExponent - precision 3012 3013 With integer arithmetic the tightest bounds for L are 3014 3015 93/28 < L < 196/59 [ numerator <= 256 ] 3016 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 3017 */ 3018 3019 // Test if we have a zero number allowing for strings with no null terminators 3020 // and zero decimals with non-zero exponents. 3021 // 3022 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 3023 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 3024 // be at most one dot. On the other hand, if we have a zero with a non-zero 3025 // exponent, then we know that D.firstSigDigit will be non-numeric. 3026 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 3027 category = fcZero; 3028 fs = opOK; 3029 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 3030 sign = false; 3031 3032 /* Check whether the normalized exponent is high enough to overflow 3033 max during the log-rebasing in the max-exponent check below. */ 3034 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 3035 fs = handleOverflow(rounding_mode); 3036 3037 /* If it wasn't, then it also wasn't high enough to overflow max 3038 during the log-rebasing in the min-exponent check. Check that it 3039 won't overflow min in either check, then perform the min-exponent 3040 check. */ 3041 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 3042 (D.normalizedExponent + 1) * 28738 <= 3043 8651 * (semantics->minExponent - (int) semantics->precision)) { 3044 /* Underflow to zero and round. 
*/ 3045 category = fcNormal; 3046 zeroSignificand(); 3047 fs = normalize(rounding_mode, lfLessThanHalf); 3048 3049 /* We can finally safely perform the max-exponent check. */ 3050 } else if ((D.normalizedExponent - 1) * 42039 3051 >= 12655 * semantics->maxExponent) { 3052 /* Overflow and round. */ 3053 fs = handleOverflow(rounding_mode); 3054 } else { 3055 integerPart *decSignificand; 3056 unsigned int partCount; 3057 3058 /* A tight upper bound on number of bits required to hold an 3059 N-digit decimal integer is N * 196 / 59. Allocate enough space 3060 to hold the full significand, and an extra part required by 3061 tcMultiplyPart. */ 3062 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 3063 partCount = partCountForBits(1 + 196 * partCount / 59); 3064 decSignificand = new integerPart[partCount + 1]; 3065 partCount = 0; 3066 3067 /* Convert to binary efficiently - we do almost all multiplication 3068 in an integerPart. When this would overflow do we do a single 3069 bignum multiplication, and then revert again to multiplication 3070 in an integerPart. */ 3071 do { 3072 integerPart decValue, val, multiplier; 3073 3074 val = 0; 3075 multiplier = 1; 3076 3077 do { 3078 if (*p == '.') { 3079 p++; 3080 if (p == str.end()) { 3081 break; 3082 } 3083 } 3084 decValue = decDigitValue(*p++); 3085 if (decValue >= 10U) { 3086 delete[] decSignificand; 3087 return createError("Invalid character in significand"); 3088 } 3089 multiplier *= 10; 3090 val = val * 10 + decValue; 3091 /* The maximum number that can be multiplied by ten with any 3092 digit added without overflowing an integerPart. */ 3093 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 3094 3095 /* Multiply out the current part. */ 3096 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 3097 partCount, partCount + 1, false); 3098 3099 /* If we used another part (likely but not guaranteed), increase 3100 the count. 
*/ 3101 if (decSignificand[partCount]) 3102 partCount++; 3103 } while (p <= D.lastSigDigit); 3104 3105 category = fcNormal; 3106 fs = roundSignificandWithExponent(decSignificand, partCount, 3107 D.exponent, rounding_mode); 3108 3109 delete [] decSignificand; 3110 } 3111 3112 return fs; 3113 } 3114 3115 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 3116 const size_t MIN_NAME_SIZE = 3; 3117 3118 if (str.size() < MIN_NAME_SIZE) 3119 return false; 3120 3121 if (str == "inf" || str == "INFINITY" || str == "+Inf") { 3122 makeInf(false); 3123 return true; 3124 } 3125 3126 bool IsNegative = str.front() == '-'; 3127 if (IsNegative) { 3128 str = str.drop_front(); 3129 if (str.size() < MIN_NAME_SIZE) 3130 return false; 3131 3132 if (str == "inf" || str == "INFINITY" || str == "Inf") { 3133 makeInf(true); 3134 return true; 3135 } 3136 } 3137 3138 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 3139 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 3140 if (IsSignaling) { 3141 str = str.drop_front(); 3142 if (str.size() < MIN_NAME_SIZE) 3143 return false; 3144 } 3145 3146 if (str.starts_with("nan") || str.starts_with("NaN")) { 3147 str = str.drop_front(3); 3148 3149 // A NaN without payload. 3150 if (str.empty()) { 3151 makeNaN(IsSignaling, IsNegative); 3152 return true; 3153 } 3154 3155 // Allow the payload to be inside parentheses. 3156 if (str.front() == '(') { 3157 // Parentheses should be balanced (and not empty). 3158 if (str.size() <= 2 || str.back() != ')') 3159 return false; 3160 3161 str = str.slice(1, str.size() - 1); 3162 } 3163 3164 // Determine the payload number's radix. 3165 unsigned Radix = 10; 3166 if (str[0] == '0') { 3167 if (str.size() > 1 && tolower(str[1]) == 'x') { 3168 str = str.drop_front(2); 3169 Radix = 16; 3170 } else 3171 Radix = 8; 3172 } 3173 3174 // Parse the payload and make the NaN. 
3175 APInt Payload; 3176 if (!str.getAsInteger(Radix, Payload)) { 3177 makeNaN(IsSignaling, IsNegative, &Payload); 3178 return true; 3179 } 3180 } 3181 3182 return false; 3183 } 3184 3185 Expected<IEEEFloat::opStatus> 3186 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3187 if (str.empty()) 3188 return createError("Invalid string length"); 3189 3190 // Handle special cases. 3191 if (convertFromStringSpecials(str)) 3192 return opOK; 3193 3194 /* Handle a leading minus sign. */ 3195 StringRef::iterator p = str.begin(); 3196 size_t slen = str.size(); 3197 sign = *p == '-' ? 1 : 0; 3198 if (*p == '-' || *p == '+') { 3199 p++; 3200 slen--; 3201 if (!slen) 3202 return createError("String has no digits"); 3203 } 3204 3205 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 3206 if (slen == 2) 3207 return createError("Invalid string"); 3208 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 3209 rounding_mode); 3210 } 3211 3212 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 3213 } 3214 3215 /* Write out a hexadecimal representation of the floating point value 3216 to DST, which must be of sufficient size, in the C99 form 3217 [-]0xh.hhhhp[+-]d. Return the number of characters written, 3218 excluding the terminating NUL. 3219 3220 If UPPERCASE, the output is in upper case, otherwise in lower case. 3221 3222 HEXDIGITS digits appear altogether, rounding the value if 3223 necessary. If HEXDIGITS is 0, the minimal precision to display the 3224 number precisely is used instead. If nothing would appear after 3225 the decimal point it is suppressed. 3226 3227 The decimal exponent is always printed and has at least one digit. 3228 Zero values display an exponent of zero. Infinities and NaNs 3229 appear as "infinity" or "nan" respectively. 3230 3231 The above rules are as specified by C99. There is ambiguity about 3232 what the leading hexadecimal digit should be. 
This implementation
   uses whatever is necessary so that the exponent is displayed as
   stored.  This implies the exponent will fall within the IEEE format
   range, and the leading hexadecimal digit will be 0 (for denormals),
   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
   any other digits zero).
*/
unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
                                           bool upperCase,
                                           roundingMode rounding_mode) const {
  char *p;

  /* Remember the start of the buffer so the length can be computed.  */
  p = dst;
  if (sign)
    *dst++ = '-';

  switch (category) {
  case fcInfinity:
    /* NOTE(review): the copy length uses infinityU and the advance uses
       infinityL; presumably both literals have the same length - confirm.  */
    memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
    dst += sizeof infinityL - 1;
    break;

  case fcNaN:
    memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
    dst += sizeof NaNU - 1;
    break;

  case fcZero:
    /* "0x0", then a point and hexDigits - 1 zeroes when more than one
       digit was requested, then a zero exponent.  */
    *dst++ = '0';
    *dst++ = upperCase ? 'X': 'x';
    *dst++ = '0';
    if (hexDigits > 1) {
      *dst++ = '.';
      memset (dst, '0', hexDigits - 1);
      dst += hexDigits - 1;
    }
    *dst++ = upperCase ? 'P': 'p';
    *dst++ = '0';
    break;

  case fcNormal:
    dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
    break;
  }

  /* Terminate the string; the NUL is not counted in the result.  */
  *dst = 0;

  return static_cast<unsigned int>(dst - p);
}

/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.

   Writes at DST (no sign, no terminating NUL) and returns the pointer
   produced by writeSignedDecimal for the exponent.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  /* Left shift that aligns the top of the valueBits-wide value with the
     top of an integerPart.  */
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  /* Number of integerParts needed to cover valueBits.  */
  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Fill the low bits vacated by the shift from the next lower part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Note that hexDigitChars has a trailing '0'.  Walk backwards adding
       one to each digit; a digit that becomes '0' carries into the digit
       before it.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}

// Hashes an IEEEFloat.  Values that are not finite-nonzero hash only their
// category, sign and the precision of their semantics; finite non-zero
// values additionally hash the exponent and every significand part.
hash_code hash_value(const IEEEFloat &Arg) {
  if (!Arg.isFiniteNonZero())
    return hash_combine((uint8_t)Arg.category,
                        // NaN has no sign, fix it at zero.
                        Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
                        Arg.semantics->precision);

  // Normal floats need their exponent and significand hashed.
  return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
                      Arg.semantics->precision, Arg.exponent,
                      hash_combine_range(
                        Arg.significandParts(),
                        Arg.significandParts() + Arg.partCount()));
}

// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.
3408 3409 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3410 // the actual IEEE respresentations. We compensate for that here. 3411 3412 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3413 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3414 assert(partCount()==2); 3415 3416 uint64_t myexponent, mysignificand; 3417 3418 if (isFiniteNonZero()) { 3419 myexponent = exponent+16383; //bias 3420 mysignificand = significandParts()[0]; 3421 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3422 myexponent = 0; // denormal 3423 } else if (category==fcZero) { 3424 myexponent = 0; 3425 mysignificand = 0; 3426 } else if (category==fcInfinity) { 3427 myexponent = 0x7fff; 3428 mysignificand = 0x8000000000000000ULL; 3429 } else { 3430 assert(category == fcNaN && "Unknown category"); 3431 myexponent = 0x7fff; 3432 mysignificand = significandParts()[0]; 3433 } 3434 3435 uint64_t words[2]; 3436 words[0] = mysignificand; 3437 words[1] = ((uint64_t)(sign & 1) << 15) | 3438 (myexponent & 0x7fffLL); 3439 return APInt(80, words); 3440 } 3441 3442 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3443 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3444 assert(partCount()==2); 3445 3446 uint64_t words[2]; 3447 opStatus fs; 3448 bool losesInfo; 3449 3450 // Convert number to double. To avoid spurious underflows, we re- 3451 // normalize against the "double" minExponent first, and only *then* 3452 // truncate the mantissa. The result of that second conversion 3453 // may be inexact, but should never underflow. 3454 // Declare fltSemantics before APFloat that uses it (and 3455 // saves pointer to it) to ensure correct destruction order. 
3456 fltSemantics extendedSemantics = *semantics; 3457 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3458 IEEEFloat extended(*this); 3459 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3460 assert(fs == opOK && !losesInfo); 3461 (void)fs; 3462 3463 IEEEFloat u(extended); 3464 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3465 assert(fs == opOK || fs == opInexact); 3466 (void)fs; 3467 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3468 3469 // If conversion was exact or resulted in a special case, we're done; 3470 // just set the second double to zero. Otherwise, re-convert back to 3471 // the extended format and compute the difference. This now should 3472 // convert exactly to double. 3473 if (u.isFiniteNonZero() && losesInfo) { 3474 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3475 assert(fs == opOK && !losesInfo); 3476 (void)fs; 3477 3478 IEEEFloat v(extended); 3479 v.subtract(u, rmNearestTiesToEven); 3480 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3481 assert(fs == opOK && !losesInfo); 3482 (void)fs; 3483 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3484 } else { 3485 words[1] = 0; 3486 } 3487 3488 return APInt(128, words); 3489 } 3490 3491 template <const fltSemantics &S> 3492 APInt IEEEFloat::convertIEEEFloatToAPInt() const { 3493 assert(semantics == &S); 3494 3495 constexpr int bias = -(S.minExponent - 1); 3496 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3497 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; 3498 constexpr integerPart integer_bit = 3499 integerPart{1} << (trailing_significand_bits % integerPartWidth); 3500 constexpr uint64_t significand_mask = integer_bit - 1; 3501 constexpr unsigned int exponent_bits = 3502 S.sizeInBits - 1 - trailing_significand_bits; 3503 static_assert(exponent_bits < 64); 3504 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 
1; 3505 3506 uint64_t myexponent; 3507 std::array<integerPart, partCountForBits(trailing_significand_bits)> 3508 mysignificand; 3509 3510 if (isFiniteNonZero()) { 3511 myexponent = exponent + bias; 3512 std::copy_n(significandParts(), mysignificand.size(), 3513 mysignificand.begin()); 3514 if (myexponent == 1 && 3515 !(significandParts()[integer_bit_part] & integer_bit)) 3516 myexponent = 0; // denormal 3517 } else if (category == fcZero) { 3518 myexponent = ::exponentZero(S) + bias; 3519 mysignificand.fill(0); 3520 } else if (category == fcInfinity) { 3521 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 3522 llvm_unreachable("semantics don't support inf!"); 3523 } 3524 myexponent = ::exponentInf(S) + bias; 3525 mysignificand.fill(0); 3526 } else { 3527 assert(category == fcNaN && "Unknown category!"); 3528 myexponent = ::exponentNaN(S) + bias; 3529 std::copy_n(significandParts(), mysignificand.size(), 3530 mysignificand.begin()); 3531 } 3532 std::array<uint64_t, (S.sizeInBits + 63) / 64> words; 3533 auto words_iter = 3534 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); 3535 if constexpr (significand_mask != 0) { 3536 // Clear the integer bit. 
3537 words[mysignificand.size() - 1] &= significand_mask; 3538 } 3539 std::fill(words_iter, words.end(), uint64_t{0}); 3540 constexpr size_t last_word = words.size() - 1; 3541 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1) 3542 << ((S.sizeInBits - 1) % 64); 3543 words[last_word] |= shifted_sign; 3544 uint64_t shifted_exponent = (myexponent & exponent_mask) 3545 << (trailing_significand_bits % 64); 3546 words[last_word] |= shifted_exponent; 3547 if constexpr (last_word == 0) { 3548 return APInt(S.sizeInBits, words[0]); 3549 } 3550 return APInt(S.sizeInBits, words); 3551 } 3552 3553 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3554 assert(partCount() == 2); 3555 return convertIEEEFloatToAPInt<semIEEEquad>(); 3556 } 3557 3558 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3559 assert(partCount()==1); 3560 return convertIEEEFloatToAPInt<semIEEEdouble>(); 3561 } 3562 3563 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3564 assert(partCount()==1); 3565 return convertIEEEFloatToAPInt<semIEEEsingle>(); 3566 } 3567 3568 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3569 assert(partCount() == 1); 3570 return convertIEEEFloatToAPInt<semBFloat>(); 3571 } 3572 3573 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3574 assert(partCount()==1); 3575 return convertIEEEFloatToAPInt<semIEEEhalf>(); 3576 } 3577 3578 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { 3579 assert(partCount() == 1); 3580 return convertIEEEFloatToAPInt<semFloat8E5M2>(); 3581 } 3582 3583 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { 3584 assert(partCount() == 1); 3585 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); 3586 } 3587 3588 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { 3589 assert(partCount() == 1); 3590 return convertIEEEFloatToAPInt<semFloat8E4M3FN>(); 3591 } 3592 3593 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { 3594 assert(partCount() == 1); 3595 return 
convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); 3596 } 3597 3598 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { 3599 assert(partCount() == 1); 3600 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); 3601 } 3602 3603 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { 3604 assert(partCount() == 1); 3605 return convertIEEEFloatToAPInt<semFloatTF32>(); 3606 } 3607 3608 // This function creates an APInt that is just a bit map of the floating 3609 // point constant as it would appear in memory. It is not a conversion, 3610 // and treating the result as a normal integer is unlikely to be useful. 3611 3612 APInt IEEEFloat::bitcastToAPInt() const { 3613 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3614 return convertHalfAPFloatToAPInt(); 3615 3616 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3617 return convertBFloatAPFloatToAPInt(); 3618 3619 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3620 return convertFloatAPFloatToAPInt(); 3621 3622 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3623 return convertDoubleAPFloatToAPInt(); 3624 3625 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3626 return convertQuadrupleAPFloatToAPInt(); 3627 3628 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3629 return convertPPCDoubleDoubleAPFloatToAPInt(); 3630 3631 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3632 return convertFloat8E5M2APFloatToAPInt(); 3633 3634 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3635 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3636 3637 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3638 return convertFloat8E4M3FNAPFloatToAPInt(); 3639 3640 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3641 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3642 3643 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3644 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3645 3646 
if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3647 return convertFloatTF32APFloatToAPInt(); 3648 3649 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3650 "unknown format!"); 3651 return convertF80LongDoubleAPFloatToAPInt(); 3652 } 3653 3654 float IEEEFloat::convertToFloat() const { 3655 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3656 "Float semantics are not IEEEsingle"); 3657 APInt api = bitcastToAPInt(); 3658 return api.bitsToFloat(); 3659 } 3660 3661 double IEEEFloat::convertToDouble() const { 3662 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3663 "Float semantics are not IEEEdouble"); 3664 APInt api = bitcastToAPInt(); 3665 return api.bitsToDouble(); 3666 } 3667 3668 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3669 /// does not support these bit patterns: 3670 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3671 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3672 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3673 /// exponent = 0, integer bit 1 ("pseudodenormal") 3674 /// At the moment, the first three are treated as NaNs, the last one as Normal. 
3675 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3676 uint64_t i1 = api.getRawData()[0]; 3677 uint64_t i2 = api.getRawData()[1]; 3678 uint64_t myexponent = (i2 & 0x7fff); 3679 uint64_t mysignificand = i1; 3680 uint8_t myintegerbit = mysignificand >> 63; 3681 3682 initialize(&semX87DoubleExtended); 3683 assert(partCount()==2); 3684 3685 sign = static_cast<unsigned int>(i2>>15); 3686 if (myexponent == 0 && mysignificand == 0) { 3687 makeZero(sign); 3688 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3689 makeInf(sign); 3690 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3691 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3692 category = fcNaN; 3693 exponent = exponentNaN(); 3694 significandParts()[0] = mysignificand; 3695 significandParts()[1] = 0; 3696 } else { 3697 category = fcNormal; 3698 exponent = myexponent - 16383; 3699 significandParts()[0] = mysignificand; 3700 significandParts()[1] = 0; 3701 if (myexponent==0) // denormal 3702 exponent = -16382; 3703 } 3704 } 3705 3706 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3707 uint64_t i1 = api.getRawData()[0]; 3708 uint64_t i2 = api.getRawData()[1]; 3709 opStatus fs; 3710 bool losesInfo; 3711 3712 // Get the first double and convert to our format. 3713 initFromDoubleAPInt(APInt(64, i1)); 3714 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3715 assert(fs == opOK && !losesInfo); 3716 (void)fs; 3717 3718 // Unless we have a special case, add in second double. 
3719 if (isFiniteNonZero()) { 3720 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3721 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3722 assert(fs == opOK && !losesInfo); 3723 (void)fs; 3724 3725 add(v, rmNearestTiesToEven); 3726 } 3727 } 3728 3729 template <const fltSemantics &S> 3730 void IEEEFloat::initFromIEEEAPInt(const APInt &api) { 3731 assert(api.getBitWidth() == S.sizeInBits); 3732 constexpr integerPart integer_bit = integerPart{1} 3733 << ((S.precision - 1) % integerPartWidth); 3734 constexpr uint64_t significand_mask = integer_bit - 1; 3735 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3736 constexpr unsigned int stored_significand_parts = 3737 partCountForBits(trailing_significand_bits); 3738 constexpr unsigned int exponent_bits = 3739 S.sizeInBits - 1 - trailing_significand_bits; 3740 static_assert(exponent_bits < 64); 3741 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; 3742 constexpr int bias = -(S.minExponent - 1); 3743 3744 // Copy the bits of the significand. We need to clear out the exponent and 3745 // sign bit in the last word. 3746 std::array<integerPart, stored_significand_parts> mysignificand; 3747 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin()); 3748 if constexpr (significand_mask != 0) { 3749 mysignificand[mysignificand.size() - 1] &= significand_mask; 3750 } 3751 3752 // We assume the last word holds the sign bit, the exponent, and potentially 3753 // some of the trailing significand field. 
3754 uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; 3755 uint64_t myexponent = 3756 (last_word >> (trailing_significand_bits % 64)) & exponent_mask; 3757 3758 initialize(&S); 3759 assert(partCount() == mysignificand.size()); 3760 3761 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); 3762 3763 bool all_zero_significand = 3764 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; }); 3765 3766 bool is_zero = myexponent == 0 && all_zero_significand; 3767 3768 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) { 3769 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { 3770 makeInf(sign); 3771 return; 3772 } 3773 } 3774 3775 bool is_nan = false; 3776 3777 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) { 3778 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; 3779 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) { 3780 bool all_ones_significand = 3781 std::all_of(mysignificand.begin(), mysignificand.end() - 1, 3782 [](integerPart bits) { return bits == ~integerPart{0}; }) && 3783 (!significand_mask || 3784 mysignificand[mysignificand.size() - 1] == significand_mask); 3785 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; 3786 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) { 3787 is_nan = is_zero && sign; 3788 } 3789 3790 if (is_nan) { 3791 category = fcNaN; 3792 exponent = ::exponentNaN(S); 3793 std::copy_n(mysignificand.begin(), mysignificand.size(), 3794 significandParts()); 3795 return; 3796 } 3797 3798 if (is_zero) { 3799 makeZero(sign); 3800 return; 3801 } 3802 3803 category = fcNormal; 3804 exponent = myexponent - bias; 3805 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts()); 3806 if (myexponent == 0) // denormal 3807 exponent = S.minExponent; 3808 else 3809 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit 3810 } 3811 3812 void 
IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 3813 initFromIEEEAPInt<semIEEEquad>(api); 3814 } 3815 3816 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 3817 initFromIEEEAPInt<semIEEEdouble>(api); 3818 } 3819 3820 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 3821 initFromIEEEAPInt<semIEEEsingle>(api); 3822 } 3823 3824 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 3825 initFromIEEEAPInt<semBFloat>(api); 3826 } 3827 3828 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 3829 initFromIEEEAPInt<semIEEEhalf>(api); 3830 } 3831 3832 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { 3833 initFromIEEEAPInt<semFloat8E5M2>(api); 3834 } 3835 3836 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { 3837 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); 3838 } 3839 3840 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { 3841 initFromIEEEAPInt<semFloat8E4M3FN>(api); 3842 } 3843 3844 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { 3845 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); 3846 } 3847 3848 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { 3849 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); 3850 } 3851 3852 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { 3853 initFromIEEEAPInt<semFloatTF32>(api); 3854 } 3855 3856 /// Treat api as containing the bits of a floating point number. 
3857 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 3858 assert(api.getBitWidth() == Sem->sizeInBits); 3859 if (Sem == &semIEEEhalf) 3860 return initFromHalfAPInt(api); 3861 if (Sem == &semBFloat) 3862 return initFromBFloatAPInt(api); 3863 if (Sem == &semIEEEsingle) 3864 return initFromFloatAPInt(api); 3865 if (Sem == &semIEEEdouble) 3866 return initFromDoubleAPInt(api); 3867 if (Sem == &semX87DoubleExtended) 3868 return initFromF80LongDoubleAPInt(api); 3869 if (Sem == &semIEEEquad) 3870 return initFromQuadrupleAPInt(api); 3871 if (Sem == &semPPCDoubleDoubleLegacy) 3872 return initFromPPCDoubleDoubleAPInt(api); 3873 if (Sem == &semFloat8E5M2) 3874 return initFromFloat8E5M2APInt(api); 3875 if (Sem == &semFloat8E5M2FNUZ) 3876 return initFromFloat8E5M2FNUZAPInt(api); 3877 if (Sem == &semFloat8E4M3FN) 3878 return initFromFloat8E4M3FNAPInt(api); 3879 if (Sem == &semFloat8E4M3FNUZ) 3880 return initFromFloat8E4M3FNUZAPInt(api); 3881 if (Sem == &semFloat8E4M3B11FNUZ) 3882 return initFromFloat8E4M3B11FNUZAPInt(api); 3883 if (Sem == &semFloatTF32) 3884 return initFromFloatTF32APInt(api); 3885 3886 llvm_unreachable(nullptr); 3887 } 3888 3889 /// Make this number the largest magnitude normal number in the given 3890 /// semantics. 3891 void IEEEFloat::makeLargest(bool Negative) { 3892 // We want (in interchange format): 3893 // sign = {Negative} 3894 // exponent = 1..10 3895 // significand = 1..1 3896 category = fcNormal; 3897 sign = Negative; 3898 exponent = semantics->maxExponent; 3899 3900 // Use memset to set all but the highest integerPart to all ones. 3901 integerPart *significand = significandParts(); 3902 unsigned PartCount = partCount(); 3903 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3904 3905 // Set the high integerPart especially setting all unused top bits for 3906 // internal consistency. 
3907 const unsigned NumUnusedHighBits = 3908 PartCount*integerPartWidth - semantics->precision; 3909 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3910 ? (~integerPart(0) >> NumUnusedHighBits) 3911 : 0; 3912 3913 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 3914 semantics->nanEncoding == fltNanEncoding::AllOnes) 3915 significand[0] &= ~integerPart(1); 3916 } 3917 3918 /// Make this number the smallest magnitude denormal number in the given 3919 /// semantics. 3920 void IEEEFloat::makeSmallest(bool Negative) { 3921 // We want (in interchange format): 3922 // sign = {Negative} 3923 // exponent = 0..0 3924 // significand = 0..01 3925 category = fcNormal; 3926 sign = Negative; 3927 exponent = semantics->minExponent; 3928 APInt::tcSet(significandParts(), 1, partCount()); 3929 } 3930 3931 void IEEEFloat::makeSmallestNormalized(bool Negative) { 3932 // We want (in interchange format): 3933 // sign = {Negative} 3934 // exponent = 0..0 3935 // significand = 10..0 3936 3937 category = fcNormal; 3938 zeroSignificand(); 3939 sign = Negative; 3940 exponent = semantics->minExponent; 3941 APInt::tcSetBit(significandParts(), semantics->precision - 1); 3942 } 3943 3944 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 3945 initFromAPInt(&Sem, API); 3946 } 3947 3948 IEEEFloat::IEEEFloat(float f) { 3949 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 3950 } 3951 3952 IEEEFloat::IEEEFloat(double d) { 3953 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 3954 } 3955 3956 namespace { 3957 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3958 Buffer.append(Str.begin(), Str.end()); 3959 } 3960 3961 /// Removes data from the given significand until it is no more 3962 /// precise than is required for the desired precision. 
  void AdjustToPrecision(APInt &significand,
                         int &exp, unsigned FormatPrecision) {
    unsigned bits = significand.getActiveBits();

    // 196/59 is a very slight overestimate of lg_2(10).
    unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;

    // Already no wider than what FormatPrecision decimal digits need.
    if (bits <= bitsRequired) return;

    // Number of whole decimal digits that fit in the surplus bits.
    unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
    if (!tensRemovable) return;

    // The digits divided out below are accounted for in the decimal exponent.
    exp += tensRemovable;

    // Build divisor = 10^tensRemovable by binary exponentiation.
    APInt divisor(significand.getBitWidth(), 1);
    APInt powten(significand.getBitWidth(), 10);
    while (true) {
      if (tensRemovable & 1)
        divisor *= powten;
      tensRemovable >>= 1;
      if (!tensRemovable) break;
      powten *= powten;
    }

    significand = significand.udiv(divisor);

    // Truncate the significand down to its active bit count.
    significand = significand.trunc(significand.getActiveBits());
  }


  /// Rounds the decimal digit string in \p buffer (least significant digit
  /// first) to at most \p FormatPrecision digits, adding the number of
  /// dropped low-order positions to \p exp.
  void AdjustToPrecision(SmallVectorImpl<char> &buffer,
                         int &exp, unsigned FormatPrecision) {
    unsigned N = buffer.size();
    if (N <= FormatPrecision) return;

    // The most significant figures are the last ones in the buffer.
    unsigned FirstSignificant = N - FormatPrecision;

    // Round.
    // FIXME: this probably shouldn't use 'round half up'.

    // Rounding down is just a truncation, except we also want to drop
    // trailing zeros from the new result.
    if (buffer[FirstSignificant - 1] < '5') {
      while (FirstSignificant < N && buffer[FirstSignificant] == '0')
        FirstSignificant++;

      exp += FirstSignificant;
      buffer.erase(&buffer[0], &buffer[FirstSignificant]);
      return;
    }

    // Rounding up requires a decimal add-with-carry.  If we continue
    // the carry, the newly-introduced zeros will just be truncated.
    for (unsigned I = FirstSignificant; I != N; ++I) {
      if (buffer[I] == '9') {
        FirstSignificant++;
      } else {
        buffer[I]++;
        break;
      }
    }

    // If we carried through, we have exactly one digit of precision.
    if (FirstSignificant == N) {
      exp += FirstSignificant;
      buffer.clear();
      buffer.push_back('1');
      return;
    }

    exp += FirstSignificant;
    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
  }
} // namespace

/// Appends a decimal rendering of this value to \p Str.
///
/// Infinities are written as "+Inf"/"-Inf" and NaN as "NaN". For finite
/// values \p FormatPrecision limits the number of significant digits (0 picks
/// a default derived from the precision of the semantics). A zero
/// \p FormatMaxPadding forces scientific notation; otherwise it bounds how
/// many padding zeros plain notation may use before scientific notation is
/// chosen. With \p TruncateZero the exponent marker is 'E'; without it the
/// marker is 'e', the mantissa is zero-filled up to \p FormatPrecision and the
/// exponent has at least two digits.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  if (isNegative())
    Str.push_back('-');

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  // Set FormatPrecision if zero.  We want to do this before we
  // truncate trailing zeros, as those are part of the precision.
  if (!FormatPrecision) {
    // We use enough digits so the number can be round-tripped back to an
    // APFloat. The formula comes from "How to Print Floating-Point Numbers
    // Accurately" by Steele and White.
    // FIXME: Using a formula based purely on the precision is conservative;
    // we can print fewer digits depending on the actual value being printed.

    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
    FormatPrecision = 2 + semantics->precision * 59 / 196;
  }

  // Ignore trailing binary zeros.
  int trailingZeros = significand.countr_zero();
  exp += trailingZeros;
  significand.lshrInPlace(trailingZeros);

  // Change the exponent from 2^e to 10^e.
  if (exp == 0) {
    // Nothing to do.
  } else if (exp > 0) {
    // Just shift left.
    significand = significand.zext(semantics->precision + exp);
    significand <<= exp;
    exp = 0;
  } else { /* exp < 0 */
    int texp = -exp;

    // We transform this using the identity:
    //   (N)(2^-e) == (N)(5^e)(10^-e)
    // This means we have to multiply N (the significand) by 5^e.
    // To avoid overflow, we have to operate on numbers large
    // enough to store N * 5^e:
    //   log2(N * 5^e) == log2(N) + e * log2(5)
    //                 <= semantics->precision + e * 137 / 59
    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

    unsigned precision = semantics->precision + (137 * texp + 136) / 59;

    // Multiply significand by 5^e.
    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
    significand = significand.zext(precision);
    APInt five_to_the_i(precision, 5);
    while (true) {
      if (texp & 1) significand *= five_to_the_i;

      texp >>= 1;
      if (!texp) break;
      five_to_the_i *= five_to_the_i;
    }
  }

  // Coarse trim on the integer before digits are generated.
  AdjustToPrecision(significand, exp, FormatPrecision);

  // Decimal digits, least significant first.
  SmallVector<char, 256> buffer;

  // Fill the buffer.
  unsigned precision = significand.getBitWidth();
  if (precision < 4) {
    // We need enough precision to store the value 10.
    precision = 4;
    significand = significand.zext(precision);
  }
  APInt ten(precision, 10);
  APInt digit(precision, 0);

  bool inTrail = true;
  while (significand != 0) {
    // digit <- significand % 10
    // significand <- significand / 10
    APInt::udivrem(significand, ten, significand, digit);

    unsigned d = digit.getZExtValue();

    // Drop trailing zeros.
    if (inTrail && !d) exp++;
    else {
      buffer.push_back((char) ('0' + d));
      inTrail = false;
    }
  }

  assert(!buffer.empty() && "no characters in buffer!");

  // Drop down to FormatPrecision.
  // TODO: don't do more precise calculations above than are required.
  AdjustToPrecision(buffer, exp, FormatPrecision);

  unsigned NDigits = buffer.size();

  // Check whether we should use scientific notation.
  bool FormatScientific;
  if (!FormatMaxPadding)
    FormatScientific = true;
  else {
    if (exp >= 0) {
      // 765e3 --> 765000
      //              ^^^
      // But we shouldn't make the number look more precise than it is.
      FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                          NDigits + (unsigned) exp > FormatPrecision);
    } else {
      // Power of the most significant digit.
      int MSD = exp + (int) (NDigits - 1);
      if (MSD >= 0) {
        // 765e-2 == 7.65
        FormatScientific = false;
      } else {
        // 765e-5 == 0.00765
        //           ^ ^^
        FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
      }
    }
  }

  // Scientific formatting is pretty straightforward.
  if (FormatScientific) {
    // Exponent of the leading digit once the point sits right after it.
    exp += (NDigits - 1);

    Str.push_back(buffer[NDigits-1]);
    Str.push_back('.');
    if (NDigits == 1 && TruncateZero)
      Str.push_back('0');
    else
      for (unsigned I = 1; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
    // Fill with zeros up to FormatPrecision.
    if (!TruncateZero && FormatPrecision > NDigits - 1)
      Str.append(FormatPrecision - NDigits + 1, '0');
    // For !TruncateZero we use lower 'e'.
    Str.push_back(TruncateZero ? 'E' : 'e');

    Str.push_back(exp >= 0 ? '+' : '-');
    if (exp < 0) exp = -exp;
    // Exponent digits are produced least significant first, then reversed.
    SmallVector<char, 6> expbuf;
    do {
      expbuf.push_back((char) ('0' + (exp % 10)));
      exp /= 10;
    } while (exp);
    // Exponent always at least two digits if we do not truncate zeros.
    if (!TruncateZero && expbuf.size() < 2)
      expbuf.push_back('0');
    for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
      Str.push_back(expbuf[E-1-I]);
    return;
  }

  // Non-scientific, positive exponents.
  if (exp >= 0) {
    for (unsigned I = 0; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-1-I]);
    for (unsigned I = 0; I != (unsigned) exp; ++I)
      Str.push_back('0');
    return;
  }

  // Non-scientific, negative exponents.

  // The number of digits to the left of the decimal point.
  int NWholeDigits = exp + (int) NDigits;

  unsigned I = 0;
  if (NWholeDigits > 0) {
    for (; I != (unsigned) NWholeDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);
    Str.push_back('.');
  } else {
    // "0." followed by -NWholeDigits zeros before the first stored digit.
    unsigned NZeros = 1 + (unsigned) -NWholeDigits;

    Str.push_back('0');
    Str.push_back('.');
    for (unsigned Z = 1; Z != NZeros; ++Z)
      Str.push_back('0');
  }

  // Remaining (fractional) digits.
  for (; I != NDigits; ++I)
    Str.push_back(buffer[NDigits-I-1]);
}

/// Returns true when 1/x can be computed with status opOK and is not a
/// denormal, for an x whose significand has only its integer bit set. If
/// \p inv is non-null it receives the reciprocal. Returns false otherwise and
/// leaves \p inv untouched.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  if (significandLSB() != semantics->precision - 1)
    return false;

  // Get the inverse.
  IEEEFloat reciprocal(*semantics, 1ULL);
  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (reciprocal.isDenormal())
    return false;

  assert(reciprocal.isFiniteNonZero() &&
         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(reciprocal, *semantics);

  return true;
}

/// Returns log2(|x|) when the significand has exactly one bit set, and
/// INT_MIN for zero, non-finite values, or significands with more than one
/// bit set.
int IEEEFloat::getExactLog2Abs() const {
  if (!isFinite() || isZero())
    return INT_MIN;

  const integerPart *Parts = significandParts();
  const int PartCount = partCountForBits(semantics->precision);

  // Bail out as soon as a second set bit is seen.
  int PopCount = 0;
  for (int i = 0; i < PartCount; ++i) {
    PopCount += llvm::popcount(Parts[i]);
    if (PopCount > 1)
      return INT_MIN;
  }

  // Above the minimum exponent the stored exponent is returned as-is.
  if (exponent != semantics->minExponent)
    return exponent;

  // At the minimum exponent the position of the single set bit determines
  // the result.
  int CountrParts = 0;
  for (int i = 0; i < PartCount;
       ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
    if (Parts[i] != 0) {
      return exponent - semantics->precision + CountrParts +
             llvm::countr_zero(Parts[i]) + 1;
    }
  }

  llvm_unreachable("didn't find the set bit");
}

/// Returns true only for a NaN whose bit at position precision - 2 is clear;
/// always false for semantics with NanOnly non-finite behaviour.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Semantics that use the negative-zero pattern for NaN get +0 instead.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}

/// Exponent value this object's semantics use for NaN; forwards to the
/// file-scope ::exponentNaN helper.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}

/// Exponent value this object's semantics use for infinity; forwards to the
/// file-scope ::exponentInf helper.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}

/// Exponent value this object's semantics use for zero; forwards to the
/// file-scope ::exponentZero helper.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}

/// Make this value an infinity with the given sign, or a NaN with that sign
/// when the semantics have no infinity.
void IEEEFloat::makeInf(bool Negative) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // There is no Inf, so make NaN instead.
    makeNaN(false, Negative);
    return;
  }
  category = fcInfinity;
  sign = Negative;
  exponent = exponentInf();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Make this value a zero. The requested sign is honoured unless the
/// semantics reserve the negative-zero pattern for NaN.
void IEEEFloat::makeZero(bool Negative) {
  category = fcZero;
  sign = Negative;
  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Merge negative zero to positive because 0b10000...000 is used for NaN
    sign = false;
  }
  exponent = exponentZero();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Set the quiet bit (bit precision - 2) of a NaN. Does nothing for NanOnly
/// semantics. Must only be called on a NaN.
void IEEEFloat::makeQuiet() {
  assert(isNaN());
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    APInt::tcSetBit(significandParts(), semantics->precision - 2);
}

/// Returns the exponent of \p Arg as if it were normalized, or one of the
/// IEK_NaN / IEK_Zero / IEK_Inf sentinels for the special categories.
int ilogb(const IEEEFloat &Arg) {
  if (Arg.isNaN())
    return IEEEFloat::IEK_NaN;
  if (Arg.isZero())
    return IEEEFloat::IEK_Zero;
  if (Arg.isInfinity())
    return IEEEFloat::IEK_Inf;
  if (!Arg.isDenormal())
    return Arg.exponent;

  // Denormal: raise the exponent on a copy so normalize() can shift the
  // significand up, then take the same amount back out of the result.
  IEEEFloat Normalized(Arg);
  int SignificandBits = Arg.getSemantics().precision - 1;

  Normalized.exponent += SignificandBits;
  Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
  return Normalized.exponent - SignificandBits;
}

/// Returns \p X with \p Exp added to its exponent and then renormalized
/// using \p RoundingMode. A NaN result is quieted.
IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
  auto MaxExp = X.getSemantics().maxExponent;
  auto MinExp = X.getSemantics().minExponent;

  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
  // overflow; clamp it to a safe range before adding, but ensure that the range
  // is large enough that the clamp does not change the result. The range we
  // need to support is the difference between the largest possible exponent and
  // the normalized exponent of half the smallest denormal.

  int SignificandBits = X.getSemantics().precision - 1;
  int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;

  // Clamp to one past the range ends to let normalize handle overflow.
  X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
  X.normalize(RoundingMode, lfExactlyZero);
  if (X.isNaN())
    X.makeQuiet();
  return X;
}

/// Splits \p Val into a fraction (the return value) and an exponent stored in
/// \p Exp. NaN is returned quieted and infinity unchanged; in both cases
/// \p Exp holds the corresponding ilogb sentinel.
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
  Exp = ilogb(Val);

  // Quiet signalling nans.
  if (Exp == IEEEFloat::IEK_NaN) {
    IEEEFloat Quiet(Val);
    Quiet.makeQuiet();
    return Quiet;
  }

  if (Exp == IEEEFloat::IEK_Inf)
    return Val;

  // 1 is added because frexp is defined to return a normalized fraction in
  // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
  Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
  return scalbn(Val, -Exp, RM);
}

/// Both halves are default-constructed IEEE doubles. \p S must be
/// semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Both halves are constructed with the uninitialized tag. \p S must be
/// semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// The first half is constructed from \p I, the second is default-constructed.
/// \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Each half is built from one raw 64-bit word of \p I: word 0 for the first
/// half, word 1 for the second. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics == &semPPCDoubleDouble);
}

DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4585 APFloat &&Second) 4586 : Semantics(&S), 4587 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4588 assert(Semantics == &semPPCDoubleDouble); 4589 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4590 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4591 } 4592 4593 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4594 : Semantics(RHS.Semantics), 4595 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4596 APFloat(RHS.Floats[1])} 4597 : nullptr) { 4598 assert(Semantics == &semPPCDoubleDouble); 4599 } 4600 4601 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4602 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4603 RHS.Semantics = &semBogus; 4604 assert(Semantics == &semPPCDoubleDouble); 4605 } 4606 4607 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4608 if (Semantics == RHS.Semantics && RHS.Floats) { 4609 Floats[0] = RHS.Floats[0]; 4610 Floats[1] = RHS.Floats[1]; 4611 } else if (this != &RHS) { 4612 this->~DoubleAPFloat(); 4613 new (this) DoubleAPFloat(RHS); 4614 } 4615 return *this; 4616 } 4617 4618 // Implement addition, subtraction, multiplication and division based on: 4619 // "Software for Doubled-Precision Floating-Point Computations", 4620 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 
4621 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4622 const APFloat &c, const APFloat &cc, 4623 roundingMode RM) { 4624 int Status = opOK; 4625 APFloat z = a; 4626 Status |= z.add(c, RM); 4627 if (!z.isFinite()) { 4628 if (!z.isInfinity()) { 4629 Floats[0] = std::move(z); 4630 Floats[1].makeZero(/* Neg = */ false); 4631 return (opStatus)Status; 4632 } 4633 Status = opOK; 4634 auto AComparedToC = a.compareAbsoluteValue(c); 4635 z = cc; 4636 Status |= z.add(aa, RM); 4637 if (AComparedToC == APFloat::cmpGreaterThan) { 4638 // z = cc + aa + c + a; 4639 Status |= z.add(c, RM); 4640 Status |= z.add(a, RM); 4641 } else { 4642 // z = cc + aa + a + c; 4643 Status |= z.add(a, RM); 4644 Status |= z.add(c, RM); 4645 } 4646 if (!z.isFinite()) { 4647 Floats[0] = std::move(z); 4648 Floats[1].makeZero(/* Neg = */ false); 4649 return (opStatus)Status; 4650 } 4651 Floats[0] = z; 4652 APFloat zz = aa; 4653 Status |= zz.add(cc, RM); 4654 if (AComparedToC == APFloat::cmpGreaterThan) { 4655 // Floats[1] = a - z + c + zz; 4656 Floats[1] = a; 4657 Status |= Floats[1].subtract(z, RM); 4658 Status |= Floats[1].add(c, RM); 4659 Status |= Floats[1].add(zz, RM); 4660 } else { 4661 // Floats[1] = c - z + a + zz; 4662 Floats[1] = c; 4663 Status |= Floats[1].subtract(z, RM); 4664 Status |= Floats[1].add(a, RM); 4665 Status |= Floats[1].add(zz, RM); 4666 } 4667 } else { 4668 // q = a - z; 4669 APFloat q = a; 4670 Status |= q.subtract(z, RM); 4671 4672 // zz = q + c + (a - (q + z)) + aa + cc; 4673 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 
4674 auto zz = q; 4675 Status |= zz.add(c, RM); 4676 Status |= q.add(z, RM); 4677 Status |= q.subtract(a, RM); 4678 q.changeSign(); 4679 Status |= zz.add(q, RM); 4680 Status |= zz.add(aa, RM); 4681 Status |= zz.add(cc, RM); 4682 if (zz.isZero() && !zz.isNegative()) { 4683 Floats[0] = std::move(z); 4684 Floats[1].makeZero(/* Neg = */ false); 4685 return opOK; 4686 } 4687 Floats[0] = z; 4688 Status |= Floats[0].add(zz, RM); 4689 if (!Floats[0].isFinite()) { 4690 Floats[1].makeZero(/* Neg = */ false); 4691 return (opStatus)Status; 4692 } 4693 Floats[1] = std::move(z); 4694 Status |= Floats[1].subtract(Floats[0], RM); 4695 Status |= Floats[1].add(zz, RM); 4696 } 4697 return (opStatus)Status; 4698 } 4699 4700 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4701 const DoubleAPFloat &RHS, 4702 DoubleAPFloat &Out, 4703 roundingMode RM) { 4704 if (LHS.getCategory() == fcNaN) { 4705 Out = LHS; 4706 return opOK; 4707 } 4708 if (RHS.getCategory() == fcNaN) { 4709 Out = RHS; 4710 return opOK; 4711 } 4712 if (LHS.getCategory() == fcZero) { 4713 Out = RHS; 4714 return opOK; 4715 } 4716 if (RHS.getCategory() == fcZero) { 4717 Out = LHS; 4718 return opOK; 4719 } 4720 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4721 LHS.isNegative() != RHS.isNegative()) { 4722 Out.makeNaN(false, Out.isNegative(), nullptr); 4723 return opInvalidOp; 4724 } 4725 if (LHS.getCategory() == fcInfinity) { 4726 Out = LHS; 4727 return opOK; 4728 } 4729 if (RHS.getCategory() == fcInfinity) { 4730 Out = RHS; 4731 return opOK; 4732 } 4733 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4734 4735 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4736 CC(RHS.Floats[1]); 4737 assert(&A.getSemantics() == &semIEEEdouble); 4738 assert(&AA.getSemantics() == &semIEEEdouble); 4739 assert(&C.getSemantics() == &semIEEEdouble); 4740 assert(&CC.getSemantics() == &semIEEEdouble); 4741 assert(&Out.Floats[0].getSemantics() == 
&semIEEEdouble); 4742 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4743 return Out.addImpl(A, AA, C, CC, RM); 4744 } 4745 4746 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4747 roundingMode RM) { 4748 return addWithSpecial(*this, RHS, *this, RM); 4749 } 4750 4751 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4752 roundingMode RM) { 4753 changeSign(); 4754 auto Ret = add(RHS, RM); 4755 changeSign(); 4756 return Ret; 4757 } 4758 4759 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4760 APFloat::roundingMode RM) { 4761 const auto &LHS = *this; 4762 auto &Out = *this; 4763 /* Interesting observation: For special categories, finding the lowest 4764 common ancestor of the following layered graph gives the correct 4765 return category: 4766 4767 NaN 4768 / \ 4769 Zero Inf 4770 \ / 4771 Normal 4772 4773 e.g. NaN * NaN = NaN 4774 Zero * Inf = NaN 4775 Normal * Zero = Zero 4776 Normal * Inf = Inf 4777 */ 4778 if (LHS.getCategory() == fcNaN) { 4779 Out = LHS; 4780 return opOK; 4781 } 4782 if (RHS.getCategory() == fcNaN) { 4783 Out = RHS; 4784 return opOK; 4785 } 4786 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4787 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4788 Out.makeNaN(false, false, nullptr); 4789 return opOK; 4790 } 4791 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4792 Out = LHS; 4793 return opOK; 4794 } 4795 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4796 Out = RHS; 4797 return opOK; 4798 } 4799 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4800 "Special cases not handled exhaustively"); 4801 4802 int Status = opOK; 4803 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4804 // t = a * c 4805 APFloat T = A; 4806 Status |= T.multiply(C, RM); 4807 if (!T.isFiniteNonZero()) { 4808 Floats[0] = T; 4809 Floats[1].makeZero(/* Neg = */ false); 4810 
return (opStatus)Status; 4811 } 4812 4813 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4814 APFloat Tau = A; 4815 T.changeSign(); 4816 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4817 T.changeSign(); 4818 { 4819 // v = a * d 4820 APFloat V = A; 4821 Status |= V.multiply(D, RM); 4822 // w = b * c 4823 APFloat W = B; 4824 Status |= W.multiply(C, RM); 4825 Status |= V.add(W, RM); 4826 // tau += v + w 4827 Status |= Tau.add(V, RM); 4828 } 4829 // u = t + tau 4830 APFloat U = T; 4831 Status |= U.add(Tau, RM); 4832 4833 Floats[0] = U; 4834 if (!U.isFinite()) { 4835 Floats[1].makeZero(/* Neg = */ false); 4836 } else { 4837 // Floats[1] = (t - u) + tau 4838 Status |= T.subtract(U, RM); 4839 Status |= T.add(Tau, RM); 4840 Floats[1] = T; 4841 } 4842 return (opStatus)Status; 4843 } 4844 4845 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4846 APFloat::roundingMode RM) { 4847 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4848 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4849 auto Ret = 4850 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4851 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4852 return Ret; 4853 } 4854 4855 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 4856 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4857 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4858 auto Ret = 4859 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4860 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4861 return Ret; 4862 } 4863 4864 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 4865 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4866 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4867 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 4868 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4869 return Ret; 
4870 } 4871 4872 APFloat::opStatus 4873 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 4874 const DoubleAPFloat &Addend, 4875 APFloat::roundingMode RM) { 4876 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4877 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4878 auto Ret = Tmp.fusedMultiplyAdd( 4879 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 4880 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 4881 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4882 return Ret; 4883 } 4884 4885 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 4886 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4887 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4888 auto Ret = Tmp.roundToIntegral(RM); 4889 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4890 return Ret; 4891 } 4892 4893 void DoubleAPFloat::changeSign() { 4894 Floats[0].changeSign(); 4895 Floats[1].changeSign(); 4896 } 4897 4898 APFloat::cmpResult 4899 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 4900 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 4901 if (Result != cmpEqual) 4902 return Result; 4903 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 4904 if (Result == cmpLessThan || Result == cmpGreaterThan) { 4905 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 4906 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 4907 if (Against && !RHSAgainst) 4908 return cmpLessThan; 4909 if (!Against && RHSAgainst) 4910 return cmpGreaterThan; 4911 if (!Against && !RHSAgainst) 4912 return Result; 4913 if (Against && RHSAgainst) 4914 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 4915 } 4916 return Result; 4917 } 4918 4919 APFloat::fltCategory DoubleAPFloat::getCategory() const { 4920 return Floats[0].getCategory(); 4921 } 4922 4923 bool DoubleAPFloat::isNegative() 
const { return Floats[0].isNegative(); } 4924 4925 void DoubleAPFloat::makeInf(bool Neg) { 4926 Floats[0].makeInf(Neg); 4927 Floats[1].makeZero(/* Neg = */ false); 4928 } 4929 4930 void DoubleAPFloat::makeZero(bool Neg) { 4931 Floats[0].makeZero(Neg); 4932 Floats[1].makeZero(/* Neg = */ false); 4933 } 4934 4935 void DoubleAPFloat::makeLargest(bool Neg) { 4936 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4937 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 4938 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 4939 if (Neg) 4940 changeSign(); 4941 } 4942 4943 void DoubleAPFloat::makeSmallest(bool Neg) { 4944 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4945 Floats[0].makeSmallest(Neg); 4946 Floats[1].makeZero(/* Neg = */ false); 4947 } 4948 4949 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 4950 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4951 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 4952 if (Neg) 4953 Floats[0].changeSign(); 4954 Floats[1].makeZero(/* Neg = */ false); 4955 } 4956 4957 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 4958 Floats[0].makeNaN(SNaN, Neg, fill); 4959 Floats[1].makeZero(/* Neg = */ false); 4960 } 4961 4962 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 4963 auto Result = Floats[0].compare(RHS.Floats[0]); 4964 // |Float[0]| > |Float[1]| 4965 if (Result == APFloat::cmpEqual) 4966 return Floats[1].compare(RHS.Floats[1]); 4967 return Result; 4968 } 4969 4970 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 4971 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 4972 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 4973 } 4974 4975 hash_code hash_value(const DoubleAPFloat &Arg) { 4976 if (Arg.Floats) 4977 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 4978 return hash_combine(Arg.Semantics); 4979 } 4980 4981 
APInt DoubleAPFloat::bitcastToAPInt() const { 4982 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4983 uint64_t Data[] = { 4984 Floats[0].bitcastToAPInt().getRawData()[0], 4985 Floats[1].bitcastToAPInt().getRawData()[0], 4986 }; 4987 return APInt(128, 2, Data); 4988 } 4989 4990 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 4991 roundingMode RM) { 4992 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4993 APFloat Tmp(semPPCDoubleDoubleLegacy); 4994 auto Ret = Tmp.convertFromString(S, RM); 4995 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4996 return Ret; 4997 } 4998 4999 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5000 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5001 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5002 auto Ret = Tmp.next(nextDown); 5003 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5004 return Ret; 5005 } 5006 5007 APFloat::opStatus 5008 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5009 unsigned int Width, bool IsSigned, 5010 roundingMode RM, bool *IsExact) const { 5011 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5012 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5013 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5014 } 5015 5016 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5017 bool IsSigned, 5018 roundingMode RM) { 5019 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5020 APFloat Tmp(semPPCDoubleDoubleLegacy); 5021 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5022 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5023 return Ret; 5024 } 5025 5026 APFloat::opStatus 5027 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5028 unsigned int InputSize, 5029 bool IsSigned, roundingMode RM) { 5030 assert(Semantics == &semPPCDoubleDouble && 
"Unexpected Semantics"); 5031 APFloat Tmp(semPPCDoubleDoubleLegacy); 5032 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5033 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5034 return Ret; 5035 } 5036 5037 APFloat::opStatus 5038 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5039 unsigned int InputSize, 5040 bool IsSigned, roundingMode RM) { 5041 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5042 APFloat Tmp(semPPCDoubleDoubleLegacy); 5043 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5044 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5045 return Ret; 5046 } 5047 5048 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5049 unsigned int HexDigits, 5050 bool UpperCase, 5051 roundingMode RM) const { 5052 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5053 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5054 .convertToHexString(DST, HexDigits, UpperCase, RM); 5055 } 5056 5057 bool DoubleAPFloat::isDenormal() const { 5058 return getCategory() == fcNormal && 5059 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5060 // (double)(Hi + Lo) == Hi defines a normal number. 
5061 Floats[0] != Floats[0] + Floats[1]); 5062 } 5063 5064 bool DoubleAPFloat::isSmallest() const { 5065 if (getCategory() != fcNormal) 5066 return false; 5067 DoubleAPFloat Tmp(*this); 5068 Tmp.makeSmallest(this->isNegative()); 5069 return Tmp.compare(*this) == cmpEqual; 5070 } 5071 5072 bool DoubleAPFloat::isSmallestNormalized() const { 5073 if (getCategory() != fcNormal) 5074 return false; 5075 5076 DoubleAPFloat Tmp(*this); 5077 Tmp.makeSmallestNormalized(this->isNegative()); 5078 return Tmp.compare(*this) == cmpEqual; 5079 } 5080 5081 bool DoubleAPFloat::isLargest() const { 5082 if (getCategory() != fcNormal) 5083 return false; 5084 DoubleAPFloat Tmp(*this); 5085 Tmp.makeLargest(this->isNegative()); 5086 return Tmp.compare(*this) == cmpEqual; 5087 } 5088 5089 bool DoubleAPFloat::isInteger() const { 5090 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5091 return Floats[0].isInteger() && Floats[1].isInteger(); 5092 } 5093 5094 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5095 unsigned FormatPrecision, 5096 unsigned FormatMaxPadding, 5097 bool TruncateZero) const { 5098 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5099 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5100 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5101 } 5102 5103 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5104 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5105 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5106 if (!inv) 5107 return Tmp.getExactInverse(nullptr); 5108 APFloat Inv(semPPCDoubleDoubleLegacy); 5109 auto Ret = Tmp.getExactInverse(&Inv); 5110 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5111 return Ret; 5112 } 5113 5114 int DoubleAPFloat::getExactLog2() const { 5115 // TODO: Implement me 5116 return INT_MIN; 5117 } 5118 5119 int DoubleAPFloat::getExactLog2Abs() const { 5120 // TODO: Implement me 5121 return INT_MIN; 5122 } 5123 5124 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5125 APFloat::roundingMode RM) { 5126 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5127 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5128 scalbn(Arg.Floats[1], Exp, RM)); 5129 } 5130 5131 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5132 APFloat::roundingMode RM) { 5133 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5134 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5135 APFloat Second = Arg.Floats[1]; 5136 if (Arg.getCategory() == APFloat::fcNormal) 5137 Second = scalbn(Second, -Exp, RM); 5138 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5139 } 5140 5141 } // namespace detail 5142 5143 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5144 if (usesLayout<IEEEFloat>(Semantics)) { 5145 new (&IEEE) IEEEFloat(std::move(F)); 5146 return; 5147 } 5148 if (usesLayout<DoubleAPFloat>(Semantics)) { 5149 const fltSemantics& S = F.getSemantics(); 5150 new (&Double) 5151 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5152 APFloat(semIEEEdouble)); 5153 return; 5154 } 5155 llvm_unreachable("Unexpected semantics"); 5156 } 5157 5158 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5159 roundingMode RM) { 5160 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5161 } 5162 5163 hash_code hash_value(const APFloat &Arg) { 5164 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5165 return hash_value(Arg.U.IEEE); 5166 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5167 return hash_value(Arg.U.Double); 5168 llvm_unreachable("Unexpected semantics"); 5169 } 5170 5171 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5172 : APFloat(Semantics) { 5173 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5174 assert(StatusOrErr && "Invalid floating point representation"); 5175 consumeError(StatusOrErr.takeError()); 5176 } 
5177 5178 FPClassTest APFloat::classify() const { 5179 if (isZero()) 5180 return isNegative() ? fcNegZero : fcPosZero; 5181 if (isNormal()) 5182 return isNegative() ? fcNegNormal : fcPosNormal; 5183 if (isDenormal()) 5184 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5185 if (isInfinity()) 5186 return isNegative() ? fcNegInf : fcPosInf; 5187 assert(isNaN() && "Other class of FP constant"); 5188 return isSignaling() ? fcSNan : fcQNan; 5189 } 5190 5191 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5192 roundingMode RM, bool *losesInfo) { 5193 if (&getSemantics() == &ToSemantics) { 5194 *losesInfo = false; 5195 return opOK; 5196 } 5197 if (usesLayout<IEEEFloat>(getSemantics()) && 5198 usesLayout<IEEEFloat>(ToSemantics)) 5199 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5200 if (usesLayout<IEEEFloat>(getSemantics()) && 5201 usesLayout<DoubleAPFloat>(ToSemantics)) { 5202 assert(&ToSemantics == &semPPCDoubleDouble); 5203 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5204 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5205 return Ret; 5206 } 5207 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5208 usesLayout<IEEEFloat>(ToSemantics)) { 5209 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5210 *this = APFloat(std::move(getIEEE()), ToSemantics); 5211 return Ret; 5212 } 5213 llvm_unreachable("Unexpected semantics"); 5214 } 5215 5216 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5217 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5218 } 5219 5220 void APFloat::print(raw_ostream &OS) const { 5221 SmallVector<char, 16> Buffer; 5222 toString(Buffer); 5223 OS << Buffer << "\n"; 5224 } 5225 5226 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5227 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 5228 #endif 5229 5230 void APFloat::Profile(FoldingSetNodeID &NID) const { 5231 NID.Add(bitcastToAPInt()); 5232 } 5233 5234 /* Same as 
convertToInteger(integerPart*, ...), except the result is returned in 5235 an APSInt, whose initial bit-width and signed-ness are used to determine the 5236 precision of the conversion. 5237 */ 5238 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5239 roundingMode rounding_mode, 5240 bool *isExact) const { 5241 unsigned bitWidth = result.getBitWidth(); 5242 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5243 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5244 rounding_mode, isExact); 5245 // Keeps the original signed-ness. 5246 result = APInt(bitWidth, parts); 5247 return status; 5248 } 5249 5250 double APFloat::convertToDouble() const { 5251 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5252 return getIEEE().convertToDouble(); 5253 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5254 "Float semantics is not representable by IEEEdouble"); 5255 APFloat Temp = *this; 5256 bool LosesInfo; 5257 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5258 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5259 (void)St; 5260 return Temp.getIEEE().convertToDouble(); 5261 } 5262 5263 float APFloat::convertToFloat() const { 5264 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5265 return getIEEE().convertToFloat(); 5266 assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5267 "Float semantics is not representable by IEEEsingle"); 5268 APFloat Temp = *this; 5269 bool LosesInfo; 5270 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5271 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5272 (void)St; 5273 return Temp.getIEEE().convertToFloat(); 5274 } 5275 5276 } // namespace llvm 5277 5278 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5279