1 //===- llvm/ADT/APFloat.h - Arbitrary Precision Floating Point ---*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file declares a class to represent arbitrary precision floating point 11 /// values and provide a variety of arithmetic operations on them. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_ADT_APFLOAT_H 16 #define LLVM_ADT_APFLOAT_H 17 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/FloatingPointMode.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/float128.h" 23 #include <memory> 24 // Forward METHOD_CALL to U.IEEE or U.Double based on getSemantics() and return it. 25 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 26 do { \ 27 if (usesLayout<IEEEFloat>(getSemantics())) \ 28 return U.IEEE.METHOD_CALL; \ 29 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 30 return U.Double.METHOD_CALL; \ 31 llvm_unreachable("Unexpected semantics"); \ 32 } while (false) 33 34 namespace llvm { 35 36 struct fltSemantics; 37 class APSInt; 38 class StringRef; 39 class APFloat; 40 class raw_ostream; 41 42 template <typename T> class Expected; 43 template <typename T> class SmallVectorImpl; 44 45 /// Enum that represents what fraction of the LSB the truncated bits of an fp 46 /// number represent. 47 /// 48 /// This essentially combines the roles of guard and sticky bits. 49 enum lostFraction { // Example of truncated bits: 50 lfExactlyZero, // 000000 51 lfLessThanHalf, // 0xxxxx x's not all zero 52 lfExactlyHalf, // 100000 53 lfMoreThanHalf // 1xxxxx x's not all zero 54 }; 55 56 /// A self-contained host- and target-independent arbitrary-precision 57 /// floating-point software implementation.
58 /// 59 /// APFloat uses bignum integer arithmetic as provided by static functions in 60 /// the APInt class. The library will work with bignum integers whose parts are 61 /// any unsigned type at least 16 bits wide, but 64 bits is recommended. 62 /// 63 /// Written for clarity rather than speed, in particular with a view to use in 64 /// the front-end of a cross compiler so that target arithmetic can be correctly 65 /// performed on the host. Performance should nonetheless be reasonable, 66 /// particularly for its intended use. It may be useful as a base 67 /// implementation for a run-time library during development of a faster 68 /// target-specific one. 69 /// 70 /// All 5 rounding modes in the IEEE-754R draft are handled correctly for all 71 /// implemented operations. Currently implemented operations are add, subtract, 72 /// multiply, divide, fused-multiply-add, conversion-to-float, 73 /// conversion-to-integer and conversion-from-integer. New rounding modes 74 /// (e.g. away from zero) can be added with three or four lines of code. 75 /// 76 /// Many formats are built-in (see APFloatBase::Semantics), including IEEE 77 /// half, single, double and quadruple precision and x87 80-bit extended double 78 /// (when operating with full extended precision). Adding a new format that 79 /// obeys IEEE semantics only requires adding two lines of code: a declaration 80 /// and definition of the format. 81 /// 82 /// All operations return the status of that operation as an exception bit-mask, 83 /// so multiple operations can be done consecutively with their results or-ed 84 /// together. The returned status can be useful for compiler diagnostics; e.g., 85 /// inexact, underflow and overflow can be easily diagnosed on constant folding, 86 /// and compiler optimizers can determine what exceptions would be raised by 87 /// folding operations and optimize, or perhaps not optimize, accordingly.
88 /// 89 /// At present, underflow tininess is detected after rounding; it should be 90 /// straightforward to add support for the before-rounding case too. 91 /// 92 /// The library reads hexadecimal floating point numbers as per C99, and 93 /// correctly rounds if necessary according to the specified rounding mode. 94 /// Syntax is required to have been validated by the caller. It also converts 95 /// floating point numbers to hexadecimal text as per the C99 %a and %A 96 /// conversions. The output precision (or alternatively the natural minimal 97 /// precision) can be specified; if the requested precision is less than the 98 /// natural precision the output is correctly rounded for the specified rounding 99 /// mode. 100 /// 101 /// It also reads decimal floating point numbers and correctly rounds according 102 /// to the specified rounding mode. 103 /// 104 /// Conversion to decimal text is provided by toString(). 105 /// 106 /// Non-zero finite numbers are represented internally as a sign bit, a signed 107 /// exponent (of type ExponentType), and the significand as an array of integer parts. After 108 /// normalization of a number of precision P the exponent is within the range of 109 /// the format, and if the number is not denormal the P-th bit of the 110 /// significand is set as an explicit integer bit. For denormals the most 111 /// significant bit is shifted right so that the exponent is maintained at the 112 /// format's minimum, so that the smallest denormal has just the least 113 /// significant bit of the significand set. The sign of zeroes and infinities 114 /// is significant; the exponent and significand of such numbers are not stored, 115 /// but have a known implicit (deterministic) value: 0 for the significands, 0 116 /// for zero exponent, all 1 bits for infinity exponent. For NaNs the sign and 117 /// significand are deterministic, although not really meaningful, and preserved 118 /// in non-conversion operations. The exponent is implicitly all 1 bits.
119 /// 120 /// APFloat does not provide any exception handling beyond default exception 121 /// handling. We represent Signaling NaNs via IEEE-754R 2008 6.2.1 should clause 122 /// by encoding Signaling NaNs with the first bit of its trailing significand as 123 /// 0. 124 /// 125 /// TODO 126 /// ==== 127 /// 128 /// Some features that may or may not be worth adding: 129 /// 130 /// Binary to decimal conversion (hard). 131 /// 132 /// Optional ability to detect underflow tininess before rounding. 133 /// 134 /// New formats: x87 in single and double precision mode (IEEE apart from 135 /// extended exponent range) (hard). 136 /// 137 /// New operations: sqrt, IEEE remainder, C90 fmod, nexttoward. 138 /// 139 140 // These are the common type definitions shared by APFloat and its internal 141 // implementation classes. This struct should not define any non-static data 142 // members. 143 struct APFloatBase { 144 typedef APInt::WordType integerPart; 145 static constexpr unsigned integerPartWidth = APInt::APINT_BITS_PER_WORD; 146 147 /// A signed type to represent a floating point number's unbiased exponent. 148 typedef int32_t ExponentType; 149 150 /// \name Floating Point Semantics. 151 /// @{ 152 enum Semantics { 153 S_IEEEhalf, 154 S_BFloat, 155 S_IEEEsingle, 156 S_IEEEdouble, 157 S_IEEEquad, 158 S_PPCDoubleDouble, 159 // 8-bit floating point number following IEEE-754 conventions with bit 160 // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433. 161 S_Float8E5M2, 162 // 8-bit floating point number mostly following IEEE-754 conventions 163 // and bit layout S1E5M2 described in https://arxiv.org/abs/2206.02915, 164 // with expanded range and with no infinity or signed zero. 165 // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero). 166 // This format's exponent bias is 16, instead of the 15 (2 ** (5 - 1) - 1) 167 // that IEEE precedent would imply.
168 S_Float8E5M2FNUZ, 169 // 8-bit floating point number following IEEE-754 conventions with bit 170 // layout S1E4M3. 171 S_Float8E4M3, 172 // 8-bit floating point number mostly following IEEE-754 conventions with 173 // bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433. 174 // Unlike IEEE-754 types, there are no infinity values, and NaN is 175 // represented with the exponent and mantissa bits set to all 1s. 176 S_Float8E4M3FN, 177 // 8-bit floating point number mostly following IEEE-754 conventions 178 // and bit layout S1E4M3 described in https://arxiv.org/abs/2206.02915, 179 // with expanded range and with no infinity or signed zero. 180 // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero). 181 // This format's exponent bias is 8, instead of the 7 (2 ** (4 - 1) - 1) 182 // that IEEE precedent would imply. 183 S_Float8E4M3FNUZ, 184 // 8-bit floating point number mostly following IEEE-754 conventions 185 // and bit layout S1E4M3 with expanded range and with no infinity or signed 186 // zero. 187 // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero). 188 // This format's exponent bias is 11, instead of the 7 (2 ** (4 - 1) - 1) 189 // that IEEE precedent would imply. 190 S_Float8E4M3B11FNUZ, 191 // Floating point number that occupies 32 bits or less of storage, providing 192 // improved range compared to half (16-bit) formats, at (potentially) 193 // greater throughput than single precision (32-bit) formats. 194 S_FloatTF32, 195 // 6-bit floating point number with bit layout S1E3M2. Unlike IEEE-754 196 // types, there are no infinity or NaN values. The format is detailed in 197 // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf 198 S_Float6E3M2FN, 199 // 6-bit floating point number with bit layout S1E2M3. Unlike IEEE-754 200 // types, there are no infinity or NaN values.
The format is detailed in 201 // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf 202 S_Float6E2M3FN, 203 // 4-bit floating point number with bit layout S1E2M1. Unlike IEEE-754 204 // types, there are no infinity or NaN values. The format is detailed in 205 // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf 206 S_Float4E2M1FN, 207 // S_MaxSemantics below aliases S_x87DoubleExtended. 208 S_x87DoubleExtended, 209 S_MaxSemantics = S_x87DoubleExtended, 210 }; 211 212 static const llvm::fltSemantics &EnumToSemantics(Semantics S); 213 static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem); 214 215 static const fltSemantics &IEEEhalf() LLVM_READNONE; 216 static const fltSemantics &BFloat() LLVM_READNONE; 217 static const fltSemantics &IEEEsingle() LLVM_READNONE; 218 static const fltSemantics &IEEEdouble() LLVM_READNONE; 219 static const fltSemantics &IEEEquad() LLVM_READNONE; 220 static const fltSemantics &PPCDoubleDouble() LLVM_READNONE; 221 static const fltSemantics &Float8E5M2() LLVM_READNONE; 222 static const fltSemantics &Float8E5M2FNUZ() LLVM_READNONE; 223 static const fltSemantics &Float8E4M3() LLVM_READNONE; 224 static const fltSemantics &Float8E4M3FN() LLVM_READNONE; 225 static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE; 226 static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE; 227 static const fltSemantics &FloatTF32() LLVM_READNONE; 228 static const fltSemantics &Float6E3M2FN() LLVM_READNONE; 229 static const fltSemantics &Float6E2M3FN() LLVM_READNONE; 230 static const fltSemantics &Float4E2M1FN() LLVM_READNONE; 231 static const fltSemantics &x87DoubleExtended() LLVM_READNONE; 232 233 /// A pseudo fltSemantics used to construct APFloats that cannot conflict with 234 /// anything real. 235 static const fltSemantics &Bogus() LLVM_READNONE; 236 237 /// @} 238 239 /// IEEE-754R 5.11: Floating Point Comparison Relations.
240 enum cmpResult { 241 cmpLessThan, 242 cmpEqual, 243 cmpGreaterThan, 244 cmpUnordered 245 }; 246 247 /// IEEE-754R 4.3: Rounding-direction attributes. 248 using roundingMode = llvm::RoundingMode; 249 250 static constexpr roundingMode rmNearestTiesToEven = 251 RoundingMode::NearestTiesToEven; 252 static constexpr roundingMode rmTowardPositive = RoundingMode::TowardPositive; 253 static constexpr roundingMode rmTowardNegative = RoundingMode::TowardNegative; 254 static constexpr roundingMode rmTowardZero = RoundingMode::TowardZero; 255 static constexpr roundingMode rmNearestTiesToAway = 256 RoundingMode::NearestTiesToAway; 257 258 /// IEEE-754R 7: Default exception handling. 259 /// 260 /// opUnderflow or opOverflow are always returned or-ed with opInexact. 261 /// 262 /// APFloat models this behavior specified by IEEE-754: 263 /// "For operations producing results in floating-point format, the default 264 /// result of an operation that signals the invalid operation exception 265 /// shall be a quiet NaN." 266 enum opStatus { 267 opOK = 0x00, 268 opInvalidOp = 0x01, 269 opDivByZero = 0x02, 270 opOverflow = 0x04, 271 opUnderflow = 0x08, 272 opInexact = 0x10 273 }; 274 275 /// Category of internally-represented number. 276 enum fltCategory { 277 fcInfinity, 278 fcNaN, 279 fcNormal, 280 fcZero 281 }; 282 283 /// Convenience enum used to construct an uninitialized APFloat. 284 enum uninitializedTag { 285 uninitialized 286 }; 287 288 /// Enumeration of \c ilogb error results.
289 enum IlogbErrorKinds { 290 IEK_Zero = INT_MIN + 1, 291 IEK_NaN = INT_MIN, 292 IEK_Inf = INT_MAX 293 }; 294 295 static unsigned int semanticsPrecision(const fltSemantics &); 296 static ExponentType semanticsMinExponent(const fltSemantics &); 297 static ExponentType semanticsMaxExponent(const fltSemantics &); 298 static unsigned int semanticsSizeInBits(const fltSemantics &); 299 static unsigned int semanticsIntSizeInBits(const fltSemantics&, bool); 300 301 // Returns true if any number described by \p Src can be precisely represented 302 // by a normal (not subnormal) value in \p Dst. 303 static bool isRepresentableAsNormalIn(const fltSemantics &Src, 304 const fltSemantics &Dst); 305 306 /// Returns the size of the floating point number (in bits) in the given 307 /// semantics. 308 static unsigned getSizeInBits(const fltSemantics &Sem); 309 }; 310 311 namespace detail { 312 /// Floating point value held as a semantics pointer, a significand, an exponent, a category and a sign. 313 class IEEEFloat final : public APFloatBase { 314 public: 315 /// \name Constructors 316 /// @{ 317 318 IEEEFloat(const fltSemantics &); // Default construct to +0.0 319 IEEEFloat(const fltSemantics &, integerPart); 320 IEEEFloat(const fltSemantics &, uninitializedTag); 321 IEEEFloat(const fltSemantics &, const APInt &); 322 explicit IEEEFloat(double d); 323 explicit IEEEFloat(float f); 324 IEEEFloat(const IEEEFloat &); 325 IEEEFloat(IEEEFloat &&); 326 ~IEEEFloat(); 327 328 /// @} 329 330 /// Returns whether this instance allocated memory (partCount() > 1). 331 bool needsCleanup() const { return partCount() > 1; } 332 333 /// \name Convenience "constructors" 334 /// @{ 335 336 /// @} 337 338 /// \name Arithmetic 339 /// @{ 340 341 opStatus add(const IEEEFloat &, roundingMode); 342 opStatus subtract(const IEEEFloat &, roundingMode); 343 opStatus multiply(const IEEEFloat &, roundingMode); 344 opStatus divide(const IEEEFloat &, roundingMode); 345 /// IEEE remainder. 346 opStatus remainder(const IEEEFloat &); 347 /// C fmod, or llvm frem.
348 opStatus mod(const IEEEFloat &); 349 opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode); 350 opStatus roundToIntegral(roundingMode); 351 /// IEEE-754R 5.3.1: nextUp/nextDown. 352 opStatus next(bool nextDown); 353 354 /// @} 355 356 /// \name Sign operations. 357 /// @{ 358 359 void changeSign(); 360 361 /// @} 362 363 /// \name Conversions 364 /// @{ 365 366 opStatus convert(const fltSemantics &, roundingMode, bool *); 367 opStatus convertToInteger(MutableArrayRef<integerPart>, unsigned int, bool, 368 roundingMode, bool *) const; 369 opStatus convertFromAPInt(const APInt &, bool, roundingMode); 370 opStatus convertFromSignExtendedInteger(const integerPart *, unsigned int, 371 bool, roundingMode); 372 opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int, 373 bool, roundingMode); 374 Expected<opStatus> convertFromString(StringRef, roundingMode); 375 APInt bitcastToAPInt() const; 376 double convertToDouble() const; 377 #ifdef HAS_IEE754_FLOAT128 378 float128 convertToQuad() const; 379 #endif 380 float convertToFloat() const; 381 382 /// @} 383 384 /// The definition of equality is not straightforward for floating point, so 385 /// we won't use operator==. Use one of the following, or write whatever it 386 /// is you really mean. 387 bool operator==(const IEEEFloat &) const = delete; 388 389 /// IEEE comparison with another floating point number (NaNs compare 390 /// unordered, 0==-0). 391 cmpResult compare(const IEEEFloat &) const; 392 393 /// Bitwise comparison for equality (QNaNs compare equal, 0!=-0). 394 bool bitwiseIsEqual(const IEEEFloat &) const; 395 396 /// Write out a hexadecimal representation of the floating point value to DST, 397 /// which must be of sufficient size, in the C99 form [-]0xh.hhhhp[+-]d. 398 /// Return the number of characters written, excluding the terminating NUL.
399 unsigned int convertToHexString(char *dst, unsigned int hexDigits, 400 bool upperCase, roundingMode) const; 401 402 /// \name IEEE-754R 5.7.2 General operations. 403 /// @{ 404 405 /// IEEE-754R isSignMinus: Returns true if and only if the current value is 406 /// negative. 407 /// 408 /// This applies to zeros and NaNs as well. 409 bool isNegative() const { return sign; } 410 411 /// IEEE-754R isNormal: Returns true if and only if the current value is normal. 412 /// 413 /// This implies that the current value of the float is not zero, subnormal, 414 /// infinite, or NaN following the definition of normality from IEEE-754R. 415 bool isNormal() const { return !isDenormal() && isFiniteNonZero(); } 416 417 /// Returns true if and only if the current value is zero, subnormal, or 418 /// normal. 419 /// 420 /// This means that the value is not infinite or NaN. 421 bool isFinite() const { return !isNaN() && !isInfinity(); } 422 423 /// Returns true if and only if the float is plus or minus zero. 424 bool isZero() const { return category == fcZero; } 425 426 /// IEEE-754R isSubnormal(): Returns true if and only if the float is a 427 /// denormal. 428 bool isDenormal() const; 429 430 /// IEEE-754R isInfinite(): Returns true if and only if the float is infinity. 431 bool isInfinity() const { return category == fcInfinity; } 432 433 /// Returns true if and only if the float is a quiet or signaling NaN. 434 bool isNaN() const { return category == fcNaN; } 435 436 /// Returns true if and only if the float is a signaling NaN.
437 bool isSignaling() const; 438 439 /// @} 440 441 /// \name Simple Queries 442 /// @{ 443 444 fltCategory getCategory() const { return category; } 445 const fltSemantics &getSemantics() const { return *semantics; } 446 bool isNonZero() const { return category != fcZero; } 447 bool isFiniteNonZero() const { return isFinite() && !isZero(); } 448 bool isPosZero() const { return isZero() && !isNegative(); } 449 bool isNegZero() const { return isZero() && isNegative(); } 450 451 /// Returns true if and only if the number has the smallest possible non-zero 452 /// magnitude in the current semantics. 453 bool isSmallest() const; 454 455 /// Returns true if this is the smallest (by magnitude) normalized finite 456 /// number in the given semantics. 457 bool isSmallestNormalized() const; 458 459 /// Returns true if and only if the number has the largest possible finite 460 /// magnitude in the current semantics. 461 bool isLargest() const; 462 463 /// Returns true if and only if the number is an exact integer. 464 bool isInteger() const; 465 466 /// @} 467 468 IEEEFloat &operator=(const IEEEFloat &); 469 IEEEFloat &operator=(IEEEFloat &&); 470 471 /// Overload to compute a hash code for an APFloat value. 472 /// 473 /// Note that the use of hash codes for floating point values is in general 474 /// fraught with peril. Equality is hard to define for these values. For 475 /// example, should negative and positive zero hash to different codes? Are 476 /// they equal or not? This hash value implementation specifically 477 /// emphasizes producing different codes for different inputs in order to 478 /// be used in canonicalization and memoization. As such, equality is 479 /// bitwiseIsEqual, and 0 != -0. 480 friend hash_code hash_value(const IEEEFloat &Arg); 481 482 /// Converts this value into a decimal string. 483 /// 484 /// \param FormatPrecision The maximum number of digits of 485 /// precision to output.
If there are fewer digits available, 486 /// zero padding will not be used unless the value is 487 /// integral and small enough to be expressed in 488 /// FormatPrecision digits. 0 means to use the natural 489 /// precision of the number. 490 /// \param FormatMaxPadding The maximum number of zeros to 491 /// consider inserting before falling back to scientific 492 /// notation. 0 means to always use scientific notation. 493 /// 494 /// \param TruncateZero Indicate whether to remove the trailing zero in 495 /// fraction part or not. Also, setting this parameter to false forces 496 /// output that is more similar to default printf behavior. 497 /// Specifically, the lower-case e is used as exponent delimiter and the exponent 498 /// always contains no fewer than two digits. 499 /// 500 /// Number Precision MaxPadding Result 501 /// ------ --------- ---------- ------ 502 /// 1.01E+4 5 2 10100 503 /// 1.01E+4 4 2 1.01E+4 504 /// 1.01E+4 5 1 1.01E+4 505 /// 1.01E-2 5 2 0.0101 506 /// 1.01E-2 4 2 0.0101 507 /// 1.01E-2 4 1 1.01E-2 508 void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision = 0, 509 unsigned FormatMaxPadding = 3, bool TruncateZero = true) const; 510 511 /// If this value has an exact multiplicative inverse, store it in inv and 512 /// return true. 513 bool getExactInverse(APFloat *inv) const; 514 515 // If this is an exact power of two, return the exponent while ignoring the 516 // sign bit. If it's not an exact power of 2, return INT_MIN 517 LLVM_READONLY 518 int getExactLog2Abs() const; 519 520 // If this is a positive exact power of two, return the exponent. If it is 521 // negative or not an exact power of 2, return INT_MIN 522 LLVM_READONLY 523 int getExactLog2() const { 524 return isNegative() ? INT_MIN : getExactLog2Abs(); 525 } 526 527 /// Returns the exponent of the internal representation of the APFloat. 528 /// 529 /// Because the radix of APFloat is 2, this is equivalent to floor(log2(x)).
530 /// For special APFloat values, this returns special error codes: 531 /// 532 /// NaN -> \c IEK_NaN 533 /// 0 -> \c IEK_Zero 534 /// Inf -> \c IEK_Inf 535 /// 536 friend int ilogb(const IEEEFloat &Arg); 537 538 /// Returns: X * 2^Exp for integral exponents. 539 friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode); 540 541 friend IEEEFloat frexp(const IEEEFloat &X, int &Exp, roundingMode); 542 543 /// \name Special value setters. 544 /// @{ 545 546 void makeLargest(bool Neg = false); 547 void makeSmallest(bool Neg = false); 548 void makeNaN(bool SNaN = false, bool Neg = false, 549 const APInt *fill = nullptr); 550 void makeInf(bool Neg = false); 551 void makeZero(bool Neg = false); 552 void makeQuiet(); 553 554 /// Returns the smallest (by magnitude) normalized finite number in the given 555 /// semantics. 556 /// 557 /// \param Negative - True iff the number should be negative 558 void makeSmallestNormalized(bool Negative = false); 559 560 /// @} 561 562 cmpResult compareAbsoluteValue(const IEEEFloat &) const; 563 564 private: 565 /// \name Simple Queries 566 /// @{ 567 568 integerPart *significandParts(); 569 const integerPart *significandParts() const; 570 unsigned int partCount() const; 571 572 /// @} 573 574 /// \name Significand operations.
575 /// @{ 576 577 integerPart addSignificand(const IEEEFloat &); 578 integerPart subtractSignificand(const IEEEFloat &, integerPart); 579 lostFraction addOrSubtractSignificand(const IEEEFloat &, bool subtract); 580 lostFraction multiplySignificand(const IEEEFloat &, IEEEFloat); 581 lostFraction multiplySignificand(const IEEEFloat&); 582 lostFraction divideSignificand(const IEEEFloat &); 583 void incrementSignificand(); 584 void initialize(const fltSemantics *); 585 void shiftSignificandLeft(unsigned int); 586 lostFraction shiftSignificandRight(unsigned int); 587 unsigned int significandLSB() const; 588 unsigned int significandMSB() const; 589 void zeroSignificand(); 590 /// Return true if the significand excluding the integral bit is all ones. 591 bool isSignificandAllOnes() const; 592 bool isSignificandAllOnesExceptLSB() const; 593 /// Return true if the significand excluding the integral bit is all zeros. 594 bool isSignificandAllZeros() const; 595 bool isSignificandAllZerosExceptMSB() const; 596 597 /// @} 598 599 /// \name Arithmetic on special values.
600 /// @{ 601 602 opStatus addOrSubtractSpecials(const IEEEFloat &, bool subtract); 603 opStatus divideSpecials(const IEEEFloat &); 604 opStatus multiplySpecials(const IEEEFloat &); 605 opStatus modSpecials(const IEEEFloat &); 606 opStatus remainderSpecials(const IEEEFloat&); 607 608 /// @} 609 610 /// \name Miscellany 611 /// @{ 612 613 bool convertFromStringSpecials(StringRef str); 614 opStatus normalize(roundingMode, lostFraction); 615 opStatus addOrSubtract(const IEEEFloat &, roundingMode, bool subtract); 616 opStatus handleOverflow(roundingMode); 617 bool roundAwayFromZero(roundingMode, lostFraction, unsigned int) const; 618 opStatus convertToSignExtendedInteger(MutableArrayRef<integerPart>, 619 unsigned int, bool, roundingMode, 620 bool *) const; 621 opStatus convertFromUnsignedParts(const integerPart *, unsigned int, 622 roundingMode); 623 Expected<opStatus> convertFromHexadecimalString(StringRef, roundingMode); 624 Expected<opStatus> convertFromDecimalString(StringRef, roundingMode); 625 char *convertNormalToHexString(char *, unsigned int, bool, 626 roundingMode) const; 627 opStatus roundSignificandWithExponent(const integerPart *, unsigned int, int, 628 roundingMode); 629 ExponentType exponentNaN() const; 630 ExponentType exponentInf() const; 631 ExponentType exponentZero() const; 632 633 /// @} 634 635 template <const fltSemantics &S> APInt convertIEEEFloatToAPInt() const; 636 APInt convertHalfAPFloatToAPInt() const; 637 APInt convertBFloatAPFloatToAPInt() const; 638 APInt convertFloatAPFloatToAPInt() const; 639 APInt convertDoubleAPFloatToAPInt() const; 640 APInt convertQuadrupleAPFloatToAPInt() const; 641 APInt convertF80LongDoubleAPFloatToAPInt() const; 642 APInt convertPPCDoubleDoubleAPFloatToAPInt() const; 643 APInt convertFloat8E5M2APFloatToAPInt() const; 644 APInt convertFloat8E5M2FNUZAPFloatToAPInt() const; 645 APInt convertFloat8E4M3APFloatToAPInt() const; 646 APInt convertFloat8E4M3FNAPFloatToAPInt() const; 647 APInt
convertFloat8E4M3FNUZAPFloatToAPInt() const; 648 APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const; 649 APInt convertFloatTF32APFloatToAPInt() const; 650 APInt convertFloat6E3M2FNAPFloatToAPInt() const; 651 APInt convertFloat6E2M3FNAPFloatToAPInt() const; 652 APInt convertFloat4E2M1FNAPFloatToAPInt() const; 653 void initFromAPInt(const fltSemantics *Sem, const APInt &api); 654 template <const fltSemantics &S> void initFromIEEEAPInt(const APInt &api); 655 void initFromHalfAPInt(const APInt &api); 656 void initFromBFloatAPInt(const APInt &api); 657 void initFromFloatAPInt(const APInt &api); 658 void initFromDoubleAPInt(const APInt &api); 659 void initFromQuadrupleAPInt(const APInt &api); 660 void initFromF80LongDoubleAPInt(const APInt &api); 661 void initFromPPCDoubleDoubleAPInt(const APInt &api); 662 void initFromFloat8E5M2APInt(const APInt &api); 663 void initFromFloat8E5M2FNUZAPInt(const APInt &api); 664 void initFromFloat8E4M3APInt(const APInt &api); 665 void initFromFloat8E4M3FNAPInt(const APInt &api); 666 void initFromFloat8E4M3FNUZAPInt(const APInt &api); 667 void initFromFloat8E4M3B11FNUZAPInt(const APInt &api); 668 void initFromFloatTF32APInt(const APInt &api); 669 void initFromFloat6E3M2FNAPInt(const APInt &api); 670 void initFromFloat6E2M3FNAPInt(const APInt &api); 671 void initFromFloat4E2M1FNAPInt(const APInt &api); 672 673 void assign(const IEEEFloat &); 674 void copySignificand(const IEEEFloat &); 675 void freeSignificand(); 676 677 /// Note: this must be the first data member. 678 /// The semantics that this value obeys. 679 const fltSemantics *semantics; 680 681 /// A binary fraction with an explicit integer bit. 682 /// 683 /// The significand must be at least one bit wider than the target precision. 684 union Significand { 685 integerPart part; 686 integerPart *parts; 687 } significand; 688 689 /// The signed unbiased exponent of the value. 690 ExponentType exponent; 691 692 /// What kind of floating point number this is.
693 /// 694 /// Only 2 bits are required, but VisualStudio incorrectly sign extends it. 695 /// Using the extra bit keeps it from failing under VisualStudio. 696 fltCategory category : 3; 697 698 /// Sign bit of the number. 699 unsigned int sign : 1; 700 }; 701 702 hash_code hash_value(const IEEEFloat &Arg); 703 int ilogb(const IEEEFloat &Arg); 704 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode); 705 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM); 706 707 // This mode implements more precise float in terms of two APFloats. 708 // The interface and layout are designed for arbitrary underlying semantics, 709 // though currently only PPCDoubleDouble semantics are supported, whose 710 // corresponding underlying semantics are IEEEdouble. 711 class DoubleAPFloat final : public APFloatBase { 712 // Note: this must be the first data member. 713 const fltSemantics *Semantics; 714 std::unique_ptr<APFloat[]> Floats; 715 716 opStatus addImpl(const APFloat &a, const APFloat &aa, const APFloat &c, 717 const APFloat &cc, roundingMode RM); 718 719 opStatus addWithSpecial(const DoubleAPFloat &LHS, const DoubleAPFloat &RHS, 720 DoubleAPFloat &Out, roundingMode RM); 721 722 public: 723 DoubleAPFloat(const fltSemantics &S); 724 DoubleAPFloat(const fltSemantics &S, uninitializedTag); 725 DoubleAPFloat(const fltSemantics &S, integerPart); 726 DoubleAPFloat(const fltSemantics &S, const APInt &I); 727 DoubleAPFloat(const fltSemantics &S, APFloat &&First, APFloat &&Second); 728 DoubleAPFloat(const DoubleAPFloat &RHS); 729 DoubleAPFloat(DoubleAPFloat &&RHS); 730 731 DoubleAPFloat &operator=(const DoubleAPFloat &RHS); 732 inline DoubleAPFloat &operator=(DoubleAPFloat &&RHS); 733 734 bool needsCleanup() const { return Floats != nullptr; } 735 736 inline APFloat &getFirst(); 737 inline const APFloat &getFirst() const; 738 inline APFloat &getSecond(); 739 inline const APFloat &getSecond() const; 740 741 opStatus add(const DoubleAPFloat &RHS,
roundingMode RM); 742 opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM); 743 opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM); 744 opStatus divide(const DoubleAPFloat &RHS, roundingMode RM); 745 opStatus remainder(const DoubleAPFloat &RHS); 746 opStatus mod(const DoubleAPFloat &RHS); 747 opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 748 const DoubleAPFloat &Addend, roundingMode RM); 749 opStatus roundToIntegral(roundingMode RM); 750 void changeSign(); 751 cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const; 752 753 fltCategory getCategory() const; 754 bool isNegative() const; 755 756 void makeInf(bool Neg); 757 void makeZero(bool Neg); 758 void makeLargest(bool Neg); 759 void makeSmallest(bool Neg); 760 void makeSmallestNormalized(bool Neg); 761 void makeNaN(bool SNaN, bool Neg, const APInt *fill); 762 763 cmpResult compare(const DoubleAPFloat &RHS) const; 764 bool bitwiseIsEqual(const DoubleAPFloat &RHS) const; 765 APInt bitcastToAPInt() const; 766 Expected<opStatus> convertFromString(StringRef, roundingMode); 767 opStatus next(bool nextDown); 768 769 opStatus convertToInteger(MutableArrayRef<integerPart> Input, 770 unsigned int Width, bool IsSigned, roundingMode RM, 771 bool *IsExact) const; 772 opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM); 773 opStatus convertFromSignExtendedInteger(const integerPart *Input, 774 unsigned int InputSize, bool IsSigned, 775 roundingMode RM); 776 opStatus convertFromZeroExtendedInteger(const integerPart *Input, 777 unsigned int InputSize, bool IsSigned, 778 roundingMode RM); 779 unsigned int convertToHexString(char *DST, unsigned int HexDigits, 780 bool UpperCase, roundingMode RM) const; 781 782 bool isDenormal() const; 783 bool isSmallest() const; 784 bool isSmallestNormalized() const; 785 bool isLargest() const; 786 bool isInteger() const; 787 788 void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 789 unsigned FormatMaxPadding, bool
TruncateZero = true) const; 790 791 bool getExactInverse(APFloat *inv) const; 792 793 LLVM_READONLY 794 int getExactLog2() const; 795 LLVM_READONLY 796 int getExactLog2Abs() const; 797 798 friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode); 799 friend DoubleAPFloat frexp(const DoubleAPFloat &X, int &Exp, roundingMode); 800 friend hash_code hash_value(const DoubleAPFloat &Arg); 801 }; 802 803 hash_code hash_value(const DoubleAPFloat &Arg); 804 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, IEEEFloat::roundingMode RM); 805 DoubleAPFloat frexp(const DoubleAPFloat &X, int &Exp, IEEEFloat::roundingMode); 806 807 } // End detail namespace 808 809 // This is an interface class that is currently forwarding functionalities from 810 // detail::IEEEFloat. 811 class APFloat : public APFloatBase { 812 typedef detail::IEEEFloat IEEEFloat; 813 typedef detail::DoubleAPFloat DoubleAPFloat; 814 815 static_assert(std::is_standard_layout<IEEEFloat>::value); 816 817 union Storage { 818 const fltSemantics *semantics; 819 IEEEFloat IEEE; 820 DoubleAPFloat Double; 821 822 explicit Storage(IEEEFloat F, const fltSemantics &S); 823 explicit Storage(DoubleAPFloat F, const fltSemantics &S) 824 : Double(std::move(F)) { 825 assert(&S == &PPCDoubleDouble()); 826 } 827 828 template <typename... ArgTypes> 829 Storage(const fltSemantics &Semantics, ArgTypes &&...
Args) { 830 if (usesLayout<IEEEFloat>(Semantics)) { 831 new (&IEEE) IEEEFloat(Semantics, std::forward<ArgTypes>(Args)...); 832 return; 833 } 834 if (usesLayout<DoubleAPFloat>(Semantics)) { 835 new (&Double) DoubleAPFloat(Semantics, std::forward<ArgTypes>(Args)...); 836 return; 837 } 838 llvm_unreachable("Unexpected semantics"); 839 } 840 841 ~Storage() { 842 if (usesLayout<IEEEFloat>(*semantics)) { 843 IEEE.~IEEEFloat(); 844 return; 845 } 846 if (usesLayout<DoubleAPFloat>(*semantics)) { 847 Double.~DoubleAPFloat(); 848 return; 849 } 850 llvm_unreachable("Unexpected semantics"); 851 } 852 853 Storage(const Storage &RHS) { 854 if (usesLayout<IEEEFloat>(*RHS.semantics)) { 855 new (this) IEEEFloat(RHS.IEEE); 856 return; 857 } 858 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) { 859 new (this) DoubleAPFloat(RHS.Double); 860 return; 861 } 862 llvm_unreachable("Unexpected semantics"); 863 } 864 865 Storage(Storage &&RHS) { 866 if (usesLayout<IEEEFloat>(*RHS.semantics)) { 867 new (this) IEEEFloat(std::move(RHS.IEEE)); 868 return; 869 } 870 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) { 871 new (this) DoubleAPFloat(std::move(RHS.Double)); 872 return; 873 } 874 llvm_unreachable("Unexpected semantics"); 875 } 876 877 Storage &operator=(const Storage &RHS) { 878 if (usesLayout<IEEEFloat>(*semantics) && 879 usesLayout<IEEEFloat>(*RHS.semantics)) { 880 IEEE = RHS.IEEE; 881 } else if (usesLayout<DoubleAPFloat>(*semantics) && 882 usesLayout<DoubleAPFloat>(*RHS.semantics)) { 883 Double = RHS.Double; 884 } else if (this != &RHS) { 885 this->~Storage(); 886 new (this) Storage(RHS); 887 } 888 return *this; 889 } 890 891 Storage &operator=(Storage &&RHS) { 892 if (usesLayout<IEEEFloat>(*semantics) && 893 usesLayout<IEEEFloat>(*RHS.semantics)) { 894 IEEE = std::move(RHS.IEEE); 895 } else if (usesLayout<DoubleAPFloat>(*semantics) && 896 usesLayout<DoubleAPFloat>(*RHS.semantics)) { 897 Double = std::move(RHS.Double); 898 } else if (this != &RHS) { 899 this->~Storage(); 900 new (this) 
Storage(std::move(RHS)); 901 } 902 return *this; 903 } 904 } U; 905 906 template <typename T> static bool usesLayout(const fltSemantics &Semantics) { 907 static_assert(std::is_same<T, IEEEFloat>::value || 908 std::is_same<T, DoubleAPFloat>::value); 909 if (std::is_same<T, DoubleAPFloat>::value) { 910 return &Semantics == &PPCDoubleDouble(); 911 } 912 return &Semantics != &PPCDoubleDouble(); 913 } 914 915 IEEEFloat &getIEEE() { 916 if (usesLayout<IEEEFloat>(*U.semantics)) 917 return U.IEEE; 918 if (usesLayout<DoubleAPFloat>(*U.semantics)) 919 return U.Double.getFirst().U.IEEE; 920 llvm_unreachable("Unexpected semantics"); 921 } 922 923 const IEEEFloat &getIEEE() const { 924 if (usesLayout<IEEEFloat>(*U.semantics)) 925 return U.IEEE; 926 if (usesLayout<DoubleAPFloat>(*U.semantics)) 927 return U.Double.getFirst().U.IEEE; 928 llvm_unreachable("Unexpected semantics"); 929 } 930 931 void makeZero(bool Neg) { APFLOAT_DISPATCH_ON_SEMANTICS(makeZero(Neg)); } 932 933 void makeInf(bool Neg) { APFLOAT_DISPATCH_ON_SEMANTICS(makeInf(Neg)); } 934 935 void makeNaN(bool SNaN, bool Neg, const APInt *fill) { 936 APFLOAT_DISPATCH_ON_SEMANTICS(makeNaN(SNaN, Neg, fill)); 937 } 938 939 void makeLargest(bool Neg) { 940 APFLOAT_DISPATCH_ON_SEMANTICS(makeLargest(Neg)); 941 } 942 943 void makeSmallest(bool Neg) { 944 APFLOAT_DISPATCH_ON_SEMANTICS(makeSmallest(Neg)); 945 } 946 947 void makeSmallestNormalized(bool Neg) { 948 APFLOAT_DISPATCH_ON_SEMANTICS(makeSmallestNormalized(Neg)); 949 } 950 951 explicit APFloat(IEEEFloat F, const fltSemantics &S) : U(std::move(F), S) {} 952 explicit APFloat(DoubleAPFloat F, const fltSemantics &S) 953 : U(std::move(F), S) {} 954 955 cmpResult compareAbsoluteValue(const APFloat &RHS) const { 956 assert(&getSemantics() == &RHS.getSemantics() && 957 "Should only compare APFloats with the same semantics"); 958 if (usesLayout<IEEEFloat>(getSemantics())) 959 return U.IEEE.compareAbsoluteValue(RHS.U.IEEE); 960 if (usesLayout<DoubleAPFloat>(getSemantics())) 961 
return U.Double.compareAbsoluteValue(RHS.U.Double); 962 llvm_unreachable("Unexpected semantics"); 963 } 964 965 public: 966 APFloat(const fltSemantics &Semantics) : U(Semantics) {} 967 APFloat(const fltSemantics &Semantics, StringRef S); 968 APFloat(const fltSemantics &Semantics, integerPart I) : U(Semantics, I) {} 969 template <typename T, 970 typename = std::enable_if_t<std::is_floating_point<T>::value>> 971 APFloat(const fltSemantics &Semantics, T V) = delete; 972 // TODO: Remove this constructor. This isn't faster than the first one. 973 APFloat(const fltSemantics &Semantics, uninitializedTag) 974 : U(Semantics, uninitialized) {} 975 APFloat(const fltSemantics &Semantics, const APInt &I) : U(Semantics, I) {} 976 explicit APFloat(double d) : U(IEEEFloat(d), IEEEdouble()) {} 977 explicit APFloat(float f) : U(IEEEFloat(f), IEEEsingle()) {} 978 APFloat(const APFloat &RHS) = default; 979 APFloat(APFloat &&RHS) = default; 980 981 ~APFloat() = default; 982 983 bool needsCleanup() const { APFLOAT_DISPATCH_ON_SEMANTICS(needsCleanup()); } 984 985 /// Factory for Positive and Negative Zero. 986 /// 987 /// \param Negative True iff the number should be negative. 988 static APFloat getZero(const fltSemantics &Sem, bool Negative = false) { 989 APFloat Val(Sem, uninitialized); 990 Val.makeZero(Negative); 991 return Val; 992 } 993 994 /// Factory for Positive and Negative One. 995 /// 996 /// \param Negative True iff the number should be negative. 997 static APFloat getOne(const fltSemantics &Sem, bool Negative = false) { 998 return APFloat(Sem, Negative ? -1 : 1); 999 } 1000 1001 /// Factory for Positive and Negative Infinity. 1002 /// 1003 /// \param Negative True iff the number should be negative. 1004 static APFloat getInf(const fltSemantics &Sem, bool Negative = false) { 1005 APFloat Val(Sem, uninitialized); 1006 Val.makeInf(Negative); 1007 return Val; 1008 } 1009 1010 /// Factory for NaN values. 
1011 /// 1012 /// \param Negative - True iff the NaN generated should be negative. 1013 /// \param payload - The unspecified fill bits for creating the NaN, 0 by 1014 /// default. The value is truncated as necessary. 1015 static APFloat getNaN(const fltSemantics &Sem, bool Negative = false, 1016 uint64_t payload = 0) { 1017 if (payload) { 1018 APInt intPayload(64, payload); 1019 return getQNaN(Sem, Negative, &intPayload); 1020 } else { 1021 return getQNaN(Sem, Negative, nullptr); 1022 } 1023 } 1024 1025 /// Factory for QNaN values. 1026 static APFloat getQNaN(const fltSemantics &Sem, bool Negative = false, 1027 const APInt *payload = nullptr) { 1028 APFloat Val(Sem, uninitialized); 1029 Val.makeNaN(false, Negative, payload); 1030 return Val; 1031 } 1032 1033 /// Factory for SNaN values. 1034 static APFloat getSNaN(const fltSemantics &Sem, bool Negative = false, 1035 const APInt *payload = nullptr) { 1036 APFloat Val(Sem, uninitialized); 1037 Val.makeNaN(true, Negative, payload); 1038 return Val; 1039 } 1040 1041 /// Returns the largest finite number in the given semantics. 1042 /// 1043 /// \param Negative - True iff the number should be negative 1044 static APFloat getLargest(const fltSemantics &Sem, bool Negative = false) { 1045 APFloat Val(Sem, uninitialized); 1046 Val.makeLargest(Negative); 1047 return Val; 1048 } 1049 1050 /// Returns the smallest (by magnitude) finite number in the given semantics. 1051 /// Might be denormalized, which implies a relative loss of precision. 1052 /// 1053 /// \param Negative - True iff the number should be negative 1054 static APFloat getSmallest(const fltSemantics &Sem, bool Negative = false) { 1055 APFloat Val(Sem, uninitialized); 1056 Val.makeSmallest(Negative); 1057 return Val; 1058 } 1059 1060 /// Returns the smallest (by magnitude) normalized finite number in the given 1061 /// semantics. 
1062 /// 1063 /// \param Negative - True iff the number should be negative 1064 static APFloat getSmallestNormalized(const fltSemantics &Sem, 1065 bool Negative = false) { 1066 APFloat Val(Sem, uninitialized); 1067 Val.makeSmallestNormalized(Negative); 1068 return Val; 1069 } 1070 1071 /// Returns a float which is bitcasted from an all one value int. 1072 /// 1073 /// \param Semantics - type float semantics 1074 static APFloat getAllOnesValue(const fltSemantics &Semantics); 1075 1076 static bool hasNanOrInf(const fltSemantics &Sem) { 1077 switch (SemanticsToEnum(Sem)) { 1078 default: 1079 return true; 1080 // Below Semantics do not support {NaN or Inf} 1081 case APFloat::S_Float6E3M2FN: 1082 case APFloat::S_Float6E2M3FN: 1083 case APFloat::S_Float4E2M1FN: 1084 return false; 1085 } 1086 } 1087 1088 /// Used to insert APFloat objects, or objects that contain APFloat objects, 1089 /// into FoldingSets. 1090 void Profile(FoldingSetNodeID &NID) const; 1091 1092 opStatus add(const APFloat &RHS, roundingMode RM) { 1093 assert(&getSemantics() == &RHS.getSemantics() && 1094 "Should only call on two APFloats with the same semantics"); 1095 if (usesLayout<IEEEFloat>(getSemantics())) 1096 return U.IEEE.add(RHS.U.IEEE, RM); 1097 if (usesLayout<DoubleAPFloat>(getSemantics())) 1098 return U.Double.add(RHS.U.Double, RM); 1099 llvm_unreachable("Unexpected semantics"); 1100 } 1101 opStatus subtract(const APFloat &RHS, roundingMode RM) { 1102 assert(&getSemantics() == &RHS.getSemantics() && 1103 "Should only call on two APFloats with the same semantics"); 1104 if (usesLayout<IEEEFloat>(getSemantics())) 1105 return U.IEEE.subtract(RHS.U.IEEE, RM); 1106 if (usesLayout<DoubleAPFloat>(getSemantics())) 1107 return U.Double.subtract(RHS.U.Double, RM); 1108 llvm_unreachable("Unexpected semantics"); 1109 } 1110 opStatus multiply(const APFloat &RHS, roundingMode RM) { 1111 assert(&getSemantics() == &RHS.getSemantics() && 1112 "Should only call on two APFloats with the same semantics"); 1113 
if (usesLayout<IEEEFloat>(getSemantics())) 1114 return U.IEEE.multiply(RHS.U.IEEE, RM); 1115 if (usesLayout<DoubleAPFloat>(getSemantics())) 1116 return U.Double.multiply(RHS.U.Double, RM); 1117 llvm_unreachable("Unexpected semantics"); 1118 } 1119 opStatus divide(const APFloat &RHS, roundingMode RM) { 1120 assert(&getSemantics() == &RHS.getSemantics() && 1121 "Should only call on two APFloats with the same semantics"); 1122 if (usesLayout<IEEEFloat>(getSemantics())) 1123 return U.IEEE.divide(RHS.U.IEEE, RM); 1124 if (usesLayout<DoubleAPFloat>(getSemantics())) 1125 return U.Double.divide(RHS.U.Double, RM); 1126 llvm_unreachable("Unexpected semantics"); 1127 } 1128 opStatus remainder(const APFloat &RHS) { 1129 assert(&getSemantics() == &RHS.getSemantics() && 1130 "Should only call on two APFloats with the same semantics"); 1131 if (usesLayout<IEEEFloat>(getSemantics())) 1132 return U.IEEE.remainder(RHS.U.IEEE); 1133 if (usesLayout<DoubleAPFloat>(getSemantics())) 1134 return U.Double.remainder(RHS.U.Double); 1135 llvm_unreachable("Unexpected semantics"); 1136 } 1137 opStatus mod(const APFloat &RHS) { 1138 assert(&getSemantics() == &RHS.getSemantics() && 1139 "Should only call on two APFloats with the same semantics"); 1140 if (usesLayout<IEEEFloat>(getSemantics())) 1141 return U.IEEE.mod(RHS.U.IEEE); 1142 if (usesLayout<DoubleAPFloat>(getSemantics())) 1143 return U.Double.mod(RHS.U.Double); 1144 llvm_unreachable("Unexpected semantics"); 1145 } 1146 opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, 1147 roundingMode RM) { 1148 assert(&getSemantics() == &Multiplicand.getSemantics() && 1149 "Should only call on APFloats with the same semantics"); 1150 assert(&getSemantics() == &Addend.getSemantics() && 1151 "Should only call on APFloats with the same semantics"); 1152 if (usesLayout<IEEEFloat>(getSemantics())) 1153 return U.IEEE.fusedMultiplyAdd(Multiplicand.U.IEEE, Addend.U.IEEE, RM); 1154 if (usesLayout<DoubleAPFloat>(getSemantics())) 1155 
return U.Double.fusedMultiplyAdd(Multiplicand.U.Double, Addend.U.Double, 1156 RM); 1157 llvm_unreachable("Unexpected semantics"); 1158 } 1159 opStatus roundToIntegral(roundingMode RM) { 1160 APFLOAT_DISPATCH_ON_SEMANTICS(roundToIntegral(RM)); 1161 } 1162 1163 // TODO: bool parameters are not readable and a source of bugs. 1164 // Do something. 1165 opStatus next(bool nextDown) { 1166 APFLOAT_DISPATCH_ON_SEMANTICS(next(nextDown)); 1167 } 1168 1169 /// Negate an APFloat. 1170 APFloat operator-() const { 1171 APFloat Result(*this); 1172 Result.changeSign(); 1173 return Result; 1174 } 1175 1176 /// Add two APFloats, rounding ties to the nearest even. 1177 /// No error checking. 1178 APFloat operator+(const APFloat &RHS) const { 1179 APFloat Result(*this); 1180 (void)Result.add(RHS, rmNearestTiesToEven); 1181 return Result; 1182 } 1183 1184 /// Subtract two APFloats, rounding ties to the nearest even. 1185 /// No error checking. 1186 APFloat operator-(const APFloat &RHS) const { 1187 APFloat Result(*this); 1188 (void)Result.subtract(RHS, rmNearestTiesToEven); 1189 return Result; 1190 } 1191 1192 /// Multiply two APFloats, rounding ties to the nearest even. 1193 /// No error checking. 1194 APFloat operator*(const APFloat &RHS) const { 1195 APFloat Result(*this); 1196 (void)Result.multiply(RHS, rmNearestTiesToEven); 1197 return Result; 1198 } 1199 1200 /// Divide the first APFloat by the second, rounding ties to the nearest even. 1201 /// No error checking. 
1202 APFloat operator/(const APFloat &RHS) const { 1203 APFloat Result(*this); 1204 (void)Result.divide(RHS, rmNearestTiesToEven); 1205 return Result; 1206 } 1207 1208 void changeSign() { APFLOAT_DISPATCH_ON_SEMANTICS(changeSign()); } 1209 void clearSign() { 1210 if (isNegative()) 1211 changeSign(); 1212 } 1213 void copySign(const APFloat &RHS) { 1214 if (isNegative() != RHS.isNegative()) 1215 changeSign(); 1216 } 1217 1218 /// A static helper to produce a copy of an APFloat value with its sign 1219 /// copied from some other APFloat. 1220 static APFloat copySign(APFloat Value, const APFloat &Sign) { 1221 Value.copySign(Sign); 1222 return Value; 1223 } 1224 1225 /// Assuming this is an IEEE-754 NaN value, quiet its signaling bit. 1226 /// This preserves the sign and payload bits. 1227 APFloat makeQuiet() const { 1228 APFloat Result(*this); 1229 Result.getIEEE().makeQuiet(); 1230 return Result; 1231 } 1232 1233 opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, 1234 bool *losesInfo); 1235 opStatus convertToInteger(MutableArrayRef<integerPart> Input, 1236 unsigned int Width, bool IsSigned, roundingMode RM, 1237 bool *IsExact) const { 1238 APFLOAT_DISPATCH_ON_SEMANTICS( 1239 convertToInteger(Input, Width, IsSigned, RM, IsExact)); 1240 } 1241 opStatus convertToInteger(APSInt &Result, roundingMode RM, 1242 bool *IsExact) const; 1243 opStatus convertFromAPInt(const APInt &Input, bool IsSigned, 1244 roundingMode RM) { 1245 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromAPInt(Input, IsSigned, RM)); 1246 } 1247 opStatus convertFromSignExtendedInteger(const integerPart *Input, 1248 unsigned int InputSize, bool IsSigned, 1249 roundingMode RM) { 1250 APFLOAT_DISPATCH_ON_SEMANTICS( 1251 convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM)); 1252 } 1253 opStatus convertFromZeroExtendedInteger(const integerPart *Input, 1254 unsigned int InputSize, bool IsSigned, 1255 roundingMode RM) { 1256 APFLOAT_DISPATCH_ON_SEMANTICS( 1257 
convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM)); 1258 } 1259 Expected<opStatus> convertFromString(StringRef, roundingMode); 1260 APInt bitcastToAPInt() const { 1261 APFLOAT_DISPATCH_ON_SEMANTICS(bitcastToAPInt()); 1262 } 1263 1264 /// Converts this APFloat to host double value. 1265 /// 1266 /// \pre The APFloat must be built using semantics, that can be represented by 1267 /// the host double type without loss of precision. It can be IEEEdouble and 1268 /// shorter semantics, like IEEEsingle and others. 1269 double convertToDouble() const; 1270 1271 /// Converts this APFloat to host float value. 1272 /// 1273 /// \pre The APFloat must be built using semantics, that can be represented by 1274 /// the host float type without loss of precision. It can be IEEEquad and 1275 /// shorter semantics, like IEEEdouble and others. 1276 #ifdef HAS_IEE754_FLOAT128 1277 float128 convertToQuad() const; 1278 #endif 1279 1280 /// Converts this APFloat to host float value. 1281 /// 1282 /// \pre The APFloat must be built using semantics, that can be represented by 1283 /// the host float type without loss of precision. It can be IEEEsingle and 1284 /// shorter semantics, like IEEEhalf. 
1285 float convertToFloat() const; 1286 1287 bool operator==(const APFloat &RHS) const { return compare(RHS) == cmpEqual; } 1288 1289 bool operator!=(const APFloat &RHS) const { return compare(RHS) != cmpEqual; } 1290 1291 bool operator<(const APFloat &RHS) const { 1292 return compare(RHS) == cmpLessThan; 1293 } 1294 1295 bool operator>(const APFloat &RHS) const { 1296 return compare(RHS) == cmpGreaterThan; 1297 } 1298 1299 bool operator<=(const APFloat &RHS) const { 1300 cmpResult Res = compare(RHS); 1301 return Res == cmpLessThan || Res == cmpEqual; 1302 } 1303 1304 bool operator>=(const APFloat &RHS) const { 1305 cmpResult Res = compare(RHS); 1306 return Res == cmpGreaterThan || Res == cmpEqual; 1307 } 1308 1309 cmpResult compare(const APFloat &RHS) const { 1310 assert(&getSemantics() == &RHS.getSemantics() && 1311 "Should only compare APFloats with the same semantics"); 1312 if (usesLayout<IEEEFloat>(getSemantics())) 1313 return U.IEEE.compare(RHS.U.IEEE); 1314 if (usesLayout<DoubleAPFloat>(getSemantics())) 1315 return U.Double.compare(RHS.U.Double); 1316 llvm_unreachable("Unexpected semantics"); 1317 } 1318 1319 bool bitwiseIsEqual(const APFloat &RHS) const { 1320 if (&getSemantics() != &RHS.getSemantics()) 1321 return false; 1322 if (usesLayout<IEEEFloat>(getSemantics())) 1323 return U.IEEE.bitwiseIsEqual(RHS.U.IEEE); 1324 if (usesLayout<DoubleAPFloat>(getSemantics())) 1325 return U.Double.bitwiseIsEqual(RHS.U.Double); 1326 llvm_unreachable("Unexpected semantics"); 1327 } 1328 1329 /// We don't rely on operator== working on double values, as 1330 /// it returns true for things that are clearly not equal, like -0.0 and 0.0. 1331 /// As such, this method can be used to do an exact bit-for-bit comparison of 1332 /// two floating point values. 1333 /// 1334 /// We leave the version with the double argument here because it's just so 1335 /// convenient to write "2.0" and the like. 
Without this function we'd 1336 /// have to duplicate its logic everywhere it's called. 1337 bool isExactlyValue(double V) const { 1338 bool ignored; 1339 APFloat Tmp(V); 1340 Tmp.convert(getSemantics(), APFloat::rmNearestTiesToEven, &ignored); 1341 return bitwiseIsEqual(Tmp); 1342 } 1343 1344 unsigned int convertToHexString(char *DST, unsigned int HexDigits, 1345 bool UpperCase, roundingMode RM) const { 1346 APFLOAT_DISPATCH_ON_SEMANTICS( 1347 convertToHexString(DST, HexDigits, UpperCase, RM)); 1348 } 1349 1350 bool isZero() const { return getCategory() == fcZero; } 1351 bool isInfinity() const { return getCategory() == fcInfinity; } 1352 bool isNaN() const { return getCategory() == fcNaN; } 1353 1354 bool isNegative() const { return getIEEE().isNegative(); } 1355 bool isDenormal() const { APFLOAT_DISPATCH_ON_SEMANTICS(isDenormal()); } 1356 bool isSignaling() const { return getIEEE().isSignaling(); } 1357 1358 bool isNormal() const { return !isDenormal() && isFiniteNonZero(); } 1359 bool isFinite() const { return !isNaN() && !isInfinity(); } 1360 1361 fltCategory getCategory() const { return getIEEE().getCategory(); } 1362 const fltSemantics &getSemantics() const { return *U.semantics; } 1363 bool isNonZero() const { return !isZero(); } 1364 bool isFiniteNonZero() const { return isFinite() && !isZero(); } 1365 bool isPosZero() const { return isZero() && !isNegative(); } 1366 bool isNegZero() const { return isZero() && isNegative(); } 1367 bool isPosInfinity() const { return isInfinity() && !isNegative(); } 1368 bool isNegInfinity() const { return isInfinity() && isNegative(); } 1369 bool isSmallest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isSmallest()); } 1370 bool isLargest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isLargest()); } 1371 bool isInteger() const { APFLOAT_DISPATCH_ON_SEMANTICS(isInteger()); } 1372 bool isIEEE() const { return usesLayout<IEEEFloat>(getSemantics()); } 1373 1374 bool isSmallestNormalized() const { 1375 
APFLOAT_DISPATCH_ON_SEMANTICS(isSmallestNormalized()); 1376 } 1377 1378 /// Return the FPClassTest which will return true for the value. 1379 FPClassTest classify() const; 1380 1381 APFloat &operator=(const APFloat &RHS) = default; 1382 APFloat &operator=(APFloat &&RHS) = default; 1383 1384 void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision = 0, 1385 unsigned FormatMaxPadding = 3, bool TruncateZero = true) const { 1386 APFLOAT_DISPATCH_ON_SEMANTICS( 1387 toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero)); 1388 } 1389 1390 void print(raw_ostream &) const; 1391 void dump() const; 1392 1393 bool getExactInverse(APFloat *inv) const { 1394 APFLOAT_DISPATCH_ON_SEMANTICS(getExactInverse(inv)); 1395 } 1396 1397 LLVM_READONLY 1398 int getExactLog2Abs() const { 1399 APFLOAT_DISPATCH_ON_SEMANTICS(getExactLog2Abs()); 1400 } 1401 1402 LLVM_READONLY 1403 int getExactLog2() const { 1404 APFLOAT_DISPATCH_ON_SEMANTICS(getExactLog2()); 1405 } 1406 1407 friend hash_code hash_value(const APFloat &Arg); 1408 friend int ilogb(const APFloat &Arg) { return ilogb(Arg.getIEEE()); } 1409 friend APFloat scalbn(APFloat X, int Exp, roundingMode RM); 1410 friend APFloat frexp(const APFloat &X, int &Exp, roundingMode RM); 1411 friend IEEEFloat; 1412 friend DoubleAPFloat; 1413 }; 1414 1415 /// See friend declarations above. 1416 /// 1417 /// These additional declarations are required in order to compile LLVM with IBM 1418 /// xlC compiler. 1419 hash_code hash_value(const APFloat &Arg); 1420 inline APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM) { 1421 if (APFloat::usesLayout<detail::IEEEFloat>(X.getSemantics())) 1422 return APFloat(scalbn(X.U.IEEE, Exp, RM), X.getSemantics()); 1423 if (APFloat::usesLayout<detail::DoubleAPFloat>(X.getSemantics())) 1424 return APFloat(scalbn(X.U.Double, Exp, RM), X.getSemantics()); 1425 llvm_unreachable("Unexpected semantics"); 1426 } 1427 1428 /// Equivalent of C standard library function. 
1429 /// 1430 /// While the C standard says Exp is an unspecified value for infinity and nan, 1431 /// this returns INT_MAX for infinities, and INT_MIN for NaNs. 1432 inline APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM) { 1433 if (APFloat::usesLayout<detail::IEEEFloat>(X.getSemantics())) 1434 return APFloat(frexp(X.U.IEEE, Exp, RM), X.getSemantics()); 1435 if (APFloat::usesLayout<detail::DoubleAPFloat>(X.getSemantics())) 1436 return APFloat(frexp(X.U.Double, Exp, RM), X.getSemantics()); 1437 llvm_unreachable("Unexpected semantics"); 1438 } 1439 /// Returns the absolute value of the argument. 1440 inline APFloat abs(APFloat X) { 1441 X.clearSign(); 1442 return X; 1443 } 1444 1445 /// Returns the negated value of the argument. 1446 inline APFloat neg(APFloat X) { 1447 X.changeSign(); 1448 return X; 1449 } 1450 1451 /// Implements IEEE-754 2019 minimumNumber semantics. Returns the smaller of the 1452 /// 2 arguments if both are not NaN. If either argument is a NaN, returns the 1453 /// other argument. -0 is treated as ordered less than +0. 1454 LLVM_READONLY 1455 inline APFloat minnum(const APFloat &A, const APFloat &B) { 1456 if (A.isNaN()) 1457 return B; 1458 if (B.isNaN()) 1459 return A; 1460 if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative())) 1461 return A.isNegative() ? A : B; 1462 return B < A ? B : A; 1463 } 1464 1465 /// Implements IEEE-754 2019 maximumNumber semantics. Returns the larger of the 1466 /// 2 arguments if both are not NaN. If either argument is a NaN, returns the 1467 /// other argument. +0 is treated as ordered greater than -0. 1468 LLVM_READONLY 1469 inline APFloat maxnum(const APFloat &A, const APFloat &B) { 1470 if (A.isNaN()) 1471 return B; 1472 if (B.isNaN()) 1473 return A; 1474 if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative())) 1475 return A.isNegative() ? B : A; 1476 return A < B ? B : A; 1477 } 1478 1479 /// Implements IEEE 754-2019 minimum semantics. 
Returns the smaller of 2 1480 /// arguments, propagating NaNs and treating -0 as less than +0. 1481 LLVM_READONLY 1482 inline APFloat minimum(const APFloat &A, const APFloat &B) { 1483 if (A.isNaN()) 1484 return A; 1485 if (B.isNaN()) 1486 return B; 1487 if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative())) 1488 return A.isNegative() ? A : B; 1489 return B < A ? B : A; 1490 } 1491 1492 /// Implements IEEE 754-2019 minimumNumber semantics. Returns the smaller 1493 /// of 2 arguments, not propagating NaNs and treating -0 as less than +0. 1494 LLVM_READONLY 1495 inline APFloat minimumnum(const APFloat &A, const APFloat &B) { 1496 if (A.isNaN()) 1497 return B.isNaN() ? B.makeQuiet() : B; 1498 if (B.isNaN()) 1499 return A; 1500 if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative())) 1501 return A.isNegative() ? A : B; 1502 return B < A ? B : A; 1503 } 1504 1505 /// Implements IEEE 754-2019 maximum semantics. Returns the larger of 2 1506 /// arguments, propagating NaNs and treating -0 as less than +0. 1507 LLVM_READONLY 1508 inline APFloat maximum(const APFloat &A, const APFloat &B) { 1509 if (A.isNaN()) 1510 return A; 1511 if (B.isNaN()) 1512 return B; 1513 if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative())) 1514 return A.isNegative() ? B : A; 1515 return A < B ? B : A; 1516 } 1517 1518 /// Implements IEEE 754-2019 maximumNumber semantics. Returns the larger 1519 /// of 2 arguments, not propagating NaNs and treating -0 as less than +0. 1520 LLVM_READONLY 1521 inline APFloat maximumnum(const APFloat &A, const APFloat &B) { 1522 if (A.isNaN()) 1523 return B.isNaN() ? B.makeQuiet() : B; 1524 if (B.isNaN()) 1525 return A; 1526 if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative())) 1527 return A.isNegative() ? B : A; 1528 return A < B ? B : A; 1529 } 1530 1531 // We want the following functions to be available in the header for inlining. 
1532 // We cannot define them inline in the class definition of `DoubleAPFloat` 1533 // because doing so would instantiate `std::unique_ptr<APFloat[]>` before 1534 // `APFloat` is defined, and that would be undefined behavior. 1535 namespace detail { 1536 1537 DoubleAPFloat &DoubleAPFloat::operator=(DoubleAPFloat &&RHS) { 1538 if (this != &RHS) { 1539 this->~DoubleAPFloat(); 1540 new (this) DoubleAPFloat(std::move(RHS)); 1541 } 1542 return *this; 1543 } 1544 1545 APFloat &DoubleAPFloat::getFirst() { return Floats[0]; } 1546 const APFloat &DoubleAPFloat::getFirst() const { return Floats[0]; } 1547 APFloat &DoubleAPFloat::getSecond() { return Floats[1]; } 1548 const APFloat &DoubleAPFloat::getSecond() const { return Floats[1]; } 1549 1550 } // namespace detail 1551 1552 } // namespace llvm 1553 1554 #undef APFLOAT_DISPATCH_ON_SEMANTICS 1555 #endif // LLVM_ADT_APFLOAT_H 1556