//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class to represent arbitrary precision floating
// point values and provide a variety of arithmetic operations on them.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <limits.h>

/// Forward METHOD_CALL to U.IEEE or U.Double, whichever matches the layout
/// reported by usesLayout<>() for getSemantics(), and return its result from
/// the enclosing function. The macro therefore always leaves the enclosing
/// function: either through one of the two returns or through
/// llvm_unreachable when the semantics use neither layout.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)

using namespace llvm;

/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");

namespace llvm {

// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};

// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};

/* Represents floating point arithmetic semantics.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* How Inf and NaN are represented; defaults to IEEE 754 behavior. */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* How NaN is encoded; defaults to the IEEE encoding. */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;

  /* Whether this semantics has an encoding for Zero */
  bool hasZero = true;

  /* Whether this semantics can represent signed values */
  bool hasSignedRepr = true;

  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Does not take into account
  // the value of fltNonfiniteBehavior.
  //
  // The check is purely on the three numeric fields: S must have an exponent
  // range at least as wide on both ends and at least as many significand bits.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};

// The aggregate initializers below are positional and follow the member order
// of fltSemantics:
//   {maxExponent, minExponent, precision, sizeInBits,
//    nonFiniteBehavior, nanEncoding, hasZero, hasSignedRepr}
// Trailing members that are omitted keep their in-class defaults.
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
// The only semantics here with neither a zero encoding nor a sign
// (hasZero = false, hasSignedRepr = false).
static constexpr fltSemantics semFloat8E8M0FNU = {
    127,   -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes,
    false, false};

static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat4E2M1FN = {
    2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static constexpr fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics.
Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.  */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.
*/ 203 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 204 53 + 53, 128}; 205 206 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 207 switch (S) { 208 case S_IEEEhalf: 209 return IEEEhalf(); 210 case S_BFloat: 211 return BFloat(); 212 case S_IEEEsingle: 213 return IEEEsingle(); 214 case S_IEEEdouble: 215 return IEEEdouble(); 216 case S_IEEEquad: 217 return IEEEquad(); 218 case S_PPCDoubleDouble: 219 return PPCDoubleDouble(); 220 case S_Float8E5M2: 221 return Float8E5M2(); 222 case S_Float8E5M2FNUZ: 223 return Float8E5M2FNUZ(); 224 case S_Float8E4M3: 225 return Float8E4M3(); 226 case S_Float8E4M3FN: 227 return Float8E4M3FN(); 228 case S_Float8E4M3FNUZ: 229 return Float8E4M3FNUZ(); 230 case S_Float8E4M3B11FNUZ: 231 return Float8E4M3B11FNUZ(); 232 case S_Float8E3M4: 233 return Float8E3M4(); 234 case S_FloatTF32: 235 return FloatTF32(); 236 case S_Float8E8M0FNU: 237 return Float8E8M0FNU(); 238 case S_Float6E3M2FN: 239 return Float6E3M2FN(); 240 case S_Float6E2M3FN: 241 return Float6E2M3FN(); 242 case S_Float4E2M1FN: 243 return Float4E2M1FN(); 244 case S_x87DoubleExtended: 245 return x87DoubleExtended(); 246 } 247 llvm_unreachable("Unrecognised floating semantics"); 248 } 249 250 APFloatBase::Semantics 251 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 252 if (&Sem == &llvm::APFloat::IEEEhalf()) 253 return S_IEEEhalf; 254 else if (&Sem == &llvm::APFloat::BFloat()) 255 return S_BFloat; 256 else if (&Sem == &llvm::APFloat::IEEEsingle()) 257 return S_IEEEsingle; 258 else if (&Sem == &llvm::APFloat::IEEEdouble()) 259 return S_IEEEdouble; 260 else if (&Sem == &llvm::APFloat::IEEEquad()) 261 return S_IEEEquad; 262 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 263 return S_PPCDoubleDouble; 264 else if (&Sem == &llvm::APFloat::Float8E5M2()) 265 return S_Float8E5M2; 266 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 267 return S_Float8E5M2FNUZ; 268 else if (&Sem == &llvm::APFloat::Float8E4M3()) 269 return 
S_Float8E4M3; 270 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 271 return S_Float8E4M3FN; 272 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 273 return S_Float8E4M3FNUZ; 274 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 275 return S_Float8E4M3B11FNUZ; 276 else if (&Sem == &llvm::APFloat::Float8E3M4()) 277 return S_Float8E3M4; 278 else if (&Sem == &llvm::APFloat::FloatTF32()) 279 return S_FloatTF32; 280 else if (&Sem == &llvm::APFloat::Float8E8M0FNU()) 281 return S_Float8E8M0FNU; 282 else if (&Sem == &llvm::APFloat::Float6E3M2FN()) 283 return S_Float6E3M2FN; 284 else if (&Sem == &llvm::APFloat::Float6E2M3FN()) 285 return S_Float6E2M3FN; 286 else if (&Sem == &llvm::APFloat::Float4E2M1FN()) 287 return S_Float4E2M1FN; 288 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 289 return S_x87DoubleExtended; 290 else 291 llvm_unreachable("Unknown floating semantics"); 292 } 293 294 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 295 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 296 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 297 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 298 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 299 const fltSemantics &APFloatBase::PPCDoubleDouble() { 300 return semPPCDoubleDouble; 301 } 302 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 303 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 304 const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; } 305 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 306 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 307 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 308 return semFloat8E4M3B11FNUZ; 309 } 310 const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; } 311 const fltSemantics &APFloatBase::FloatTF32() { return 
semFloatTF32; } 312 const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; } 313 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } 314 const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } 315 const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; } 316 const fltSemantics &APFloatBase::x87DoubleExtended() { 317 return semX87DoubleExtended; 318 } 319 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 320 321 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 322 constexpr RoundingMode APFloatBase::rmTowardPositive; 323 constexpr RoundingMode APFloatBase::rmTowardNegative; 324 constexpr RoundingMode APFloatBase::rmTowardZero; 325 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 326 327 /* A tight upper bound on number of parts required to hold the value 328 pow(5, power) is 329 330 power * 815 / (351 * integerPartWidth) + 1 331 332 However, whilst the result may require only this many parts, 333 because we are multiplying two values to get it, the 334 multiplication may require an extra part with the excess part 335 being zero (consider the trivial case of 1 * 1, tcFullMultiply 336 requires two parts to hold the single-part result). So we add an 337 extra one to guarantee enough space whilst multiplying. 
*/ 338 const unsigned int maxExponent = 16383; 339 const unsigned int maxPrecision = 113; 340 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 341 const unsigned int maxPowerOfFiveParts = 342 2 + 343 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 344 345 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 346 return semantics.precision; 347 } 348 APFloatBase::ExponentType 349 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 350 return semantics.maxExponent; 351 } 352 APFloatBase::ExponentType 353 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 354 return semantics.minExponent; 355 } 356 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 357 return semantics.sizeInBits; 358 } 359 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 360 bool isSigned) { 361 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 362 // at least one more bit than the MaxExponent to hold the max FP value. 363 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 364 // Extra sign bit needed. 365 if (isSigned) 366 ++MinBitWidth; 367 return MinBitWidth; 368 } 369 370 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 371 const fltSemantics &Dst) { 372 // Exponent range must be larger. 373 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 374 return false; 375 376 // If the mantissa is long enough, the result value could still be denormal 377 // with a larger exponent range. 378 // 379 // FIXME: This condition is probably not accurate but also shouldn't be a 380 // practical concern with existing types. 
381 return Dst.precision >= Src.precision; 382 } 383 384 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 385 return Sem.sizeInBits; 386 } 387 388 static constexpr APFloatBase::ExponentType 389 exponentZero(const fltSemantics &semantics) { 390 return semantics.minExponent - 1; 391 } 392 393 static constexpr APFloatBase::ExponentType 394 exponentInf(const fltSemantics &semantics) { 395 return semantics.maxExponent + 1; 396 } 397 398 static constexpr APFloatBase::ExponentType 399 exponentNaN(const fltSemantics &semantics) { 400 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 401 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 402 return exponentZero(semantics); 403 return semantics.maxExponent; 404 } 405 return semantics.maxExponent + 1; 406 } 407 408 /* A bunch of private, handy routines. */ 409 410 static inline Error createError(const Twine &Err) { 411 return make_error<StringError>(Err, inconvertibleErrorCode()); 412 } 413 414 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 415 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) / 416 APFloatBase::integerPartWidth); 417 } 418 419 /* Returns 0U-9U. Return values >= 10U are not digits. */ 420 static inline unsigned int 421 decDigitValue(unsigned int c) 422 { 423 return c - '0'; 424 } 425 426 /* Return the value of a decimal exponent of the form 427 [+-]ddddddd. 428 429 If the exponent overflows, returns a large exponent with the 430 appropriate sign. */ 431 static Expected<int> readExponent(StringRef::iterator begin, 432 StringRef::iterator end) { 433 bool isNegative; 434 unsigned int absExponent; 435 const unsigned int overlargeExponent = 24000; /* FIXME. 
*/ 436 StringRef::iterator p = begin; 437 438 // Treat no exponent as 0 to match binutils 439 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 440 return 0; 441 } 442 443 isNegative = (*p == '-'); 444 if (*p == '-' || *p == '+') { 445 p++; 446 if (p == end) 447 return createError("Exponent has no digits"); 448 } 449 450 absExponent = decDigitValue(*p++); 451 if (absExponent >= 10U) 452 return createError("Invalid character in exponent"); 453 454 for (; p != end; ++p) { 455 unsigned int value; 456 457 value = decDigitValue(*p); 458 if (value >= 10U) 459 return createError("Invalid character in exponent"); 460 461 absExponent = absExponent * 10U + value; 462 if (absExponent >= overlargeExponent) { 463 absExponent = overlargeExponent; 464 break; 465 } 466 } 467 468 if (isNegative) 469 return -(int) absExponent; 470 else 471 return (int) absExponent; 472 } 473 474 /* This is ugly and needs cleaning up, but I don't immediately see 475 how whilst remaining safe. */ 476 static Expected<int> totalExponent(StringRef::iterator p, 477 StringRef::iterator end, 478 int exponentAdjustment) { 479 int unsignedExponent; 480 bool negative, overflow; 481 int exponent = 0; 482 483 if (p == end) 484 return createError("Exponent has no digits"); 485 486 negative = *p == '-'; 487 if (*p == '-' || *p == '+') { 488 p++; 489 if (p == end) 490 return createError("Exponent has no digits"); 491 } 492 493 unsignedExponent = 0; 494 overflow = false; 495 for (; p != end; ++p) { 496 unsigned int value; 497 498 value = decDigitValue(*p); 499 if (value >= 10U) 500 return createError("Invalid character in exponent"); 501 502 unsignedExponent = unsignedExponent * 10 + value; 503 if (unsignedExponent > 32767) { 504 overflow = true; 505 break; 506 } 507 } 508 509 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 510 overflow = true; 511 512 if (!overflow) { 513 exponent = unsignedExponent; 514 if (negative) 515 exponent = -exponent; 516 exponent += exponentAdjustment; 517 if 
(exponent > 32767 || exponent < -32768) 518 overflow = true; 519 } 520 521 if (overflow) 522 exponent = negative ? -32768: 32767; 523 524 return exponent; 525 } 526 527 static Expected<StringRef::iterator> 528 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 529 StringRef::iterator *dot) { 530 StringRef::iterator p = begin; 531 *dot = end; 532 while (p != end && *p == '0') 533 p++; 534 535 if (p != end && *p == '.') { 536 *dot = p++; 537 538 if (end - begin == 1) 539 return createError("Significand has no digits"); 540 541 while (p != end && *p == '0') 542 p++; 543 } 544 545 return p; 546 } 547 548 /* Given a normal decimal floating point number of the form 549 550 dddd.dddd[eE][+-]ddd 551 552 where the decimal point and exponent are optional, fill out the 553 structure D. Exponent is appropriate if the significand is 554 treated as an integer, and normalizedExponent if the significand 555 is taken to have the decimal point after a single leading 556 non-zero digit. 557 558 If the value is zero, V->firstSigDigit points to a non-digit, and 559 the return exponent is zero. 
*/
struct decimalInfo {
  // First character after any leading zeroes and decimal point.
  const char *firstSigDigit;
  // Last significant character once trailing zeroes and a trailing dot have
  // been dropped.
  const char *lastSigDigit;
  // Decimal exponent with the significand read as an integer.
  int exponent;
  // Decimal exponent with the point placed after the first significant digit.
  int normalizedExponent;
};

// Scan the decimal number in [begin, end) and fill in *D.
//
// Returns an error for a second decimal point, for a character other than
// 'e'/'E' terminating the digits, for a significand without digits, or for
// anything readExponent() rejects; otherwise returns success.
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  // Leading zeroes carry no information; dot is updated if a '.' is skipped.
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  // Walk over the digits, remembering the position of the (single) dot and
  // stopping at the first character that is neither a digit nor a dot.
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  // Anything left over must be an exponent introduced by 'e' or 'E'.
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    // A lone "." in front of the exponent is not a significand either.
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      // The inner loop steps back over '0' characters; the outer loop repeats
      // it when the character it stopped on is the decimal point.
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}

/* Return the trailing fraction of a hexadecimal number.
635 DIGITVALUE is the first hex digit of the fraction, P points to 636 the next digit. */ 637 static Expected<lostFraction> 638 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 639 unsigned int digitValue) { 640 unsigned int hexDigit; 641 642 /* If the first trailing digit isn't 0 or 8 we can work out the 643 fraction immediately. */ 644 if (digitValue > 8) 645 return lfMoreThanHalf; 646 else if (digitValue < 8 && digitValue > 0) 647 return lfLessThanHalf; 648 649 // Otherwise we need to find the first non-zero digit. 650 while (p != end && (*p == '0' || *p == '.')) 651 p++; 652 653 if (p == end) 654 return createError("Invalid trailing hexadecimal fraction!"); 655 656 hexDigit = hexDigitValue(*p); 657 658 /* If we ran off the end it is exactly zero or one-half, otherwise 659 a little more. */ 660 if (hexDigit == UINT_MAX) 661 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 662 else 663 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 664 } 665 666 /* Return the fraction lost were a bignum truncated losing the least 667 significant BITS bits. */ 668 static lostFraction 669 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 670 unsigned int partCount, 671 unsigned int bits) 672 { 673 unsigned int lsb; 674 675 lsb = APInt::tcLSB(parts, partCount); 676 677 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ 678 if (bits <= lsb) 679 return lfExactlyZero; 680 if (bits == lsb + 1) 681 return lfExactlyHalf; 682 if (bits <= partCount * APFloatBase::integerPartWidth && 683 APInt::tcExtractBit(parts, bits - 1)) 684 return lfMoreThanHalf; 685 686 return lfLessThanHalf; 687 } 688 689 /* Shift DST right BITS bits noting lost fraction. 
*/ 690 static lostFraction 691 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 692 { 693 lostFraction lost_fraction; 694 695 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 696 697 APInt::tcShiftRight(dst, parts, bits); 698 699 return lost_fraction; 700 } 701 702 /* Combine the effect of two lost fractions. */ 703 static lostFraction 704 combineLostFractions(lostFraction moreSignificant, 705 lostFraction lessSignificant) 706 { 707 if (lessSignificant != lfExactlyZero) { 708 if (moreSignificant == lfExactlyZero) 709 moreSignificant = lfLessThanHalf; 710 else if (moreSignificant == lfExactlyHalf) 711 moreSignificant = lfMoreThanHalf; 712 } 713 714 return moreSignificant; 715 } 716 717 /* The error from the true value, in half-ulps, on multiplying two 718 floating point numbers, which differ from the value they 719 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 720 than the returned value. 721 722 See "How to Read Floating Point Numbers Accurately" by William D 723 Clinger. */ 724 static unsigned int 725 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 726 { 727 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 728 729 if (HUerr1 + HUerr2 == 0) 730 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 731 else 732 return inexactMultiply + 2 * (HUerr1 + HUerr2); 733 } 734 735 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 736 when the least significant BITS are truncated. BITS cannot be 737 zero. 
*/
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  // Locate the part holding the highest truncated bit (index bits - 1) and
  // the number of truncated bits that fall inside that part.
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  // Keep only the truncated bits of the top part.
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  // For round-to-nearest the boundary is the half-way point, i.e. just the
  // highest truncated bit; otherwise it is zero.
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  // Single-part case: return the smaller of the two unsigned (wrapping)
  // differences, i.e. the distance to the boundary from either side.
  if (count == 0) {
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  // Multi-part case: a small distance is only possible when the top part sits
  // exactly on the boundary (all intermediate parts zero) or exactly one below
  // it (all intermediate parts all-ones); the lowest part then gives the
  // distance. Everything else is reported as the maximum value.
  if (part == boundary) {
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}

/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  // Scratch area for the repeated squarings 5^8, 5^16, 5^32, ... stored back
  // to back; it is rebuilt on every call. The first entry is 5^8.
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  // Number of parts occupied by the squaring that pow5 currently points at.
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  // p1 always holds the running product, p2 is the buffer the next
  // multiplication writes into; the two are swapped after each multiply.
  p1 = dst;
  p2 = scratch;

  // The low three bits of the exponent are served from the small table; the
  // remaining bits select which of the squarings to multiply in.
  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  // Number of parts in the running product.
  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      // Square the previous entry, which lies immediately before pow5.
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      // Drop the top part if the full product did not need it.
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product was written to p2 and is RESULT parts long.  Swap the
         buffers so that p1 holds it again and p2 is scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    // Step past the current squaring so the next one is stored after it.
    pow5 += partsCount;
  }

  // After an odd number of swaps the answer lives in the scratch buffer.
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}

/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";

/* Write out an integerPart in hexadecimal, starting with the most
   significant nibble.  Write out exactly COUNT hexdigits, return
   COUNT.  */
static unsigned int
partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
           const char *hexDigitChars)
{
  unsigned int result = count;

  assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);

  // Discard the low nibbles that are not wanted, then emit the remaining
  // nibbles from least to most significant, filling dst from the back.
  part >>= (APFloatBase::integerPartWidth - 4 * count);
  while (count--) {
    dst[count] = hexDigitChars[part & 0xf];
    part >>= 4;
  }

  return result;
}

/* Write out an unsigned decimal integer.
*/ 867 static char * 868 writeUnsignedDecimal (char *dst, unsigned int n) 869 { 870 char buff[40], *p; 871 872 p = buff; 873 do 874 *p++ = '0' + n % 10; 875 while (n /= 10); 876 877 do 878 *dst++ = *--p; 879 while (p != buff); 880 881 return dst; 882 } 883 884 /* Write out a signed decimal integer. */ 885 static char * 886 writeSignedDecimal (char *dst, int value) 887 { 888 if (value < 0) { 889 *dst++ = '-'; 890 dst = writeUnsignedDecimal(dst, -(unsigned) value); 891 } else 892 dst = writeUnsignedDecimal(dst, value); 893 894 return dst; 895 } 896 897 namespace detail { 898 /* Constructors. */ 899 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 900 unsigned int count; 901 902 semantics = ourSemantics; 903 count = partCount(); 904 if (count > 1) 905 significand.parts = new integerPart[count]; 906 } 907 908 void IEEEFloat::freeSignificand() { 909 if (needsCleanup()) 910 delete [] significand.parts; 911 } 912 913 void IEEEFloat::assign(const IEEEFloat &rhs) { 914 assert(semantics == rhs.semantics); 915 916 sign = rhs.sign; 917 category = rhs.category; 918 exponent = rhs.exponent; 919 if (isFiniteNonZero() || category == fcNaN) 920 copySignificand(rhs); 921 } 922 923 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 924 assert(isFiniteNonZero() || category == fcNaN); 925 assert(rhs.partCount() >= partCount()); 926 927 APInt::tcAssign(significandParts(), rhs.significandParts(), 928 partCount()); 929 } 930 931 /* Make this number a NaN, with an arbitrary but deterministic value 932 for the significand. If double or longer, this is a signalling NaN, 933 which may not be ideal. If float, this is QNaN(0). 
*/ 934 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 935 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 936 llvm_unreachable("This floating point format does not support NaN"); 937 938 if (Negative && !semantics->hasSignedRepr) 939 llvm_unreachable( 940 "This floating point format does not support signed values"); 941 942 category = fcNaN; 943 sign = Negative; 944 exponent = exponentNaN(); 945 946 integerPart *significand = significandParts(); 947 unsigned numParts = partCount(); 948 949 APInt fill_storage; 950 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 951 // Finite-only types do not distinguish signalling and quiet NaN, so 952 // make them all signalling. 953 SNaN = false; 954 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 955 sign = true; 956 fill_storage = APInt::getZero(semantics->precision - 1); 957 } else { 958 fill_storage = APInt::getAllOnes(semantics->precision - 1); 959 } 960 fill = &fill_storage; 961 } 962 963 // Set the significand bits to the fill. 964 if (!fill || fill->getNumWords() < numParts) 965 APInt::tcSet(significand, 0, numParts); 966 if (fill) { 967 APInt::tcAssign(significand, fill->getRawData(), 968 std::min(fill->getNumWords(), numParts)); 969 970 // Zero out the excess bits of the significand. 971 unsigned bitsToPreserve = semantics->precision - 1; 972 unsigned part = bitsToPreserve / 64; 973 bitsToPreserve %= 64; 974 significand[part] &= ((1ULL << bitsToPreserve) - 1); 975 for (part++; part != numParts; ++part) 976 significand[part] = 0; 977 } 978 979 unsigned QNaNBit = 980 (semantics->precision >= 2) ? (semantics->precision - 2) : 0; 981 982 if (SNaN) { 983 // We always have to clear the QNaN bit to make it an SNaN. 
984 APInt::tcClearBit(significand, QNaNBit); 985 986 // If there are no bits set in the payload, we have to set 987 // *something* to make it a NaN instead of an infinity; 988 // conventionally, this is the next bit down from the QNaN bit. 989 if (APInt::tcIsZero(significand, numParts)) 990 APInt::tcSetBit(significand, QNaNBit - 1); 991 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 992 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 993 // Do nothing. 994 } else { 995 // We always have to set the QNaN bit to make it a QNaN. 996 APInt::tcSetBit(significand, QNaNBit); 997 } 998 999 // For x87 extended precision, we want to make a NaN, not a 1000 // pseudo-NaN. Maybe we should expose the ability to make 1001 // pseudo-NaNs? 1002 if (semantics == &semX87DoubleExtended) 1003 APInt::tcSetBit(significand, QNaNBit + 1); 1004 } 1005 1006 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 1007 if (this != &rhs) { 1008 if (semantics != rhs.semantics) { 1009 freeSignificand(); 1010 initialize(rhs.semantics); 1011 } 1012 assign(rhs); 1013 } 1014 1015 return *this; 1016 } 1017 1018 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 1019 freeSignificand(); 1020 1021 semantics = rhs.semantics; 1022 significand = rhs.significand; 1023 exponent = rhs.exponent; 1024 category = rhs.category; 1025 sign = rhs.sign; 1026 1027 rhs.semantics = &semBogus; 1028 return *this; 1029 } 1030 1031 bool IEEEFloat::isDenormal() const { 1032 return isFiniteNonZero() && (exponent == semantics->minExponent) && 1033 (APInt::tcExtractBit(significandParts(), 1034 semantics->precision - 1) == 0); 1035 } 1036 1037 bool IEEEFloat::isSmallest() const { 1038 // The smallest number by magnitude in our format will be the smallest 1039 // denormal, i.e. the floating point number with exponent being minimum 1040 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 
1041 return isFiniteNonZero() && exponent == semantics->minExponent && 1042 significandMSB() == 0; 1043 } 1044 1045 bool IEEEFloat::isSmallestNormalized() const { 1046 return getCategory() == fcNormal && exponent == semantics->minExponent && 1047 isSignificandAllZerosExceptMSB(); 1048 } 1049 1050 unsigned int IEEEFloat::getNumHighBits() const { 1051 const unsigned int PartCount = partCountForBits(semantics->precision); 1052 const unsigned int Bits = PartCount * integerPartWidth; 1053 1054 // Compute how many bits are used in the final word. 1055 // When precision is just 1, it represents the 'Pth' 1056 // Precision bit and not the actual significand bit. 1057 const unsigned int NumHighBits = (semantics->precision > 1) 1058 ? (Bits - semantics->precision + 1) 1059 : (Bits - semantics->precision); 1060 return NumHighBits; 1061 } 1062 1063 bool IEEEFloat::isSignificandAllOnes() const { 1064 // Test if the significand excluding the integral bit is all ones. This allows 1065 // us to test for binade boundaries. 1066 const integerPart *Parts = significandParts(); 1067 const unsigned PartCount = partCountForBits(semantics->precision); 1068 for (unsigned i = 0; i < PartCount - 1; i++) 1069 if (~Parts[i]) 1070 return false; 1071 1072 // Set the unused high bits to all ones when we compare. 1073 const unsigned NumHighBits = getNumHighBits(); 1074 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1075 "Can not have more high bits to fill than integerPartWidth"); 1076 const integerPart HighBitFill = 1077 ~integerPart(0) << (integerPartWidth - NumHighBits); 1078 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill))) 1079 return false; 1080 1081 return true; 1082 } 1083 1084 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1085 // Test if the significand excluding the integral bit is all ones except for 1086 // the least significant bit. 
1087 const integerPart *Parts = significandParts(); 1088 1089 if (Parts[0] & 1) 1090 return false; 1091 1092 const unsigned PartCount = partCountForBits(semantics->precision); 1093 for (unsigned i = 0; i < PartCount - 1; i++) { 1094 if (~Parts[i] & ~unsigned{!i}) 1095 return false; 1096 } 1097 1098 // Set the unused high bits to all ones when we compare. 1099 const unsigned NumHighBits = getNumHighBits(); 1100 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1101 "Can not have more high bits to fill than integerPartWidth"); 1102 const integerPart HighBitFill = ~integerPart(0) 1103 << (integerPartWidth - NumHighBits); 1104 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1105 return false; 1106 1107 return true; 1108 } 1109 1110 bool IEEEFloat::isSignificandAllZeros() const { 1111 // Test if the significand excluding the integral bit is all zeros. This 1112 // allows us to test for binade boundaries. 1113 const integerPart *Parts = significandParts(); 1114 const unsigned PartCount = partCountForBits(semantics->precision); 1115 1116 for (unsigned i = 0; i < PartCount - 1; i++) 1117 if (Parts[i]) 1118 return false; 1119 1120 // Compute how many bits are used in the final word. 
1121 const unsigned NumHighBits = getNumHighBits(); 1122 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1123 "clear than integerPartWidth"); 1124 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1125 1126 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask)) 1127 return false; 1128 1129 return true; 1130 } 1131 1132 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1133 const integerPart *Parts = significandParts(); 1134 const unsigned PartCount = partCountForBits(semantics->precision); 1135 1136 for (unsigned i = 0; i < PartCount - 1; i++) { 1137 if (Parts[i]) 1138 return false; 1139 } 1140 1141 const unsigned NumHighBits = getNumHighBits(); 1142 const integerPart MSBMask = integerPart(1) 1143 << (integerPartWidth - NumHighBits); 1144 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask)); 1145 } 1146 1147 bool IEEEFloat::isLargest() const { 1148 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent; 1149 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1150 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1151 // The largest number by magnitude in our format will be the floating point 1152 // number with maximum exponent and with significand that is all ones except 1153 // the LSB. 1154 return (IsMaxExp && APFloat::hasSignificand(*semantics)) 1155 ? isSignificandAllOnesExceptLSB() 1156 : IsMaxExp; 1157 } else { 1158 // The largest number by magnitude in our format will be the floating point 1159 // number with maximum exponent and with significand that is all ones. 1160 return IsMaxExp && isSignificandAllOnes(); 1161 } 1162 } 1163 1164 bool IEEEFloat::isInteger() const { 1165 // This could be made more efficient; I'm going for obviously correct. 
1166 if (!isFinite()) return false; 1167 IEEEFloat truncated = *this; 1168 truncated.roundToIntegral(rmTowardZero); 1169 return compare(truncated) == cmpEqual; 1170 } 1171 1172 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1173 if (this == &rhs) 1174 return true; 1175 if (semantics != rhs.semantics || 1176 category != rhs.category || 1177 sign != rhs.sign) 1178 return false; 1179 if (category==fcZero || category==fcInfinity) 1180 return true; 1181 1182 if (isFiniteNonZero() && exponent != rhs.exponent) 1183 return false; 1184 1185 return std::equal(significandParts(), significandParts() + partCount(), 1186 rhs.significandParts()); 1187 } 1188 1189 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1190 initialize(&ourSemantics); 1191 sign = 0; 1192 category = fcNormal; 1193 zeroSignificand(); 1194 exponent = ourSemantics.precision - 1; 1195 significandParts()[0] = value; 1196 normalize(rmNearestTiesToEven, lfExactlyZero); 1197 } 1198 1199 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1200 initialize(&ourSemantics); 1201 // The Float8E8MOFNU format does not have a representation 1202 // for zero. So, use the closest representation instead. 1203 // Moreover, the all-zero encoding represents a valid 1204 // normal value (which is the smallestNormalized here). 1205 // Hence, we call makeSmallestNormalized (where category is 1206 // 'fcNormal') instead of makeZero (where category is 'fcZero'). 1207 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false); 1208 } 1209 1210 // Delegate to the previous constructor, because later copy constructor may 1211 // actually inspects category, which can't be garbage. 
1212 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1213 : IEEEFloat(ourSemantics) {} 1214 1215 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1216 initialize(rhs.semantics); 1217 assign(rhs); 1218 } 1219 1220 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1221 *this = std::move(rhs); 1222 } 1223 1224 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1225 1226 unsigned int IEEEFloat::partCount() const { 1227 return partCountForBits(semantics->precision + 1); 1228 } 1229 1230 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 1231 return const_cast<IEEEFloat *>(this)->significandParts(); 1232 } 1233 1234 IEEEFloat::integerPart *IEEEFloat::significandParts() { 1235 if (partCount() > 1) 1236 return significand.parts; 1237 else 1238 return &significand.part; 1239 } 1240 1241 void IEEEFloat::zeroSignificand() { 1242 APInt::tcSet(significandParts(), 0, partCount()); 1243 } 1244 1245 /* Increment an fcNormal floating point number's significand. */ 1246 void IEEEFloat::incrementSignificand() { 1247 integerPart carry; 1248 1249 carry = APInt::tcIncrement(significandParts(), partCount()); 1250 1251 /* Our callers should never cause us to overflow. */ 1252 assert(carry == 0); 1253 (void)carry; 1254 } 1255 1256 /* Add the significand of the RHS. Returns the carry flag. */ 1257 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1258 integerPart *parts; 1259 1260 parts = significandParts(); 1261 1262 assert(semantics == rhs.semantics); 1263 assert(exponent == rhs.exponent); 1264 1265 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1266 } 1267 1268 /* Subtract the significand of the RHS with a borrow flag. Returns 1269 the borrow flag. 
*/
IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
                                                      integerPart borrow) {
  integerPart *parts;

  parts = significandParts();

  /* This is a raw multi-word subtraction, so both operands must already
     agree on semantics and exponent. */
  assert(semantics == rhs.semantics);
  assert(exponent == rhs.exponent);

  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
                           partCount());
}

/* Multiply the significand of the RHS. Unless IGNOREADDEND is set or
   ADDEND is zero, add ADDEND on to the full-precision result of the
   multiplication. Returns the lost fraction. */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend,
                                            bool ignoreAddend) {
  unsigned int omsb; // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Small products use the on-stack scratch buffer; larger ones are heap
  // allocated and released at the end of the function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (!ignoreAddend && addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics. */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Temporarily make *this view the full product under the extended
    // semantics so the member add/subtract routine can operate on it; the
    // saved significand and semantics are restored below.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state. */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent need to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Only the low partsCount words are kept as the new significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}

/* Multiply the significand of the RHS with no addend. Returns the lost
   fraction. */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  // When the given semantics has zero, the addend here is a zero.
  // i.e . it belongs to the 'fcZero' category.
  // But when the semantics does not support zero, we need to
  // explicitly convey that this addend should be ignored
  // for multiplication.
  return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
}

/* Divide the significand of this number by the significand of the RHS,
   leaving the quotient in this number's significand and adjusting the
   exponent. Returns the lost fraction. */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  /* One buffer holds both working copies: the dividend in the first
     partsCount words and the divisor in the next partsCount words. */
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place. */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor. */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend. */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one. */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division: produce one quotient bit per iteration, from the most
     significant bit downwards. */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction by comparing the (already doubled)
     remainder against the divisor. */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}

/* Position of the most significant set bit of the significand, as reported
   by APInt::tcMSB. */
unsigned int IEEEFloat::significandMSB() const {
  return APInt::tcMSB(significandParts(), partCount());
}

/* Position of the least significant set bit of the significand, as reported
   by APInt::tcLSB. */
unsigned int IEEEFloat::significandLSB() const {
  return APInt::tcLSB(significandParts(), partCount());
}

/* Shift the significand right BITS bits, add BITS to its exponent, and
   return the lost fraction.
   Note that a zero result is NOT normalized to fcZero. */
lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
  /* Our exponent should not overflow. */
  assert((ExponentType) (exponent + bits) >= exponent);

  exponent += bits;

  return shiftRight(significandParts(), partCount(), bits);
}

/* Shift the significand left BITS bits, subtract BITS from its exponent. */
void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
  assert(bits < semantics->precision ||
         (semantics->precision == 1 && bits <= 1));

  if (bits) {
    unsigned int partsCount = partCount();

    APInt::tcShiftLeft(significandParts(), partsCount, bits);
    exponent -= bits;

    assert(!APInt::tcIsZero(significandParts(), partsCount));
  }
}

/* Compare the magnitudes of two finite non-zero numbers with the same
   semantics, ignoring their signs. */
IEEEFloat::cmpResult
IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
  int compare;

  assert(semantics == rhs.semantics);
  assert(isFiniteNonZero());
  assert(rhs.isFiniteNonZero());

  compare = exponent - rhs.exponent;

  /* If exponents are equal, do an unsigned bignum comparison of the
     significands. */
  if (compare == 0)
    compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
                               partCount());

  if (compare > 0)
    return cmpGreaterThan;
  else if (compare < 0)
    return cmpLessThan;
  else
    return cmpEqual;
}

/* Set the least significant BITS bits of a bignum, clear the
   rest. */
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
                                      unsigned bits) {
  unsigned i = 0;
  /* Whole words that are entirely set. */
  while (bits > APInt::APINT_BITS_PER_WORD) {
    dst[i++] = ~(APInt::WordType)0;
    bits -= APInt::APINT_BITS_PER_WORD;
  }

  /* The final, possibly partial, word. */
  if (bits)
    dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);

  while (i < parts)
    dst[i++] = 0;
}

/* Handle overflow. Sign is preserved. We either become infinity or
   the largest finite number.
*/
IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
    /* Infinity? */
    if (rounding_mode == rmNearestTiesToEven ||
        rounding_mode == rmNearestTiesToAway ||
        (rounding_mode == rmTowardPositive && !sign) ||
        (rounding_mode == rmTowardNegative && sign)) {
      /* Formats without an infinity encoding overflow to NaN instead. */
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
        makeNaN(false, sign);
      else
        category = fcInfinity;
      return static_cast<opStatus>(opOverflow | opInexact);
    }
  }

  /* Otherwise we become the largest finite number. */
  category = fcNormal;
  exponent = semantics->maxExponent;
  tcSetLeastSignificantBits(significandParts(), partCount(),
                            semantics->precision);
  /* With the all-ones NaN encoding, the all-ones significand at the maximum
     exponent is not a finite value, so step down by clearing the LSB. */
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    APInt::tcClearBit(significandParts(), 0);

  return opInexact;
}

/* Returns TRUE if, when truncating the current number, with BIT the
   new LSB, with the given lost fraction and rounding mode, the result
   would need to be rounded away from zero (i.e., by increasing the
   significand). This routine must work for fcZero of both signs, and
   fcNormal numbers. */
bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
                                  lostFraction lost_fraction,
                                  unsigned int bit) const {
  /* NaNs and infinities should not have lost fractions. */
  assert(isFiniteNonZero() || category == fcZero);

  /* Current callers never pass this so we don't handle it. */
  assert(lost_fraction != lfExactlyZero);

  switch (rounding_mode) {
  case rmNearestTiesToAway:
    return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;

  case rmNearestTiesToEven:
    if (lost_fraction == lfMoreThanHalf)
      return true;

    /* Our zeroes don't have a significand to test. */
    /* On an exact tie, round away only if that makes the new LSB even,
       i.e. if BIT is currently set. */
    if (lost_fraction == lfExactlyHalf && category != fcZero)
      return APInt::tcExtractBit(significandParts(), bit);

    return false;

  case rmTowardZero:
    return false;

  case rmTowardPositive:
    return !sign;

  case rmTowardNegative:
    return sign;

  default:
    break;
  }
  llvm_unreachable("Invalid rounding mode found");
}

/* Bring a finite non-zero number into canonical form: position the
   significand's MSB for the format (or for the minimum exponent, giving a
   denormal), round according to ROUNDING_MODE and LOST_FRACTION, and handle
   overflow and underflow. Other categories are returned unchanged with
   opOK. Returns the resulting status flags. */
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb; /* One, not zero, based MSB. */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers. */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1. We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent. */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode. */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that. */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision. */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction. */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date. */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones values is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction. */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results. */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes. */
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      /* Formats without a zero use the smallest normalized value in its
         place. */
      if (!semantics->hasZero)
        makeSmallestNormalized(false);
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero. */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow? */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one. However if we already have the
         maximum exponent we overflow to infinity. */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones values is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow. */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal. */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes. */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
    // This condition handles the case where the semantics
    // does not have zero but uses the all-zero encoding
    // to represent the smallest normal value.
    if (!semantics->hasZero)
      makeSmallestNormalized(false);
  }

  /* The fcZero case is a denormal that underflowed to zero. */
  return (opStatus) (opUnderflow | opInexact);
}

/* Handle an addition (or, when SUBTRACT is set, a subtraction) in which at
   least one operand is zero, infinite or NaN, dispatching on the pair of
   operand categories. */
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  /* Only the RHS is a NaN: take it over, then share the NaN handling
     below. */
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    /* A signalling NaN in either operand yields opInvalidOp; a signalling
       result is quieted first. */
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller. */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted. */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    /* NOTE(review): opDivByZero looks like a sentinel telling the caller
       that neither operand is special and the real arithmetic is still to
       be done -- confirm against the caller. */
    return opDivByZero;
  }
}

/* Add or subtract two normal numbers.
*/
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
                                                 bool subtract) {
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction. */
  subtract ^= static_cast<bool>(sign ^ rhs.sign);

  /* Are we bigger exponent-wise than the RHS? */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect. */
  if (subtract) {
    if ((bits < 0) && !semantics->hasSignedRepr)
      llvm_unreachable(
          "This floating point format does not support signed values");

    IEEEFloat temp_rhs(rhs);

    /* Align the exponents: the operand with the smaller exponent is
       shifted right by one bit less than the difference and the other is
       shifted left by one. */
    if (bits == 0)
      lost_fraction = lfExactlyZero;
    else if (bits > 0) {
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
    } else {
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
    }

    // Should we reverse the subtraction.
    if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
      /* A non-zero lost fraction is passed in as the initial borrow. */
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted. */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary. */
    assert(!carry);
    (void)carry;
  } else {
    /* Addition: shift whichever operand has the smaller exponent right
       until the exponents match, then add. */
    if (bits > 0) {
      IEEEFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen. */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}

/* Handle a multiplication in which at least one operand is zero, infinite
   or NaN, dispatching on the pair of operand categories. For two fcNormal
   operands nothing is changed and opOK is returned. */
IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    /* Cleared so that the xor below leaves exactly the sign of RHS. */
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    /* NOTE(review): presumably the caller has already xor-ed rhs.sign
       into sign before calling, which this undoes -- confirm. */
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    category = fcInfinity;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcZero, fcZero):
    category = fcZero;
    return opOK;

  /* Zero times infinity is an invalid operation. */
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

/* Handle a division in which at least one operand is zero, infinite or
   NaN, dispatching on the pair of operand categories. For two fcNormal
   operands nothing is changed and opOK is returned. */
IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    /* Cleared so that the xor below leaves exactly the sign of RHS. */
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    /* NOTE(review): presumably the caller has already xor-ed rhs.sign
       into sign before calling, which this undoes -- confirm. */
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  /* The category of the left operand is already the category of the
     result. */
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  /* Finite non-zero divided by zero: infinity, or NaN for formats that
     have no infinity. */
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return opDivByZero;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ?
opInvalidOp : opOK; 2032 2033 case PackCategoriesIntoKey(fcZero, fcInfinity): 2034 case PackCategoriesIntoKey(fcZero, fcNormal): 2035 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2036 return opOK; 2037 2038 case PackCategoriesIntoKey(fcNormal, fcZero): 2039 case PackCategoriesIntoKey(fcInfinity, fcZero): 2040 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2041 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2042 case PackCategoriesIntoKey(fcZero, fcZero): 2043 makeNaN(); 2044 return opInvalidOp; 2045 2046 case PackCategoriesIntoKey(fcNormal, fcNormal): 2047 return opOK; 2048 } 2049 } 2050 2051 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 2052 switch (PackCategoriesIntoKey(category, rhs.category)) { 2053 default: 2054 llvm_unreachable(nullptr); 2055 2056 case PackCategoriesIntoKey(fcZero, fcNaN): 2057 case PackCategoriesIntoKey(fcNormal, fcNaN): 2058 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2059 assign(rhs); 2060 [[fallthrough]]; 2061 case PackCategoriesIntoKey(fcNaN, fcZero): 2062 case PackCategoriesIntoKey(fcNaN, fcNormal): 2063 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2064 case PackCategoriesIntoKey(fcNaN, fcNaN): 2065 if (isSignaling()) { 2066 makeQuiet(); 2067 return opInvalidOp; 2068 } 2069 return rhs.isSignaling() ? opInvalidOp : opOK; 2070 2071 case PackCategoriesIntoKey(fcZero, fcInfinity): 2072 case PackCategoriesIntoKey(fcZero, fcNormal): 2073 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2074 return opOK; 2075 2076 case PackCategoriesIntoKey(fcNormal, fcZero): 2077 case PackCategoriesIntoKey(fcInfinity, fcZero): 2078 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2079 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2080 case PackCategoriesIntoKey(fcZero, fcZero): 2081 makeNaN(); 2082 return opInvalidOp; 2083 2084 case PackCategoriesIntoKey(fcNormal, fcNormal): 2085 return opDivByZero; // fake status, indicating this is not a special case 2086 } 2087 } 2088 2089 /* Change sign. 
*/ 2090 void IEEEFloat::changeSign() { 2091 // With NaN-as-negative-zero, neither NaN or negative zero can change 2092 // their signs. 2093 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 2094 (isZero() || isNaN())) 2095 return; 2096 /* Look mummy, this one's easy. */ 2097 sign = !sign; 2098 } 2099 2100 /* Normalized addition or subtraction. */ 2101 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2102 roundingMode rounding_mode, 2103 bool subtract) { 2104 opStatus fs; 2105 2106 fs = addOrSubtractSpecials(rhs, subtract); 2107 2108 /* This return code means it was not a simple case. */ 2109 if (fs == opDivByZero) { 2110 lostFraction lost_fraction; 2111 2112 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2113 fs = normalize(rounding_mode, lost_fraction); 2114 2115 /* Can only be zero if we lost no fraction. */ 2116 assert(category != fcZero || lost_fraction == lfExactlyZero); 2117 } 2118 2119 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2120 positive zero unless rounding to minus infinity, except that 2121 adding two like-signed zeroes gives that zero. */ 2122 if (category == fcZero) { 2123 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2124 sign = (rounding_mode == rmTowardNegative); 2125 // NaN-in-negative-zero means zeros need to be normalized to +0. 2126 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2127 sign = false; 2128 } 2129 2130 return fs; 2131 } 2132 2133 /* Normalized addition. */ 2134 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2135 roundingMode rounding_mode) { 2136 return addOrSubtract(rhs, rounding_mode, false); 2137 } 2138 2139 /* Normalized subtraction. */ 2140 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2141 roundingMode rounding_mode) { 2142 return addOrSubtract(rhs, rounding_mode, true); 2143 } 2144 2145 /* Normalized multiply. 
*/ 2146 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 2147 roundingMode rounding_mode) { 2148 opStatus fs; 2149 2150 sign ^= rhs.sign; 2151 fs = multiplySpecials(rhs); 2152 2153 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2154 sign = false; 2155 if (isFiniteNonZero()) { 2156 lostFraction lost_fraction = multiplySignificand(rhs); 2157 fs = normalize(rounding_mode, lost_fraction); 2158 if (lost_fraction != lfExactlyZero) 2159 fs = (opStatus) (fs | opInexact); 2160 } 2161 2162 return fs; 2163 } 2164 2165 /* Normalized divide. */ 2166 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 2167 roundingMode rounding_mode) { 2168 opStatus fs; 2169 2170 sign ^= rhs.sign; 2171 fs = divideSpecials(rhs); 2172 2173 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2174 sign = false; 2175 if (isFiniteNonZero()) { 2176 lostFraction lost_fraction = divideSignificand(rhs); 2177 fs = normalize(rounding_mode, lost_fraction); 2178 if (lost_fraction != lfExactlyZero) 2179 fs = (opStatus) (fs | opInexact); 2180 } 2181 2182 return fs; 2183 } 2184 2185 /* Normalized remainder. */ 2186 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 2187 opStatus fs; 2188 unsigned int origSign = sign; 2189 2190 // First handle the special cases. 2191 fs = remainderSpecials(rhs); 2192 if (fs != opDivByZero) 2193 return fs; 2194 2195 fs = opOK; 2196 2197 // Make sure the current value is less than twice the denom. If the addition 2198 // did not succeed (an overflow has happened), which means that the finite 2199 // value we currently posses must be less than twice the denom (as we are 2200 // using the same semantics). 2201 IEEEFloat P2 = rhs; 2202 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 2203 fs = mod(P2); 2204 assert(fs == opOK); 2205 } 2206 2207 // Lets work with absolute numbers. 
2208 IEEEFloat P = rhs; 2209 P.sign = false; 2210 sign = false; 2211 2212 // 2213 // To calculate the remainder we use the following scheme. 2214 // 2215 // The remainder is defained as follows: 2216 // 2217 // remainder = numer - rquot * denom = x - r * p 2218 // 2219 // Where r is the result of: x/p, rounded toward the nearest integral value 2220 // (with halfway cases rounded toward the even number). 2221 // 2222 // Currently, (after x mod 2p): 2223 // r is the number of 2p's present inside x, which is inherently, an even 2224 // number of p's. 2225 // 2226 // We may split the remaining calculation into 4 options: 2227 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2228 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2229 // are done as well. 2230 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2231 // to subtract 1p at least once. 2232 // - if x >= p then we must subtract p at least once, as x must be a 2233 // remainder. 2234 // 2235 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2236 // 2237 // We can now split the remaining calculation to the following 3 options: 2238 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2239 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2240 // must round up to the next even number. so we must subtract p once more. 2241 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2242 // integral, and subtract p once more. 2243 // 2244 2245 // Extend the semantics to prevent an overflow/underflow or inexact result. 
2246 bool losesInfo; 2247 fltSemantics extendedSemantics = *semantics; 2248 extendedSemantics.maxExponent++; 2249 extendedSemantics.minExponent--; 2250 extendedSemantics.precision += 2; 2251 2252 IEEEFloat VEx = *this; 2253 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2254 assert(fs == opOK && !losesInfo); 2255 IEEEFloat PEx = P; 2256 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2257 assert(fs == opOK && !losesInfo); 2258 2259 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2260 // any fraction. 2261 fs = VEx.add(VEx, rmNearestTiesToEven); 2262 assert(fs == opOK); 2263 2264 if (VEx.compare(PEx) == cmpGreaterThan) { 2265 fs = subtract(P, rmNearestTiesToEven); 2266 assert(fs == opOK); 2267 2268 // Make VEx = this.add(this), but because we have different semantics, we do 2269 // not want to `convert` again, so we just subtract PEx twice (which equals 2270 // to the desired value). 2271 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2272 assert(fs == opOK); 2273 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2274 assert(fs == opOK); 2275 2276 cmpResult result = VEx.compare(PEx); 2277 if (result == cmpGreaterThan || result == cmpEqual) { 2278 fs = subtract(P, rmNearestTiesToEven); 2279 assert(fs == opOK); 2280 } 2281 } 2282 2283 if (isZero()) { 2284 sign = origSign; // IEEE754 requires this 2285 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2286 // But some 8-bit floats only have positive 0. 2287 sign = false; 2288 } 2289 2290 else 2291 sign ^= origSign; 2292 return fs; 2293 } 2294 2295 /* Normalized llvm frem (C fmod). 
*/ 2296 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 2297 opStatus fs; 2298 fs = modSpecials(rhs); 2299 unsigned int origSign = sign; 2300 2301 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 2302 compareAbsoluteValue(rhs) != cmpLessThan) { 2303 int Exp = ilogb(*this) - ilogb(rhs); 2304 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven); 2305 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly 2306 // check for it. 2307 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan) 2308 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); 2309 V.sign = sign; 2310 2311 fs = subtract(V, rmNearestTiesToEven); 2312 2313 // When the semantics supports zero, this loop's 2314 // exit-condition is handled by the 'isFiniteNonZero' 2315 // category check above. However, when the semantics 2316 // does not have 'fcZero' and we have reached the 2317 // minimum possible value, (and any further subtract 2318 // will underflow to the same value) explicitly 2319 // provide an exit-path here. 2320 if (!semantics->hasZero && this->isSmallest()) 2321 break; 2322 2323 assert(fs==opOK); 2324 } 2325 if (isZero()) { 2326 sign = origSign; // fmod requires this 2327 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2328 sign = false; 2329 } 2330 return fs; 2331 } 2332 2333 /* Normalized fused-multiply-add. */ 2334 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 2335 const IEEEFloat &addend, 2336 roundingMode rounding_mode) { 2337 opStatus fs; 2338 2339 /* Post-multiplication sign, before addition. */ 2340 sign ^= multiplicand.sign; 2341 2342 /* If and only if all arguments are normal do we need to do an 2343 extended-precision calculation. 
*/ 2344 if (isFiniteNonZero() && 2345 multiplicand.isFiniteNonZero() && 2346 addend.isFinite()) { 2347 lostFraction lost_fraction; 2348 2349 lost_fraction = multiplySignificand(multiplicand, addend); 2350 fs = normalize(rounding_mode, lost_fraction); 2351 if (lost_fraction != lfExactlyZero) 2352 fs = (opStatus) (fs | opInexact); 2353 2354 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2355 positive zero unless rounding to minus infinity, except that 2356 adding two like-signed zeroes gives that zero. */ 2357 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { 2358 sign = (rounding_mode == rmTowardNegative); 2359 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2360 sign = false; 2361 } 2362 } else { 2363 fs = multiplySpecials(multiplicand); 2364 2365 /* FS can only be opOK or opInvalidOp. There is no more work 2366 to do in the latter case. The IEEE-754R standard says it is 2367 implementation-defined in this case whether, if ADDEND is a 2368 quiet NaN, we raise invalid op; this implementation does so. 2369 2370 If we need to do the addition we can do so with normal 2371 precision. */ 2372 if (fs == opOK) 2373 fs = addOrSubtract(addend, rounding_mode, false); 2374 } 2375 2376 return fs; 2377 } 2378 2379 /* Rounding-mode correct round to integral value. */ 2380 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2381 opStatus fs; 2382 2383 if (isInfinity()) 2384 // [IEEE Std 754-2008 6.1]: 2385 // The behavior of infinity in floating-point arithmetic is derived from the 2386 // limiting cases of real arithmetic with operands of arbitrarily 2387 // large magnitude, when such a limit exists. 2388 // ... 2389 // Operations on infinite operands are usually exact and therefore signal no 2390 // exceptions ... 
2391 return opOK; 2392 2393 if (isNaN()) { 2394 if (isSignaling()) { 2395 // [IEEE Std 754-2008 6.2]: 2396 // Under default exception handling, any operation signaling an invalid 2397 // operation exception and for which a floating-point result is to be 2398 // delivered shall deliver a quiet NaN. 2399 makeQuiet(); 2400 // [IEEE Std 754-2008 6.2]: 2401 // Signaling NaNs shall be reserved operands that, under default exception 2402 // handling, signal the invalid operation exception(see 7.2) for every 2403 // general-computational and signaling-computational operation except for 2404 // the conversions described in 5.12. 2405 return opInvalidOp; 2406 } else { 2407 // [IEEE Std 754-2008 6.2]: 2408 // For an operation with quiet NaN inputs, other than maximum and minimum 2409 // operations, if a floating-point result is to be delivered the result 2410 // shall be a quiet NaN which should be one of the input NaNs. 2411 // ... 2412 // Every general-computational and quiet-computational operation involving 2413 // one or more input NaNs, none of them signaling, shall signal no 2414 // exception, except fusedMultiplyAdd might signal the invalid operation 2415 // exception(see 7.2). 2416 return opOK; 2417 } 2418 } 2419 2420 if (isZero()) { 2421 // [IEEE Std 754-2008 6.3]: 2422 // ... the sign of the result of conversions, the quantize operation, the 2423 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2424 // the sign of the first or only operand. 2425 return opOK; 2426 } 2427 2428 // If the exponent is large enough, we know that this value is already 2429 // integral, and the arithmetic below would potentially cause it to saturate 2430 // to +/-Inf. Bail out early instead. 2431 if (exponent+1 >= (int)semanticsPrecision(*semantics)) 2432 return opOK; 2433 2434 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2435 // precision of our format, and then subtract it back off again. 
The choice 2436 // of rounding modes for the addition/subtraction determines the rounding mode 2437 // for our integral rounding as well. 2438 // NOTE: When the input value is negative, we do subtraction followed by 2439 // addition instead. 2440 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 2441 IntegerConstant <<= semanticsPrecision(*semantics)-1; 2442 IEEEFloat MagicConstant(*semantics); 2443 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2444 rmNearestTiesToEven); 2445 assert(fs == opOK); 2446 MagicConstant.sign = sign; 2447 2448 // Preserve the input sign so that we can handle the case of zero result 2449 // correctly. 2450 bool inputSign = isNegative(); 2451 2452 fs = add(MagicConstant, rounding_mode); 2453 2454 // Current value and 'MagicConstant' are both integers, so the result of the 2455 // subtraction is always exact according to Sterbenz' lemma. 2456 subtract(MagicConstant, rounding_mode); 2457 2458 // Restore the input sign. 2459 if (inputSign != isNegative()) 2460 changeSign(); 2461 2462 return fs; 2463 } 2464 2465 2466 /* Comparison requires normalized numbers. 
*/ 2467 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2468 cmpResult result; 2469 2470 assert(semantics == rhs.semantics); 2471 2472 switch (PackCategoriesIntoKey(category, rhs.category)) { 2473 default: 2474 llvm_unreachable(nullptr); 2475 2476 case PackCategoriesIntoKey(fcNaN, fcZero): 2477 case PackCategoriesIntoKey(fcNaN, fcNormal): 2478 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2479 case PackCategoriesIntoKey(fcNaN, fcNaN): 2480 case PackCategoriesIntoKey(fcZero, fcNaN): 2481 case PackCategoriesIntoKey(fcNormal, fcNaN): 2482 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2483 return cmpUnordered; 2484 2485 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2486 case PackCategoriesIntoKey(fcInfinity, fcZero): 2487 case PackCategoriesIntoKey(fcNormal, fcZero): 2488 if (sign) 2489 return cmpLessThan; 2490 else 2491 return cmpGreaterThan; 2492 2493 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2494 case PackCategoriesIntoKey(fcZero, fcInfinity): 2495 case PackCategoriesIntoKey(fcZero, fcNormal): 2496 if (rhs.sign) 2497 return cmpGreaterThan; 2498 else 2499 return cmpLessThan; 2500 2501 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2502 if (sign == rhs.sign) 2503 return cmpEqual; 2504 else if (sign) 2505 return cmpLessThan; 2506 else 2507 return cmpGreaterThan; 2508 2509 case PackCategoriesIntoKey(fcZero, fcZero): 2510 return cmpEqual; 2511 2512 case PackCategoriesIntoKey(fcNormal, fcNormal): 2513 break; 2514 } 2515 2516 /* Two normal numbers. Do they have the same sign? */ 2517 if (sign != rhs.sign) { 2518 if (sign) 2519 result = cmpLessThan; 2520 else 2521 result = cmpGreaterThan; 2522 } else { 2523 /* Compare absolute values; invert result if negative. 
*/ 2524 result = compareAbsoluteValue(rhs); 2525 2526 if (sign) { 2527 if (result == cmpLessThan) 2528 result = cmpGreaterThan; 2529 else if (result == cmpGreaterThan) 2530 result = cmpLessThan; 2531 } 2532 } 2533 2534 return result; 2535 } 2536 2537 /// IEEEFloat::convert - convert a value of one floating point type to another. 2538 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2539 /// records whether the transformation lost information, i.e. whether 2540 /// converting the result back to the original type will produce the 2541 /// original value (this is almost the same as return value==fsOK, but there 2542 /// are edge cases where this is not so). 2543 2544 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2545 roundingMode rounding_mode, 2546 bool *losesInfo) { 2547 lostFraction lostFraction; 2548 unsigned int newPartCount, oldPartCount; 2549 opStatus fs; 2550 int shift; 2551 const fltSemantics &fromSemantics = *semantics; 2552 bool is_signaling = isSignaling(); 2553 2554 lostFraction = lfExactlyZero; 2555 newPartCount = partCountForBits(toSemantics.precision + 1); 2556 oldPartCount = partCount(); 2557 shift = toSemantics.precision - fromSemantics.precision; 2558 2559 bool X86SpecialNan = false; 2560 if (&fromSemantics == &semX87DoubleExtended && 2561 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2562 (!(*significandParts() & 0x8000000000000000ULL) || 2563 !(*significandParts() & 0x4000000000000000ULL))) { 2564 // x86 has some unusual NaNs which cannot be represented in any other 2565 // format; note them here. 2566 X86SpecialNan = true; 2567 } 2568 2569 // If this is a truncation of a denormal number, and the target semantics 2570 // has larger exponent range than the source semantics (this can happen 2571 // when truncating from PowerPC double-double to double format), the 2572 // right shift could lose result mantissa bits. Adjust exponent instead 2573 // of performing excessive shift. 
2574 // Also do a similar trick in case shifting denormal would produce zero 2575 // significand as this case isn't handled correctly by normalize. 2576 if (shift < 0 && isFiniteNonZero()) { 2577 int omsb = significandMSB() + 1; 2578 int exponentChange = omsb - fromSemantics.precision; 2579 if (exponent + exponentChange < toSemantics.minExponent) 2580 exponentChange = toSemantics.minExponent - exponent; 2581 if (exponentChange < shift) 2582 exponentChange = shift; 2583 if (exponentChange < 0) { 2584 shift -= exponentChange; 2585 exponent += exponentChange; 2586 } else if (omsb <= -shift) { 2587 exponentChange = omsb + shift - 1; // leave at least one bit set 2588 shift -= exponentChange; 2589 exponent += exponentChange; 2590 } 2591 } 2592 2593 // If this is a truncation, perform the shift before we narrow the storage. 2594 if (shift < 0 && (isFiniteNonZero() || 2595 (category == fcNaN && semantics->nonFiniteBehavior != 2596 fltNonfiniteBehavior::NanOnly))) 2597 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2598 2599 // Fix the storage so it can hold to new value. 2600 if (newPartCount > oldPartCount) { 2601 // The new type requires more storage; make it available. 2602 integerPart *newParts; 2603 newParts = new integerPart[newPartCount]; 2604 APInt::tcSet(newParts, 0, newPartCount); 2605 if (isFiniteNonZero() || category==fcNaN) 2606 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2607 freeSignificand(); 2608 significand.parts = newParts; 2609 } else if (newPartCount == 1 && oldPartCount != 1) { 2610 // Switch to built-in storage for a single part. 2611 integerPart newPart = 0; 2612 if (isFiniteNonZero() || category==fcNaN) 2613 newPart = significandParts()[0]; 2614 freeSignificand(); 2615 significand.part = newPart; 2616 } 2617 2618 // Now that we have the right storage, switch the semantics. 2619 semantics = &toSemantics; 2620 2621 // If this is an extension, perform the shift now that the storage is 2622 // available. 
2623 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2624 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2625 2626 if (isFiniteNonZero()) { 2627 fs = normalize(rounding_mode, lostFraction); 2628 *losesInfo = (fs != opOK); 2629 } else if (category == fcNaN) { 2630 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2631 *losesInfo = 2632 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly; 2633 makeNaN(false, sign); 2634 return is_signaling ? opInvalidOp : opOK; 2635 } 2636 2637 // If NaN is negative zero, we need to create a new NaN to avoid converting 2638 // NaN to -Inf. 2639 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && 2640 semantics->nanEncoding != fltNanEncoding::NegativeZero) 2641 makeNaN(false, false); 2642 2643 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2644 2645 // For x87 extended precision, we want to make a NaN, not a special NaN if 2646 // the input wasn't special either. 2647 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2648 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2649 2650 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2651 // This also guarantees that a sNaN does not become Inf on a truncation 2652 // that loses all payload bits. 2653 if (is_signaling) { 2654 makeQuiet(); 2655 fs = opInvalidOp; 2656 } else { 2657 fs = opOK; 2658 } 2659 } else if (category == fcInfinity && 2660 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2661 makeNaN(false, sign); 2662 *losesInfo = true; 2663 fs = opInexact; 2664 } else if (category == fcZero && 2665 semantics->nanEncoding == fltNanEncoding::NegativeZero) { 2666 // Negative zero loses info, but positive zero doesn't. 2667 *losesInfo = 2668 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; 2669 fs = *losesInfo ? 
opInexact : opOK; 2670 // NaN is negative zero means -0 -> +0, which can lose information 2671 sign = false; 2672 } else { 2673 *losesInfo = false; 2674 fs = opOK; 2675 } 2676 2677 if (category == fcZero && !semantics->hasZero) 2678 makeSmallestNormalized(false); 2679 return fs; 2680 } 2681 2682 /* Convert a floating point number to an integer according to the 2683 rounding mode. If the rounded integer value is out of range this 2684 returns an invalid operation exception and the contents of the 2685 destination parts are unspecified. If the rounded value is in 2686 range but the floating point number is not the exact integer, the C 2687 standard doesn't require an inexact exception to be raised. IEEE 2688 854 does require it so we do that. 2689 2690 Note that for conversions to integer type the C standard requires 2691 round-to-zero to always be used. */ 2692 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2693 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2694 roundingMode rounding_mode, bool *isExact) const { 2695 lostFraction lost_fraction; 2696 const integerPart *src; 2697 unsigned int dstPartsCount, truncatedBits; 2698 2699 *isExact = false; 2700 2701 /* Handle the three special cases first. */ 2702 if (category == fcInfinity || category == fcNaN) 2703 return opInvalidOp; 2704 2705 dstPartsCount = partCountForBits(width); 2706 assert(dstPartsCount <= parts.size() && "Integer too big"); 2707 2708 if (category == fcZero) { 2709 APInt::tcSet(parts.data(), 0, dstPartsCount); 2710 // Negative zero can't be represented as an int. 2711 *isExact = !sign; 2712 return opOK; 2713 } 2714 2715 src = significandParts(); 2716 2717 /* Step 1: place our absolute value, with any fraction truncated, in 2718 the destination. */ 2719 if (exponent < 0) { 2720 /* Our absolute value is less than one; truncate everything. */ 2721 APInt::tcSet(parts.data(), 0, dstPartsCount); 2722 /* For exponent -1 the integer bit represents .5, look at that. 
2723 For smaller exponents leftmost truncated bit is 0. */ 2724 truncatedBits = semantics->precision -1U - exponent; 2725 } else { 2726 /* We want the most significant (exponent + 1) bits; the rest are 2727 truncated. */ 2728 unsigned int bits = exponent + 1U; 2729 2730 /* Hopelessly large in magnitude? */ 2731 if (bits > width) 2732 return opInvalidOp; 2733 2734 if (bits < semantics->precision) { 2735 /* We truncate (semantics->precision - bits) bits. */ 2736 truncatedBits = semantics->precision - bits; 2737 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2738 } else { 2739 /* We want at least as many bits as are available. */ 2740 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2741 0); 2742 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2743 bits - semantics->precision); 2744 truncatedBits = 0; 2745 } 2746 } 2747 2748 /* Step 2: work out any lost fraction, and increment the absolute 2749 value if we would round away from zero. */ 2750 if (truncatedBits) { 2751 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2752 truncatedBits); 2753 if (lost_fraction != lfExactlyZero && 2754 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2755 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2756 return opInvalidOp; /* Overflow. */ 2757 } 2758 } else { 2759 lost_fraction = lfExactlyZero; 2760 } 2761 2762 /* Step 3: check if we fit in the destination. */ 2763 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2764 2765 if (sign) { 2766 if (!isSigned) { 2767 /* Negative numbers cannot be represented as unsigned. */ 2768 if (omsb != 0) 2769 return opInvalidOp; 2770 } else { 2771 /* It takes omsb bits to represent the unsigned integer value. 2772 We lose a bit for the sign, but care is needed as the 2773 maximally negative integer is a special case. 
*/ 2774 if (omsb == width && 2775 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2776 return opInvalidOp; 2777 2778 /* This case can happen because of rounding. */ 2779 if (omsb > width) 2780 return opInvalidOp; 2781 } 2782 2783 APInt::tcNegate (parts.data(), dstPartsCount); 2784 } else { 2785 if (omsb >= width + !isSigned) 2786 return opInvalidOp; 2787 } 2788 2789 if (lost_fraction == lfExactlyZero) { 2790 *isExact = true; 2791 return opOK; 2792 } else 2793 return opInexact; 2794 } 2795 2796 /* Same as convertToSignExtendedInteger, except we provide 2797 deterministic values in case of an invalid operation exception, 2798 namely zero for NaNs and the minimal or maximal value respectively 2799 for underflow or overflow. 2800 The *isExact output tells whether the result is exact, in the sense 2801 that converting it back to the original floating point type produces 2802 the original value. This is almost equivalent to result==opOK, 2803 except for negative zeroes. 2804 */ 2805 IEEEFloat::opStatus 2806 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2807 unsigned int width, bool isSigned, 2808 roundingMode rounding_mode, bool *isExact) const { 2809 opStatus fs; 2810 2811 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2812 isExact); 2813 2814 if (fs == opInvalidOp) { 2815 unsigned int bits, dstPartsCount; 2816 2817 dstPartsCount = partCountForBits(width); 2818 assert(dstPartsCount <= parts.size() && "Integer too big"); 2819 2820 if (category == fcNaN) 2821 bits = 0; 2822 else if (sign) 2823 bits = isSigned; 2824 else 2825 bits = width - isSigned; 2826 2827 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2828 if (sign && isSigned) 2829 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2830 } 2831 2832 return fs; 2833 } 2834 2835 /* Convert an unsigned integer SRC to a floating point number, 2836 rounding according to ROUNDING_MODE. The sign of the floating 2837 point number is not modified. 
*/ 2838 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2839 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2840 unsigned int omsb, precision, dstCount; 2841 integerPart *dst; 2842 lostFraction lost_fraction; 2843 2844 category = fcNormal; 2845 omsb = APInt::tcMSB(src, srcCount) + 1; 2846 dst = significandParts(); 2847 dstCount = partCount(); 2848 precision = semantics->precision; 2849 2850 /* We want the most significant PRECISION bits of SRC. There may not 2851 be that many; extract what we can. */ 2852 if (precision <= omsb) { 2853 exponent = omsb - 1; 2854 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2855 omsb - precision); 2856 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2857 } else { 2858 exponent = precision - 1; 2859 lost_fraction = lfExactlyZero; 2860 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2861 } 2862 2863 return normalize(rounding_mode, lost_fraction); 2864 } 2865 2866 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2867 roundingMode rounding_mode) { 2868 unsigned int partCount = Val.getNumWords(); 2869 APInt api = Val; 2870 2871 sign = false; 2872 if (isSigned && api.isNegative()) { 2873 sign = true; 2874 api = -api; 2875 } 2876 2877 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2878 } 2879 2880 /* Convert a two's complement integer SRC to a floating point number, 2881 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2882 integer is signed, in which case it must be sign-extended. */ 2883 IEEEFloat::opStatus 2884 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2885 unsigned int srcCount, bool isSigned, 2886 roundingMode rounding_mode) { 2887 opStatus status; 2888 2889 if (isSigned && 2890 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2891 integerPart *copy; 2892 2893 /* If we're signed and negative negate a copy. 
*/ 2894 sign = true; 2895 copy = new integerPart[srcCount]; 2896 APInt::tcAssign(copy, src, srcCount); 2897 APInt::tcNegate(copy, srcCount); 2898 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2899 delete [] copy; 2900 } else { 2901 sign = false; 2902 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2903 } 2904 2905 return status; 2906 } 2907 2908 /* FIXME: should this just take a const APInt reference? */ 2909 IEEEFloat::opStatus 2910 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2911 unsigned int width, bool isSigned, 2912 roundingMode rounding_mode) { 2913 unsigned int partCount = partCountForBits(width); 2914 APInt api = APInt(width, ArrayRef(parts, partCount)); 2915 2916 sign = false; 2917 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2918 sign = true; 2919 api = -api; 2920 } 2921 2922 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2923 } 2924 2925 Expected<IEEEFloat::opStatus> 2926 IEEEFloat::convertFromHexadecimalString(StringRef s, 2927 roundingMode rounding_mode) { 2928 lostFraction lost_fraction = lfExactlyZero; 2929 2930 category = fcNormal; 2931 zeroSignificand(); 2932 exponent = 0; 2933 2934 integerPart *significand = significandParts(); 2935 unsigned partsCount = partCount(); 2936 unsigned bitPos = partsCount * integerPartWidth; 2937 bool computedTrailingFraction = false; 2938 2939 // Skip leading zeroes and any (hexa)decimal point. 
StringRef::iterator begin = s.begin();
  StringRef::iterator end = s.end();
  StringRef::iterator dot;
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;
  StringRef::iterator firstSignificantDigit = p;

  // Consume significand characters until the end of the string or the first
  // character that is neither a '.' nor a hexadecimal digit.
  while (p != end) {
    integerPart hex_value;

    if (*p == '.') {
      // NOTE(review): relies on skipLeadingZeroesAndAnyDot leaving `dot` equal
      // to `end` when it has not seen a point -- confirm against that helper.
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      continue;
    }

    // Stop on the first character hexDigitValue reports as UINT_MAX; it is
    // validated after the loop.
    hex_value = hexDigitValue(*p);
    if (hex_value == UINT_MAX)
      break;

    p++;

    // Store the number while we have space.
    if (bitPos) {
      // Nibbles are written from the most significant end of the significand
      // downwards; bitPos is the bit offset of the next free nibble.
      bitPos -= 4;
      hex_value <<= bitPos % integerPartWidth;
      significand[bitPos / integerPartWidth] |= hex_value;
    } else if (!computedTrailingFraction) {
      // Out of room: classify everything that no longer fits exactly once;
      // later digits are still scanned but otherwise ignored.
      auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
      if (!FractOrErr)
        return FractOrErr.takeError();
      lost_fraction = *FractOrErr;
      computedTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  if (p == end)
    return createError("Hex strings require an exponent");
  if (*p != 'p' && *p != 'P')
    return createError("Invalid character in significand");
  if (p == begin)
    return createError("Significand has no digits");
  if (dot != end && p - begin == 1)
    return createError("Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    int expAdjustment;

    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= partsCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    exponent = *ExpOrErr;
  }

  return normalize(rounding_mode, lost_fraction);
}

/* Set this value from the decimal significand DECSIGPARTS (SIGPARTCOUNT
   parts) scaled by 10^EXP, rounding according to ROUNDING_MODE.  The
   product or quotient with 5^|EXP| is evaluated at a working precision
   that doubles on every pass of the loop until the half-ulp error
   bound shows that truncating to the target precision rounds
   correctly.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.
*/
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      /* One half-ulp of error if the power of five itself was rounded.  */
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        /* truncatedBits keeps the unclamped count; only excessPrecision
           is limited to the working precision.  */
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    /* HUerr bounds the accumulated error and HUdistance is the distance
       to the nearest rounding boundary, both in half-ulps.  */
    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
    /* Otherwise retry: the for-increment doubles the working part count.  */
  }
}

/* Parse the decimal floating point literal STR into this value,
   rounding according to ROUNDING_MODE.  Returns the resulting status,
   or an Error if interpretDecimal or the digit scan rejects the
   text.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null terminators
  // and zero decimals with non-zero exponents.
  //
  // We computed firstSigDigit by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there can
  // be at most one dot. On the other hand, if we have a zero with a non-zero
  // exponent, then we know that D.firstSigDigit will be non-numeric.
if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    fs = opOK;
    /* Formats whose NaN encoding is NegativeZero get a positive zero.  */
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
    /* Formats without a zero use the smallest normalized value instead.  */
    if (!semantics->hasZero)
      makeSmallestNormalized(false);

    /* Check whether the normalized exponent is high enough to overflow
       max during the log-rebasing in the max-exponent check below. */
  } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
    fs = handleOverflow(rounding_mode);

    /* If it wasn't, then it also wasn't high enough to overflow max
       during the log-rebasing in the min-exponent check.  Check that it
       won't overflow min in either check, then perform the min-exponent
       check. */
  } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
             (D.normalizedExponent + 1) * 28738 <=
               8651 * (semantics->minExponent - (int) semantics->precision)) {
    /* Underflow to zero and round.  */
    category = fcNormal;
    zeroSignificand();
    fs = normalize(rounding_mode, lfLessThanHalf);

    /* We can finally safely perform the max-exponent check. */
  } else if ((D.normalizedExponent - 1) * 42039
             >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    fs = handleOverflow(rounding_mode);
  } else {
    integerPart *decSignificand;
    unsigned int partCount;

    /* A tight upper bound on number of bits required to hold an
       N-digit decimal integer is N * 196 / 59.  Allocate enough space
       to hold the full significand, and an extra part required by
       tcMultiplyPart.  */
    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
    partCount = partCountForBits(1 + 196 * partCount / 59);
    decSignificand = new integerPart[partCount + 1];
    /* From here on partCount counts the parts actually in use.  */
    partCount = 0;

    /* Convert to binary efficiently - we do almost all multiplication
       in an integerPart.  Only when this would overflow do we do a
       single bignum multiplication, and then revert again to
       multiplication in an integerPart.  */
    do {
      integerPart decValue, val, multiplier;

      val = 0;
      multiplier = 1;

      do {
        /* Step over the decimal point; stop if it was the last
           character of the string.  */
        if (*p == '.') {
          p++;
          if (p == str.end()) {
            break;
          }
        }
        decValue = decDigitValue(*p++);
        if (decValue >= 10U) {
          /* The buffer is owned manually, so release it on this early
             exit as well.  */
          delete[] decSignificand;
          return createError("Invalid character in significand");
        }
        multiplier *= 10;
        val = val * 10 + decValue;
        /* The maximum number that can be multiplied by ten with any
           digit added without overflowing an integerPart.  */
      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);

      /* Multiply out the current part.  */
      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
                            partCount, partCount + 1, false);

      /* If we used another part (likely but not guaranteed), increase
         the count.  */
      if (decSignificand[partCount])
        partCount++;
    } while (p <= D.lastSigDigit);

    category = fcNormal;
    fs = roundSignificandWithExponent(decSignificand, partCount,
                                      D.exponent, rounding_mode);

    delete [] decSignificand;
  }

  return fs;
}

/* Try to interpret STR as one of the special spellings handled here:
   an infinity ("inf", "INFINITY", "+Inf", and "-" followed by "inf",
   "INFINITY" or "Inf") or a NaN.  Returns true and sets this value if
   STR matched, false otherwise.  */
bool IEEEFloat::convertFromStringSpecials(StringRef str) {
  const size_t MIN_NAME_SIZE = 3;

  if (str.size() < MIN_NAME_SIZE)
    return false;

  if (str == "inf" || str == "INFINITY" || str == "+Inf") {
    makeInf(false);
    return true;
  }

  bool IsNegative = str.front() == '-';
  if (IsNegative) {
    str = str.drop_front();
    if (str.size() < MIN_NAME_SIZE)
      return false;

    if (str == "inf" || str == "INFINITY" || str == "Inf") {
      makeInf(true);
      return true;
    }
  }

  // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3253 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 3254 if (IsSignaling) { 3255 str = str.drop_front(); 3256 if (str.size() < MIN_NAME_SIZE) 3257 return false; 3258 } 3259 3260 if (str.starts_with("nan") || str.starts_with("NaN")) { 3261 str = str.drop_front(3); 3262 3263 // A NaN without payload. 3264 if (str.empty()) { 3265 makeNaN(IsSignaling, IsNegative); 3266 return true; 3267 } 3268 3269 // Allow the payload to be inside parentheses. 3270 if (str.front() == '(') { 3271 // Parentheses should be balanced (and not empty). 3272 if (str.size() <= 2 || str.back() != ')') 3273 return false; 3274 3275 str = str.slice(1, str.size() - 1); 3276 } 3277 3278 // Determine the payload number's radix. 3279 unsigned Radix = 10; 3280 if (str[0] == '0') { 3281 if (str.size() > 1 && tolower(str[1]) == 'x') { 3282 str = str.drop_front(2); 3283 Radix = 16; 3284 } else 3285 Radix = 8; 3286 } 3287 3288 // Parse the payload and make the NaN. 3289 APInt Payload; 3290 if (!str.getAsInteger(Radix, Payload)) { 3291 makeNaN(IsSignaling, IsNegative, &Payload); 3292 return true; 3293 } 3294 } 3295 3296 return false; 3297 } 3298 3299 Expected<IEEEFloat::opStatus> 3300 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3301 if (str.empty()) 3302 return createError("Invalid string length"); 3303 3304 // Handle special cases. 3305 if (convertFromStringSpecials(str)) 3306 return opOK; 3307 3308 /* Handle a leading minus sign. */ 3309 StringRef::iterator p = str.begin(); 3310 size_t slen = str.size(); 3311 sign = *p == '-' ? 
1 : 0;
  /* A negative literal cannot be represented by a format without a
     signed representation.  */
  if (sign && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");

  /* Step over an explicit sign; something must follow it.  */
  if (*p == '-' || *p == '+') {
    p++;
    slen--;
    if (!slen)
      return createError("String has no digits");
  }

  /* A "0x"/"0X" prefix selects the hexadecimal parser, which receives
     the text after the prefix.  */
  if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
    if (slen == 2)
      return createError("Invalid string");
    return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
                                        rounding_mode);
  }

  return convertFromDecimalString(StringRef(p, slen), rounding_mode);
}

/* Write out a hexadecimal representation of the floating point value
   to DST, which must be of sufficient size, in the C99 form
   [-]0xh.hhhhp[+-]d.  Return the number of characters written,
   excluding the terminating NUL.

   If UPPERCASE, the output is in upper case, otherwise in lower case.

   HEXDIGITS digits appear altogether, rounding the value if
   necessary.  If HEXDIGITS is 0, the minimal precision to display the
   number precisely is used instead.  If nothing would appear after
   the decimal point it is suppressed.

   The decimal exponent is always printed and has at least one digit.
   Zero values display an exponent of zero.  Infinities and NaNs
   appear as "infinity" or "nan" respectively.

   The above rules are as specified by C99.  There is ambiguity about
   what the leading hexadecimal digit should be.  This implementation
   uses whatever is necessary so that the exponent is displayed as
   stored.  This implies the exponent will fall within the IEEE format
   range, and the leading hexadecimal digit will be 0 (for denormals),
   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
   any other digits zero).
3356 */ 3357 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 3358 bool upperCase, 3359 roundingMode rounding_mode) const { 3360 char *p; 3361 3362 p = dst; 3363 if (sign) 3364 *dst++ = '-'; 3365 3366 switch (category) { 3367 case fcInfinity: 3368 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 3369 dst += sizeof infinityL - 1; 3370 break; 3371 3372 case fcNaN: 3373 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 3374 dst += sizeof NaNU - 1; 3375 break; 3376 3377 case fcZero: 3378 *dst++ = '0'; 3379 *dst++ = upperCase ? 'X': 'x'; 3380 *dst++ = '0'; 3381 if (hexDigits > 1) { 3382 *dst++ = '.'; 3383 memset (dst, '0', hexDigits - 1); 3384 dst += hexDigits - 1; 3385 } 3386 *dst++ = upperCase ? 'P': 'p'; 3387 *dst++ = '0'; 3388 break; 3389 3390 case fcNormal: 3391 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 3392 break; 3393 } 3394 3395 *dst = 0; 3396 3397 return static_cast<unsigned int>(dst - p); 3398 } 3399 3400 /* Does the hard work of outputting the correctly rounded hexadecimal 3401 form of a normal floating point number with the specified number of 3402 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3403 digits necessary to print the value precisely is output. */ 3404 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3405 bool upperCase, 3406 roundingMode rounding_mode) const { 3407 unsigned int count, valueBits, shift, partsCount, outputDigits; 3408 const char *hexDigitChars; 3409 const integerPart *significand; 3410 char *p; 3411 bool roundUp; 3412 3413 *dst++ = '0'; 3414 *dst++ = upperCase ? 'X': 'x'; 3415 3416 roundUp = false; 3417 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3418 3419 significand = significandParts(); 3420 partsCount = partCount(); 3421 3422 /* +3 because the first digit only uses the single integer bit, so 3423 we have 3 virtual zero most-significant-bits. 
*/
  valueBits = semantics->precision + 3;
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Bring in the bits that the shift pulled up from the next lower
       part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Note that hexDigitChars has a trailing '0'.  */
    /* Increment the last digit and keep carrying leftwards while a
       digit wraps round to '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}

// Hash an IEEEFloat.  Values that are not finite and non-zero hash their
// category, sign and precision only; finite non-zero values additionally
// hash the exponent and every significand part.
hash_code hash_value(const IEEEFloat &Arg) {
  if (!Arg.isFiniteNonZero())
    return hash_combine((uint8_t)Arg.category,
                        // NaN has no sign, fix it at zero.
                        Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
                        Arg.semantics->precision);

  // Normal floats need their exponent and significand hashed.
  return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
                      Arg.semantics->precision, Arg.exponent,
                      hash_combine_range(
                        Arg.significandParts(),
                        Arg.significandParts() + Arg.partCount()));
}

// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.

// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3529 3530 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3531 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3532 assert(partCount()==2); 3533 3534 uint64_t myexponent, mysignificand; 3535 3536 if (isFiniteNonZero()) { 3537 myexponent = exponent+16383; //bias 3538 mysignificand = significandParts()[0]; 3539 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3540 myexponent = 0; // denormal 3541 } else if (category==fcZero) { 3542 myexponent = 0; 3543 mysignificand = 0; 3544 } else if (category==fcInfinity) { 3545 myexponent = 0x7fff; 3546 mysignificand = 0x8000000000000000ULL; 3547 } else { 3548 assert(category == fcNaN && "Unknown category"); 3549 myexponent = 0x7fff; 3550 mysignificand = significandParts()[0]; 3551 } 3552 3553 uint64_t words[2]; 3554 words[0] = mysignificand; 3555 words[1] = ((uint64_t)(sign & 1) << 15) | 3556 (myexponent & 0x7fffLL); 3557 return APInt(80, words); 3558 } 3559 3560 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3561 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3562 assert(partCount()==2); 3563 3564 uint64_t words[2]; 3565 opStatus fs; 3566 bool losesInfo; 3567 3568 // Convert number to double. To avoid spurious underflows, we re- 3569 // normalize against the "double" minExponent first, and only *then* 3570 // truncate the mantissa. The result of that second conversion 3571 // may be inexact, but should never underflow. 3572 // Declare fltSemantics before APFloat that uses it (and 3573 // saves pointer to it) to ensure correct destruction order. 
fltSemantics extendedSemantics = *semantics;
  extendedSemantics.minExponent = semIEEEdouble.minExponent;
  IEEEFloat extended(*this);
  fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs;

  // First word: the value converted to double.
  IEEEFloat u(extended);
  fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK || fs == opInexact);
  (void)fs;
  words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();

  // If conversion was exact or resulted in a special case, we're done;
  // just set the second double to zero.  Otherwise, re-convert back to
  // the extended format and compute the difference.  This now should
  // convert exactly to double.
  if (u.isFiniteNonZero() && losesInfo) {
    fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    // Second word: what the first double failed to capture.
    IEEEFloat v(extended);
    v.subtract(u, rmNearestTiesToEven);
    fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;
    words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
  } else {
    words[1] = 0;
  }

  return APInt(128, words);
}

// Produce the in-memory bit pattern of this value for the format described
// by S: trailing significand bits in the low bits, then the biased exponent,
// then the sign in the top bit.
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
  assert(semantics == &S);
  const int bias =
      (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
  constexpr integerPart integer_bit =
      integerPart{1} << (trailing_significand_bits % integerPartWidth);
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int exponent_bits =
      trailing_significand_bits ?
(S.sizeInBits - 1 - trailing_significand_bits)
                                : S.sizeInBits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;

  uint64_t myexponent;
  std::array<integerPart, partCountForBits(trailing_significand_bits)>
      mysignificand;

  // Pick the biased exponent and significand for each category.
  if (isFiniteNonZero()) {
    myexponent = exponent + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
    // A clear integer bit at the minimum exponent marks a denormal, which
    // is encoded with an exponent field of zero.
    if (myexponent == 1 &&
        !(significandParts()[integer_bit_part] & integer_bit))
      myexponent = 0; // denormal
  } else if (category == fcZero) {
    if (!S.hasZero)
      llvm_unreachable("semantics does not support zero!");
    myexponent = ::exponentZero(S) + bias;
    mysignificand.fill(0);
  } else if (category == fcInfinity) {
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
        S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support inf!");
    myexponent = ::exponentInf(S) + bias;
    mysignificand.fill(0);
  } else {
    assert(category == fcNaN && "Unknown category!");
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support NaN!");
    myexponent = ::exponentNaN(S) + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
  }
  // Assemble the output words: significand first, remaining words zeroed.
  std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
  auto words_iter =
      std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
  if constexpr (significand_mask != 0) {
    // Clear the integer bit.
    words[mysignificand.size() - 1] &= significand_mask;
  }
  std::fill(words_iter, words.end(), uint64_t{0});
  // Sign and exponent are both OR-ed into the most significant word.
  constexpr size_t last_word = words.size() - 1;
  uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
                          << ((S.sizeInBits - 1) % 64);
  words[last_word] |= shifted_sign;
  uint64_t shifted_exponent = (myexponent & exponent_mask)
                              << (trailing_significand_bits % 64);
  words[last_word] |= shifted_exponent;
  // Formats that fit in one word use the single-value APInt constructor.
  if constexpr (last_word == 0) {
    return APInt(S.sizeInBits, words[0]);
  }
  return APInt(S.sizeInBits, words);
}

// The functions below are per-format wrappers: each asserts the expected
// significand part count and forwards to convertIEEEFloatToAPInt for its
// semantics.

APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}

APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}

APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}

APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}

APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}

APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}

APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}

APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3>();
}

APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return
convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}

// More per-format wrappers: each asserts a single significand part and
// forwards to convertIEEEFloatToAPInt for its semantics.

APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}

APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}

APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E3M4>();
}

APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}

APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
}

APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
}

APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
}

APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
}

// This function creates an APInt that is just a bit map of the floating
// point constant as it would appear in memory.  It is not a conversion,
// and treating the result as a normal integer is unlikely to be useful.
3764 3765 APInt IEEEFloat::bitcastToAPInt() const { 3766 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3767 return convertHalfAPFloatToAPInt(); 3768 3769 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3770 return convertBFloatAPFloatToAPInt(); 3771 3772 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3773 return convertFloatAPFloatToAPInt(); 3774 3775 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3776 return convertDoubleAPFloatToAPInt(); 3777 3778 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3779 return convertQuadrupleAPFloatToAPInt(); 3780 3781 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3782 return convertPPCDoubleDoubleAPFloatToAPInt(); 3783 3784 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3785 return convertFloat8E5M2APFloatToAPInt(); 3786 3787 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3788 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3789 3790 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3) 3791 return convertFloat8E4M3APFloatToAPInt(); 3792 3793 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3794 return convertFloat8E4M3FNAPFloatToAPInt(); 3795 3796 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3797 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3798 3799 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3800 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3801 3802 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4) 3803 return convertFloat8E3M4APFloatToAPInt(); 3804 3805 if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3806 return convertFloatTF32APFloatToAPInt(); 3807 3808 if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU) 3809 return convertFloat8E8M0FNUAPFloatToAPInt(); 3810 3811 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) 3812 return convertFloat6E3M2FNAPFloatToAPInt(); 3813 3814 if (semantics == (const 
llvm::fltSemantics *)&semFloat6E2M3FN) 3815 return convertFloat6E2M3FNAPFloatToAPInt(); 3816 3817 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN) 3818 return convertFloat4E2M1FNAPFloatToAPInt(); 3819 3820 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3821 "unknown format!"); 3822 return convertF80LongDoubleAPFloatToAPInt(); 3823 } 3824 3825 float IEEEFloat::convertToFloat() const { 3826 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3827 "Float semantics are not IEEEsingle"); 3828 APInt api = bitcastToAPInt(); 3829 return api.bitsToFloat(); 3830 } 3831 3832 double IEEEFloat::convertToDouble() const { 3833 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3834 "Float semantics are not IEEEdouble"); 3835 APInt api = bitcastToAPInt(); 3836 return api.bitsToDouble(); 3837 } 3838 3839 #ifdef HAS_IEE754_FLOAT128 3840 float128 IEEEFloat::convertToQuad() const { 3841 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && 3842 "Float semantics are not IEEEquads"); 3843 APInt api = bitcastToAPInt(); 3844 return api.bitsToQuad(); 3845 } 3846 #endif 3847 3848 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3849 /// does not support these bit patterns: 3850 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3851 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3852 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3853 /// exponent = 0, integer bit 1 ("pseudodenormal") 3854 /// At the moment, the first three are treated as NaNs, the last one as Normal. 
3855 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3856 uint64_t i1 = api.getRawData()[0]; 3857 uint64_t i2 = api.getRawData()[1]; 3858 uint64_t myexponent = (i2 & 0x7fff); 3859 uint64_t mysignificand = i1; 3860 uint8_t myintegerbit = mysignificand >> 63; 3861 3862 initialize(&semX87DoubleExtended); 3863 assert(partCount()==2); 3864 3865 sign = static_cast<unsigned int>(i2>>15); 3866 if (myexponent == 0 && mysignificand == 0) { 3867 makeZero(sign); 3868 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3869 makeInf(sign); 3870 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3871 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3872 category = fcNaN; 3873 exponent = exponentNaN(); 3874 significandParts()[0] = mysignificand; 3875 significandParts()[1] = 0; 3876 } else { 3877 category = fcNormal; 3878 exponent = myexponent - 16383; 3879 significandParts()[0] = mysignificand; 3880 significandParts()[1] = 0; 3881 if (myexponent==0) // denormal 3882 exponent = -16382; 3883 } 3884 } 3885 3886 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3887 uint64_t i1 = api.getRawData()[0]; 3888 uint64_t i2 = api.getRawData()[1]; 3889 opStatus fs; 3890 bool losesInfo; 3891 3892 // Get the first double and convert to our format. 3893 initFromDoubleAPInt(APInt(64, i1)); 3894 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3895 assert(fs == opOK && !losesInfo); 3896 (void)fs; 3897 3898 // Unless we have a special case, add in second double. 3899 if (isFiniteNonZero()) { 3900 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3901 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3902 assert(fs == opOK && !losesInfo); 3903 (void)fs; 3904 3905 add(v, rmNearestTiesToEven); 3906 } 3907 } 3908 3909 // The E8M0 format has the following characteristics: 3910 // It is an 8-bit unsigned format with only exponents (no actual significand). 
3911 // No encodings for {zero, infinities or denorms}. 3912 // NaN is represented by all 1's. 3913 // Bias is 127. 3914 void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) { 3915 const uint64_t exponent_mask = 0xff; 3916 uint64_t val = api.getRawData()[0]; 3917 uint64_t myexponent = (val & exponent_mask); 3918 3919 initialize(&semFloat8E8M0FNU); 3920 assert(partCount() == 1); 3921 3922 // This format has unsigned representation only 3923 sign = 0; 3924 3925 // Set the significand 3926 // This format does not have any significand but the 'Pth' precision bit is 3927 // always set to 1 for consistency in APFloat's internal representation. 3928 uint64_t mysignificand = 1; 3929 significandParts()[0] = mysignificand; 3930 3931 // This format can either have a NaN or fcNormal 3932 // All 1's i.e. 255 is a NaN 3933 if (val == exponent_mask) { 3934 category = fcNaN; 3935 exponent = exponentNaN(); 3936 return; 3937 } 3938 // Handle fcNormal... 3939 category = fcNormal; 3940 exponent = myexponent - 127; // 127 is bias 3941 return; 3942 } 3943 template <const fltSemantics &S> 3944 void IEEEFloat::initFromIEEEAPInt(const APInt &api) { 3945 assert(api.getBitWidth() == S.sizeInBits); 3946 constexpr integerPart integer_bit = integerPart{1} 3947 << ((S.precision - 1) % integerPartWidth); 3948 constexpr uint64_t significand_mask = integer_bit - 1; 3949 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3950 constexpr unsigned int stored_significand_parts = 3951 partCountForBits(trailing_significand_bits); 3952 constexpr unsigned int exponent_bits = 3953 S.sizeInBits - 1 - trailing_significand_bits; 3954 static_assert(exponent_bits < 64); 3955 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; 3956 constexpr int bias = -(S.minExponent - 1); 3957 3958 // Copy the bits of the significand. We need to clear out the exponent and 3959 // sign bit in the last word. 
3960 std::array<integerPart, stored_significand_parts> mysignificand; 3961 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin()); 3962 if constexpr (significand_mask != 0) { 3963 mysignificand[mysignificand.size() - 1] &= significand_mask; 3964 } 3965 3966 // We assume the last word holds the sign bit, the exponent, and potentially 3967 // some of the trailing significand field. 3968 uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; 3969 uint64_t myexponent = 3970 (last_word >> (trailing_significand_bits % 64)) & exponent_mask; 3971 3972 initialize(&S); 3973 assert(partCount() == mysignificand.size()); 3974 3975 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); 3976 3977 bool all_zero_significand = 3978 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; }); 3979 3980 bool is_zero = myexponent == 0 && all_zero_significand; 3981 3982 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) { 3983 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { 3984 makeInf(sign); 3985 return; 3986 } 3987 } 3988 3989 bool is_nan = false; 3990 3991 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) { 3992 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; 3993 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) { 3994 bool all_ones_significand = 3995 std::all_of(mysignificand.begin(), mysignificand.end() - 1, 3996 [](integerPart bits) { return bits == ~integerPart{0}; }) && 3997 (!significand_mask || 3998 mysignificand[mysignificand.size() - 1] == significand_mask); 3999 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; 4000 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) { 4001 is_nan = is_zero && sign; 4002 } 4003 4004 if (is_nan) { 4005 category = fcNaN; 4006 exponent = ::exponentNaN(S); 4007 std::copy_n(mysignificand.begin(), mysignificand.size(), 4008 significandParts()); 4009 return; 4010 } 4011 
4012 if (is_zero) { 4013 makeZero(sign); 4014 return; 4015 } 4016 4017 category = fcNormal; 4018 exponent = myexponent - bias; 4019 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts()); 4020 if (myexponent == 0) // denormal 4021 exponent = S.minExponent; 4022 else 4023 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit 4024 } 4025 4026 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 4027 initFromIEEEAPInt<semIEEEquad>(api); 4028 } 4029 4030 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 4031 initFromIEEEAPInt<semIEEEdouble>(api); 4032 } 4033 4034 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 4035 initFromIEEEAPInt<semIEEEsingle>(api); 4036 } 4037 4038 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 4039 initFromIEEEAPInt<semBFloat>(api); 4040 } 4041 4042 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 4043 initFromIEEEAPInt<semIEEEhalf>(api); 4044 } 4045 4046 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { 4047 initFromIEEEAPInt<semFloat8E5M2>(api); 4048 } 4049 4050 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { 4051 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); 4052 } 4053 4054 void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) { 4055 initFromIEEEAPInt<semFloat8E4M3>(api); 4056 } 4057 4058 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { 4059 initFromIEEEAPInt<semFloat8E4M3FN>(api); 4060 } 4061 4062 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { 4063 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); 4064 } 4065 4066 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { 4067 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); 4068 } 4069 4070 void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) { 4071 initFromIEEEAPInt<semFloat8E3M4>(api); 4072 } 4073 4074 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { 4075 initFromIEEEAPInt<semFloatTF32>(api); 4076 } 4077 4078 void 
IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) { 4079 initFromIEEEAPInt<semFloat6E3M2FN>(api); 4080 } 4081 4082 void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) { 4083 initFromIEEEAPInt<semFloat6E2M3FN>(api); 4084 } 4085 4086 void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) { 4087 initFromIEEEAPInt<semFloat4E2M1FN>(api); 4088 } 4089 4090 /// Treat api as containing the bits of a floating point number. 4091 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 4092 assert(api.getBitWidth() == Sem->sizeInBits); 4093 if (Sem == &semIEEEhalf) 4094 return initFromHalfAPInt(api); 4095 if (Sem == &semBFloat) 4096 return initFromBFloatAPInt(api); 4097 if (Sem == &semIEEEsingle) 4098 return initFromFloatAPInt(api); 4099 if (Sem == &semIEEEdouble) 4100 return initFromDoubleAPInt(api); 4101 if (Sem == &semX87DoubleExtended) 4102 return initFromF80LongDoubleAPInt(api); 4103 if (Sem == &semIEEEquad) 4104 return initFromQuadrupleAPInt(api); 4105 if (Sem == &semPPCDoubleDoubleLegacy) 4106 return initFromPPCDoubleDoubleAPInt(api); 4107 if (Sem == &semFloat8E5M2) 4108 return initFromFloat8E5M2APInt(api); 4109 if (Sem == &semFloat8E5M2FNUZ) 4110 return initFromFloat8E5M2FNUZAPInt(api); 4111 if (Sem == &semFloat8E4M3) 4112 return initFromFloat8E4M3APInt(api); 4113 if (Sem == &semFloat8E4M3FN) 4114 return initFromFloat8E4M3FNAPInt(api); 4115 if (Sem == &semFloat8E4M3FNUZ) 4116 return initFromFloat8E4M3FNUZAPInt(api); 4117 if (Sem == &semFloat8E4M3B11FNUZ) 4118 return initFromFloat8E4M3B11FNUZAPInt(api); 4119 if (Sem == &semFloat8E3M4) 4120 return initFromFloat8E3M4APInt(api); 4121 if (Sem == &semFloatTF32) 4122 return initFromFloatTF32APInt(api); 4123 if (Sem == &semFloat8E8M0FNU) 4124 return initFromFloat8E8M0FNUAPInt(api); 4125 if (Sem == &semFloat6E3M2FN) 4126 return initFromFloat6E3M2FNAPInt(api); 4127 if (Sem == &semFloat6E2M3FN) 4128 return initFromFloat6E2M3FNAPInt(api); 4129 if (Sem == &semFloat4E2M1FN) 4130 return 
initFromFloat4E2M1FNAPInt(api); 4131 4132 llvm_unreachable(nullptr); 4133 } 4134 4135 /// Make this number the largest magnitude normal number in the given 4136 /// semantics. 4137 void IEEEFloat::makeLargest(bool Negative) { 4138 if (Negative && !semantics->hasSignedRepr) 4139 llvm_unreachable( 4140 "This floating point format does not support signed values"); 4141 // We want (in interchange format): 4142 // sign = {Negative} 4143 // exponent = 1..10 4144 // significand = 1..1 4145 category = fcNormal; 4146 sign = Negative; 4147 exponent = semantics->maxExponent; 4148 4149 // Use memset to set all but the highest integerPart to all ones. 4150 integerPart *significand = significandParts(); 4151 unsigned PartCount = partCount(); 4152 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 4153 4154 // Set the high integerPart especially setting all unused top bits for 4155 // internal consistency. 4156 const unsigned NumUnusedHighBits = 4157 PartCount*integerPartWidth - semantics->precision; 4158 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 4159 ? (~integerPart(0) >> NumUnusedHighBits) 4160 : 0; 4161 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 4162 semantics->nanEncoding == fltNanEncoding::AllOnes && 4163 (semantics->precision > 1)) 4164 significand[0] &= ~integerPart(1); 4165 } 4166 4167 /// Make this number the smallest magnitude denormal number in the given 4168 /// semantics. 
4169 void IEEEFloat::makeSmallest(bool Negative) { 4170 if (Negative && !semantics->hasSignedRepr) 4171 llvm_unreachable( 4172 "This floating point format does not support signed values"); 4173 // We want (in interchange format): 4174 // sign = {Negative} 4175 // exponent = 0..0 4176 // significand = 0..01 4177 category = fcNormal; 4178 sign = Negative; 4179 exponent = semantics->minExponent; 4180 APInt::tcSet(significandParts(), 1, partCount()); 4181 } 4182 4183 void IEEEFloat::makeSmallestNormalized(bool Negative) { 4184 if (Negative && !semantics->hasSignedRepr) 4185 llvm_unreachable( 4186 "This floating point format does not support signed values"); 4187 // We want (in interchange format): 4188 // sign = {Negative} 4189 // exponent = 0..0 4190 // significand = 10..0 4191 4192 category = fcNormal; 4193 zeroSignificand(); 4194 sign = Negative; 4195 exponent = semantics->minExponent; 4196 APInt::tcSetBit(significandParts(), semantics->precision - 1); 4197 } 4198 4199 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 4200 initFromAPInt(&Sem, API); 4201 } 4202 4203 IEEEFloat::IEEEFloat(float f) { 4204 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 4205 } 4206 4207 IEEEFloat::IEEEFloat(double d) { 4208 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 4209 } 4210 4211 namespace { 4212 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 4213 Buffer.append(Str.begin(), Str.end()); 4214 } 4215 4216 /// Removes data from the given significand until it is no more 4217 /// precise than is required for the desired precision. 4218 void AdjustToPrecision(APInt &significand, 4219 int &exp, unsigned FormatPrecision) { 4220 unsigned bits = significand.getActiveBits(); 4221 4222 // 196/59 is a very slight overestimate of lg_2(10). 
4223 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 4224 4225 if (bits <= bitsRequired) return; 4226 4227 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 4228 if (!tensRemovable) return; 4229 4230 exp += tensRemovable; 4231 4232 APInt divisor(significand.getBitWidth(), 1); 4233 APInt powten(significand.getBitWidth(), 10); 4234 while (true) { 4235 if (tensRemovable & 1) 4236 divisor *= powten; 4237 tensRemovable >>= 1; 4238 if (!tensRemovable) break; 4239 powten *= powten; 4240 } 4241 4242 significand = significand.udiv(divisor); 4243 4244 // Truncate the significand down to its active bit count. 4245 significand = significand.trunc(significand.getActiveBits()); 4246 } 4247 4248 4249 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 4250 int &exp, unsigned FormatPrecision) { 4251 unsigned N = buffer.size(); 4252 if (N <= FormatPrecision) return; 4253 4254 // The most significant figures are the last ones in the buffer. 4255 unsigned FirstSignificant = N - FormatPrecision; 4256 4257 // Round. 4258 // FIXME: this probably shouldn't use 'round half up'. 4259 4260 // Rounding down is just a truncation, except we also want to drop 4261 // trailing zeros from the new result. 4262 if (buffer[FirstSignificant - 1] < '5') { 4263 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 4264 FirstSignificant++; 4265 4266 exp += FirstSignificant; 4267 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 4268 return; 4269 } 4270 4271 // Rounding up requires a decimal add-with-carry. If we continue 4272 // the carry, the newly-introduced zeros will just be truncated. 4273 for (unsigned I = FirstSignificant; I != N; ++I) { 4274 if (buffer[I] == '9') { 4275 FirstSignificant++; 4276 } else { 4277 buffer[I]++; 4278 break; 4279 } 4280 } 4281 4282 // If we carried through, we have exactly one digit of precision. 
4283 if (FirstSignificant == N) { 4284 exp += FirstSignificant; 4285 buffer.clear(); 4286 buffer.push_back('1'); 4287 return; 4288 } 4289 4290 exp += FirstSignificant; 4291 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 4292 } 4293 4294 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp, 4295 APInt significand, unsigned FormatPrecision, 4296 unsigned FormatMaxPadding, bool TruncateZero) { 4297 const int semanticsPrecision = significand.getBitWidth(); 4298 4299 if (isNeg) 4300 Str.push_back('-'); 4301 4302 // Set FormatPrecision if zero. We want to do this before we 4303 // truncate trailing zeros, as those are part of the precision. 4304 if (!FormatPrecision) { 4305 // We use enough digits so the number can be round-tripped back to an 4306 // APFloat. The formula comes from "How to Print Floating-Point Numbers 4307 // Accurately" by Steele and White. 4308 // FIXME: Using a formula based purely on the precision is conservative; 4309 // we can print fewer digits depending on the actual value being printed. 4310 4311 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 4312 FormatPrecision = 2 + semanticsPrecision * 59 / 196; 4313 } 4314 4315 // Ignore trailing binary zeros. 4316 int trailingZeros = significand.countr_zero(); 4317 exp += trailingZeros; 4318 significand.lshrInPlace(trailingZeros); 4319 4320 // Change the exponent from 2^e to 10^e. 4321 if (exp == 0) { 4322 // Nothing to do. 4323 } else if (exp > 0) { 4324 // Just shift left. 4325 significand = significand.zext(semanticsPrecision + exp); 4326 significand <<= exp; 4327 exp = 0; 4328 } else { /* exp < 0 */ 4329 int texp = -exp; 4330 4331 // We transform this using the identity: 4332 // (N)(2^-e) == (N)(5^e)(10^-e) 4333 // This means we have to multiply N (the significand) by 5^e. 
4334 // To avoid overflow, we have to operate on numbers large 4335 // enough to store N * 5^e: 4336 // log2(N * 5^e) == log2(N) + e * log2(5) 4337 // <= semantics->precision + e * 137 / 59 4338 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 4339 4340 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59; 4341 4342 // Multiply significand by 5^e. 4343 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 4344 significand = significand.zext(precision); 4345 APInt five_to_the_i(precision, 5); 4346 while (true) { 4347 if (texp & 1) 4348 significand *= five_to_the_i; 4349 4350 texp >>= 1; 4351 if (!texp) 4352 break; 4353 five_to_the_i *= five_to_the_i; 4354 } 4355 } 4356 4357 AdjustToPrecision(significand, exp, FormatPrecision); 4358 4359 SmallVector<char, 256> buffer; 4360 4361 // Fill the buffer. 4362 unsigned precision = significand.getBitWidth(); 4363 if (precision < 4) { 4364 // We need enough precision to store the value 10. 4365 precision = 4; 4366 significand = significand.zext(precision); 4367 } 4368 APInt ten(precision, 10); 4369 APInt digit(precision, 0); 4370 4371 bool inTrail = true; 4372 while (significand != 0) { 4373 // digit <- significand % 10 4374 // significand <- significand / 10 4375 APInt::udivrem(significand, ten, significand, digit); 4376 4377 unsigned d = digit.getZExtValue(); 4378 4379 // Drop trailing zeros. 4380 if (inTrail && !d) 4381 exp++; 4382 else { 4383 buffer.push_back((char) ('0' + d)); 4384 inTrail = false; 4385 } 4386 } 4387 4388 assert(!buffer.empty() && "no characters in buffer!"); 4389 4390 // Drop down to FormatPrecision. 4391 // TODO: don't do more precise calculations above than are required. 4392 AdjustToPrecision(buffer, exp, FormatPrecision); 4393 4394 unsigned NDigits = buffer.size(); 4395 4396 // Check whether we should use scientific notation. 
4397 bool FormatScientific; 4398 if (!FormatMaxPadding) 4399 FormatScientific = true; 4400 else { 4401 if (exp >= 0) { 4402 // 765e3 --> 765000 4403 // ^^^ 4404 // But we shouldn't make the number look more precise than it is. 4405 FormatScientific = ((unsigned) exp > FormatMaxPadding || 4406 NDigits + (unsigned) exp > FormatPrecision); 4407 } else { 4408 // Power of the most significant digit. 4409 int MSD = exp + (int) (NDigits - 1); 4410 if (MSD >= 0) { 4411 // 765e-2 == 7.65 4412 FormatScientific = false; 4413 } else { 4414 // 765e-5 == 0.00765 4415 // ^ ^^ 4416 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 4417 } 4418 } 4419 } 4420 4421 // Scientific formatting is pretty straightforward. 4422 if (FormatScientific) { 4423 exp += (NDigits - 1); 4424 4425 Str.push_back(buffer[NDigits-1]); 4426 Str.push_back('.'); 4427 if (NDigits == 1 && TruncateZero) 4428 Str.push_back('0'); 4429 else 4430 for (unsigned I = 1; I != NDigits; ++I) 4431 Str.push_back(buffer[NDigits-1-I]); 4432 // Fill with zeros up to FormatPrecision. 4433 if (!TruncateZero && FormatPrecision > NDigits - 1) 4434 Str.append(FormatPrecision - NDigits + 1, '0'); 4435 // For !TruncateZero we use lower 'e'. 4436 Str.push_back(TruncateZero ? 'E' : 'e'); 4437 4438 Str.push_back(exp >= 0 ? '+' : '-'); 4439 if (exp < 0) 4440 exp = -exp; 4441 SmallVector<char, 6> expbuf; 4442 do { 4443 expbuf.push_back((char) ('0' + (exp % 10))); 4444 exp /= 10; 4445 } while (exp); 4446 // Exponent always at least two digits if we do not truncate zeros. 4447 if (!TruncateZero && expbuf.size() < 2) 4448 expbuf.push_back('0'); 4449 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 4450 Str.push_back(expbuf[E-1-I]); 4451 return; 4452 } 4453 4454 // Non-scientific, positive exponents. 
4455 if (exp >= 0) { 4456 for (unsigned I = 0; I != NDigits; ++I) 4457 Str.push_back(buffer[NDigits-1-I]); 4458 for (unsigned I = 0; I != (unsigned) exp; ++I) 4459 Str.push_back('0'); 4460 return; 4461 } 4462 4463 // Non-scientific, negative exponents. 4464 4465 // The number of digits to the left of the decimal point. 4466 int NWholeDigits = exp + (int) NDigits; 4467 4468 unsigned I = 0; 4469 if (NWholeDigits > 0) { 4470 for (; I != (unsigned) NWholeDigits; ++I) 4471 Str.push_back(buffer[NDigits-I-1]); 4472 Str.push_back('.'); 4473 } else { 4474 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 4475 4476 Str.push_back('0'); 4477 Str.push_back('.'); 4478 for (unsigned Z = 1; Z != NZeros; ++Z) 4479 Str.push_back('0'); 4480 } 4481 4482 for (; I != NDigits; ++I) 4483 Str.push_back(buffer[NDigits-I-1]); 4484 4485 } 4486 } // namespace 4487 4488 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 4489 unsigned FormatMaxPadding, bool TruncateZero) const { 4490 switch (category) { 4491 case fcInfinity: 4492 if (isNegative()) 4493 return append(Str, "-Inf"); 4494 else 4495 return append(Str, "+Inf"); 4496 4497 case fcNaN: return append(Str, "NaN"); 4498 4499 case fcZero: 4500 if (isNegative()) 4501 Str.push_back('-'); 4502 4503 if (!FormatMaxPadding) { 4504 if (TruncateZero) 4505 append(Str, "0.0E+0"); 4506 else { 4507 append(Str, "0.0"); 4508 if (FormatPrecision > 1) 4509 Str.append(FormatPrecision - 1, '0'); 4510 append(Str, "e+00"); 4511 } 4512 } else 4513 Str.push_back('0'); 4514 return; 4515 4516 case fcNormal: 4517 break; 4518 } 4519 4520 // Decompose the number into an APInt and an exponent. 
4521 int exp = exponent - ((int) semantics->precision - 1); 4522 APInt significand( 4523 semantics->precision, 4524 ArrayRef(significandParts(), partCountForBits(semantics->precision))); 4525 4526 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision, 4527 FormatMaxPadding, TruncateZero); 4528 4529 } 4530 4531 bool IEEEFloat::getExactInverse(APFloat *inv) const { 4532 // Special floats and denormals have no exact inverse. 4533 if (!isFiniteNonZero()) 4534 return false; 4535 4536 // Check that the number is a power of two by making sure that only the 4537 // integer bit is set in the significand. 4538 if (significandLSB() != semantics->precision - 1) 4539 return false; 4540 4541 // Get the inverse. 4542 IEEEFloat reciprocal(*semantics, 1ULL); 4543 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 4544 return false; 4545 4546 // Avoid multiplication with a denormal, it is not safe on all platforms and 4547 // may be slower than a normal division. 4548 if (reciprocal.isDenormal()) 4549 return false; 4550 4551 assert(reciprocal.isFiniteNonZero() && 4552 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 4553 4554 if (inv) 4555 *inv = APFloat(reciprocal, *semantics); 4556 4557 return true; 4558 } 4559 4560 int IEEEFloat::getExactLog2Abs() const { 4561 if (!isFinite() || isZero()) 4562 return INT_MIN; 4563 4564 const integerPart *Parts = significandParts(); 4565 const int PartCount = partCountForBits(semantics->precision); 4566 4567 int PopCount = 0; 4568 for (int i = 0; i < PartCount; ++i) { 4569 PopCount += llvm::popcount(Parts[i]); 4570 if (PopCount > 1) 4571 return INT_MIN; 4572 } 4573 4574 if (exponent != semantics->minExponent) 4575 return exponent; 4576 4577 int CountrParts = 0; 4578 for (int i = 0; i < PartCount; 4579 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) { 4580 if (Parts[i] != 0) { 4581 return exponent - semantics->precision + CountrParts + 4582 llvm::countr_zero(Parts[i]) + 1; 4583 } 4584 } 4585 4586 
llvm_unreachable("didn't find the set bit"); 4587 } 4588 4589 bool IEEEFloat::isSignaling() const { 4590 if (!isNaN()) 4591 return false; 4592 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || 4593 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 4594 return false; 4595 4596 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 4597 // first bit of the trailing significand being 0. 4598 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 4599 } 4600 4601 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 4602 /// 4603 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 4604 /// appropriate sign switching before/after the computation. 4605 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { 4606 // If we are performing nextDown, swap sign so we have -x. 4607 if (nextDown) 4608 changeSign(); 4609 4610 // Compute nextUp(x) 4611 opStatus result = opOK; 4612 4613 // Handle each float category separately. 4614 switch (category) { 4615 case fcInfinity: 4616 // nextUp(+inf) = +inf 4617 if (!isNegative()) 4618 break; 4619 // nextUp(-inf) = -getLargest() 4620 makeLargest(true); 4621 break; 4622 case fcNaN: 4623 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 4624 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 4625 // change the payload. 4626 if (isSignaling()) { 4627 result = opInvalidOp; 4628 // For consistency, propagate the sign of the sNaN to the qNaN. 
4629 makeNaN(false, isNegative(), nullptr); 4630 } 4631 break; 4632 case fcZero: 4633 // nextUp(pm 0) = +getSmallest() 4634 makeSmallest(false); 4635 break; 4636 case fcNormal: 4637 // nextUp(-getSmallest()) = -0 4638 if (isSmallest() && isNegative()) { 4639 APInt::tcSet(significandParts(), 0, partCount()); 4640 category = fcZero; 4641 exponent = 0; 4642 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 4643 sign = false; 4644 if (!semantics->hasZero) 4645 makeSmallestNormalized(false); 4646 break; 4647 } 4648 4649 if (isLargest() && !isNegative()) { 4650 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 4651 // nextUp(getLargest()) == NAN 4652 makeNaN(); 4653 break; 4654 } else if (semantics->nonFiniteBehavior == 4655 fltNonfiniteBehavior::FiniteOnly) { 4656 // nextUp(getLargest()) == getLargest() 4657 break; 4658 } else { 4659 // nextUp(getLargest()) == INFINITY 4660 APInt::tcSet(significandParts(), 0, partCount()); 4661 category = fcInfinity; 4662 exponent = semantics->maxExponent + 1; 4663 break; 4664 } 4665 } 4666 4667 // nextUp(normal) == normal + inc. 4668 if (isNegative()) { 4669 // If we are negative, we need to decrement the significand. 4670 4671 // We only cross a binade boundary that requires adjusting the exponent 4672 // if: 4673 // 1. exponent != semantics->minExponent. This implies we are not in the 4674 // smallest binade or are dealing with denormals. 4675 // 2. Our significand excluding the integral bit is all zeros. 4676 bool WillCrossBinadeBoundary = 4677 exponent != semantics->minExponent && isSignificandAllZeros(); 4678 4679 // Decrement the significand. 4680 // 4681 // We always do this since: 4682 // 1. If we are dealing with a non-binade decrement, by definition we 4683 // just decrement the significand. 4684 // 2. 
If we are dealing with a normal -> normal binade decrement, since 4685 // we have an explicit integral bit the fact that all bits but the 4686 // integral bit are zero implies that subtracting one will yield a 4687 // significand with 0 integral bit and 1 in all other spots. Thus we 4688 // must just adjust the exponent and set the integral bit to 1. 4689 // 3. If we are dealing with a normal -> denormal binade decrement, 4690 // since we set the integral bit to 0 when we represent denormals, we 4691 // just decrement the significand. 4692 integerPart *Parts = significandParts(); 4693 APInt::tcDecrement(Parts, partCount()); 4694 4695 if (WillCrossBinadeBoundary) { 4696 // Our result is a normal number. Do the following: 4697 // 1. Set the integral bit to 1. 4698 // 2. Decrement the exponent. 4699 APInt::tcSetBit(Parts, semantics->precision - 1); 4700 exponent--; 4701 } 4702 } else { 4703 // If we are positive, we need to increment the significand. 4704 4705 // We only cross a binade boundary that requires adjusting the exponent if 4706 // the input is not a denormal and all of said input's significand bits 4707 // are set. If all of said conditions are true: clear the significand, set 4708 // the integral bit to 1, and increment the exponent. If we have a 4709 // denormal always increment since moving denormals and the numbers in the 4710 // smallest normal binade have the same exponent in our representation. 4711 // If there are only exponents, any increment always crosses the 4712 // BinadeBoundary. 
4713 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) || 4714 (!isDenormal() && isSignificandAllOnes()); 4715 4716 if (WillCrossBinadeBoundary) { 4717 integerPart *Parts = significandParts(); 4718 APInt::tcSet(Parts, 0, partCount()); 4719 APInt::tcSetBit(Parts, semantics->precision - 1); 4720 assert(exponent != semantics->maxExponent && 4721 "We can not increment an exponent beyond the maxExponent allowed" 4722 " by the given floating point semantics."); 4723 exponent++; 4724 } else { 4725 incrementSignificand(); 4726 } 4727 } 4728 break; 4729 } 4730 4731 // If we are performing nextDown, swap sign so we have -nextUp(-x) 4732 if (nextDown) 4733 changeSign(); 4734 4735 return result; 4736 } 4737 4738 APFloatBase::ExponentType IEEEFloat::exponentNaN() const { 4739 return ::exponentNaN(*semantics); 4740 } 4741 4742 APFloatBase::ExponentType IEEEFloat::exponentInf() const { 4743 return ::exponentInf(*semantics); 4744 } 4745 4746 APFloatBase::ExponentType IEEEFloat::exponentZero() const { 4747 return ::exponentZero(*semantics); 4748 } 4749 4750 void IEEEFloat::makeInf(bool Negative) { 4751 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 4752 llvm_unreachable("This floating point format does not support Inf"); 4753 4754 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 4755 // There is no Inf, so make NaN instead. 
4756 makeNaN(false, Negative); 4757 return; 4758 } 4759 category = fcInfinity; 4760 sign = Negative; 4761 exponent = exponentInf(); 4762 APInt::tcSet(significandParts(), 0, partCount()); 4763 } 4764 4765 void IEEEFloat::makeZero(bool Negative) { 4766 if (!semantics->hasZero) 4767 llvm_unreachable("This floating point format does not support Zero"); 4768 4769 category = fcZero; 4770 sign = Negative; 4771 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 4772 // Merge negative zero to positive because 0b10000...000 is used for NaN 4773 sign = false; 4774 } 4775 exponent = exponentZero(); 4776 APInt::tcSet(significandParts(), 0, partCount()); 4777 } 4778 4779 void IEEEFloat::makeQuiet() { 4780 assert(isNaN()); 4781 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly) 4782 APInt::tcSetBit(significandParts(), semantics->precision - 2); 4783 } 4784 4785 int ilogb(const IEEEFloat &Arg) { 4786 if (Arg.isNaN()) 4787 return IEEEFloat::IEK_NaN; 4788 if (Arg.isZero()) 4789 return IEEEFloat::IEK_Zero; 4790 if (Arg.isInfinity()) 4791 return IEEEFloat::IEK_Inf; 4792 if (!Arg.isDenormal()) 4793 return Arg.exponent; 4794 4795 IEEEFloat Normalized(Arg); 4796 int SignificandBits = Arg.getSemantics().precision - 1; 4797 4798 Normalized.exponent += SignificandBits; 4799 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero); 4800 return Normalized.exponent - SignificandBits; 4801 } 4802 4803 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { 4804 auto MaxExp = X.getSemantics().maxExponent; 4805 auto MinExp = X.getSemantics().minExponent; 4806 4807 // If Exp is wildly out-of-scale, simply adding it to X.exponent will 4808 // overflow; clamp it to a safe range before adding, but ensure that the range 4809 // is large enough that the clamp does not change the result. 
The range we 4810 // need to support is the difference between the largest possible exponent and 4811 // the normalized exponent of half the smallest denormal. 4812 4813 int SignificandBits = X.getSemantics().precision - 1; 4814 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4815 4816 // Clamp to one past the range ends to let normalize handle overlflow. 4817 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); 4818 X.normalize(RoundingMode, lfExactlyZero); 4819 if (X.isNaN()) 4820 X.makeQuiet(); 4821 return X; 4822 } 4823 4824 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) { 4825 Exp = ilogb(Val); 4826 4827 // Quiet signalling nans. 4828 if (Exp == IEEEFloat::IEK_NaN) { 4829 IEEEFloat Quiet(Val); 4830 Quiet.makeQuiet(); 4831 return Quiet; 4832 } 4833 4834 if (Exp == IEEEFloat::IEK_Inf) 4835 return Val; 4836 4837 // 1 is added because frexp is defined to return a normalized fraction in 4838 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4839 Exp = Exp == IEEEFloat::IEK_Zero ? 
0 : Exp + 1; 4840 return scalbn(Val, -Exp, RM); 4841 } 4842 4843 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4844 : Semantics(&S), 4845 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4846 assert(Semantics == &semPPCDoubleDouble); 4847 } 4848 4849 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4850 : Semantics(&S), 4851 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4852 APFloat(semIEEEdouble, uninitialized)}) { 4853 assert(Semantics == &semPPCDoubleDouble); 4854 } 4855 4856 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4857 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4858 APFloat(semIEEEdouble)}) { 4859 assert(Semantics == &semPPCDoubleDouble); 4860 } 4861 4862 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4863 : Semantics(&S), 4864 Floats(new APFloat[2]{ 4865 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4866 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4867 assert(Semantics == &semPPCDoubleDouble); 4868 } 4869 4870 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4871 APFloat &&Second) 4872 : Semantics(&S), 4873 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4874 assert(Semantics == &semPPCDoubleDouble); 4875 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4876 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4877 } 4878 4879 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4880 : Semantics(RHS.Semantics), 4881 Floats(RHS.Floats ? 
new APFloat[2]{APFloat(RHS.Floats[0]), 4882 APFloat(RHS.Floats[1])} 4883 : nullptr) { 4884 assert(Semantics == &semPPCDoubleDouble); 4885 } 4886 4887 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4888 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4889 RHS.Semantics = &semBogus; 4890 assert(Semantics == &semPPCDoubleDouble); 4891 } 4892 4893 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4894 if (Semantics == RHS.Semantics && RHS.Floats) { 4895 Floats[0] = RHS.Floats[0]; 4896 Floats[1] = RHS.Floats[1]; 4897 } else if (this != &RHS) { 4898 this->~DoubleAPFloat(); 4899 new (this) DoubleAPFloat(RHS); 4900 } 4901 return *this; 4902 } 4903 4904 // Implement addition, subtraction, multiplication and division based on: 4905 // "Software for Doubled-Precision Floating-Point Computations", 4906 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 4907 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4908 const APFloat &c, const APFloat &cc, 4909 roundingMode RM) { 4910 int Status = opOK; 4911 APFloat z = a; 4912 Status |= z.add(c, RM); 4913 if (!z.isFinite()) { 4914 if (!z.isInfinity()) { 4915 Floats[0] = std::move(z); 4916 Floats[1].makeZero(/* Neg = */ false); 4917 return (opStatus)Status; 4918 } 4919 Status = opOK; 4920 auto AComparedToC = a.compareAbsoluteValue(c); 4921 z = cc; 4922 Status |= z.add(aa, RM); 4923 if (AComparedToC == APFloat::cmpGreaterThan) { 4924 // z = cc + aa + c + a; 4925 Status |= z.add(c, RM); 4926 Status |= z.add(a, RM); 4927 } else { 4928 // z = cc + aa + a + c; 4929 Status |= z.add(a, RM); 4930 Status |= z.add(c, RM); 4931 } 4932 if (!z.isFinite()) { 4933 Floats[0] = std::move(z); 4934 Floats[1].makeZero(/* Neg = */ false); 4935 return (opStatus)Status; 4936 } 4937 Floats[0] = z; 4938 APFloat zz = aa; 4939 Status |= zz.add(cc, RM); 4940 if (AComparedToC == APFloat::cmpGreaterThan) { 4941 // Floats[1] = a - z + c + zz; 4942 Floats[1] = a; 4943 Status |= 
Floats[1].subtract(z, RM); 4944 Status |= Floats[1].add(c, RM); 4945 Status |= Floats[1].add(zz, RM); 4946 } else { 4947 // Floats[1] = c - z + a + zz; 4948 Floats[1] = c; 4949 Status |= Floats[1].subtract(z, RM); 4950 Status |= Floats[1].add(a, RM); 4951 Status |= Floats[1].add(zz, RM); 4952 } 4953 } else { 4954 // q = a - z; 4955 APFloat q = a; 4956 Status |= q.subtract(z, RM); 4957 4958 // zz = q + c + (a - (q + z)) + aa + cc; 4959 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 4960 auto zz = q; 4961 Status |= zz.add(c, RM); 4962 Status |= q.add(z, RM); 4963 Status |= q.subtract(a, RM); 4964 q.changeSign(); 4965 Status |= zz.add(q, RM); 4966 Status |= zz.add(aa, RM); 4967 Status |= zz.add(cc, RM); 4968 if (zz.isZero() && !zz.isNegative()) { 4969 Floats[0] = std::move(z); 4970 Floats[1].makeZero(/* Neg = */ false); 4971 return opOK; 4972 } 4973 Floats[0] = z; 4974 Status |= Floats[0].add(zz, RM); 4975 if (!Floats[0].isFinite()) { 4976 Floats[1].makeZero(/* Neg = */ false); 4977 return (opStatus)Status; 4978 } 4979 Floats[1] = std::move(z); 4980 Status |= Floats[1].subtract(Floats[0], RM); 4981 Status |= Floats[1].add(zz, RM); 4982 } 4983 return (opStatus)Status; 4984 } 4985 4986 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4987 const DoubleAPFloat &RHS, 4988 DoubleAPFloat &Out, 4989 roundingMode RM) { 4990 if (LHS.getCategory() == fcNaN) { 4991 Out = LHS; 4992 return opOK; 4993 } 4994 if (RHS.getCategory() == fcNaN) { 4995 Out = RHS; 4996 return opOK; 4997 } 4998 if (LHS.getCategory() == fcZero) { 4999 Out = RHS; 5000 return opOK; 5001 } 5002 if (RHS.getCategory() == fcZero) { 5003 Out = LHS; 5004 return opOK; 5005 } 5006 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 5007 LHS.isNegative() != RHS.isNegative()) { 5008 Out.makeNaN(false, Out.isNegative(), nullptr); 5009 return opInvalidOp; 5010 } 5011 if (LHS.getCategory() == fcInfinity) { 5012 Out = LHS; 5013 return opOK; 5014 } 5015 if 
(RHS.getCategory() == fcInfinity) { 5016 Out = RHS; 5017 return opOK; 5018 } 5019 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 5020 5021 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 5022 CC(RHS.Floats[1]); 5023 assert(&A.getSemantics() == &semIEEEdouble); 5024 assert(&AA.getSemantics() == &semIEEEdouble); 5025 assert(&C.getSemantics() == &semIEEEdouble); 5026 assert(&CC.getSemantics() == &semIEEEdouble); 5027 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble); 5028 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 5029 return Out.addImpl(A, AA, C, CC, RM); 5030 } 5031 5032 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 5033 roundingMode RM) { 5034 return addWithSpecial(*this, RHS, *this, RM); 5035 } 5036 5037 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 5038 roundingMode RM) { 5039 changeSign(); 5040 auto Ret = add(RHS, RM); 5041 changeSign(); 5042 return Ret; 5043 } 5044 5045 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 5046 APFloat::roundingMode RM) { 5047 const auto &LHS = *this; 5048 auto &Out = *this; 5049 /* Interesting observation: For special categories, finding the lowest 5050 common ancestor of the following layered graph gives the correct 5051 return category: 5052 5053 NaN 5054 / \ 5055 Zero Inf 5056 \ / 5057 Normal 5058 5059 e.g. 
NaN * NaN = NaN 5060 Zero * Inf = NaN 5061 Normal * Zero = Zero 5062 Normal * Inf = Inf 5063 */ 5064 if (LHS.getCategory() == fcNaN) { 5065 Out = LHS; 5066 return opOK; 5067 } 5068 if (RHS.getCategory() == fcNaN) { 5069 Out = RHS; 5070 return opOK; 5071 } 5072 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 5073 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 5074 Out.makeNaN(false, false, nullptr); 5075 return opOK; 5076 } 5077 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 5078 Out = LHS; 5079 return opOK; 5080 } 5081 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 5082 Out = RHS; 5083 return opOK; 5084 } 5085 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 5086 "Special cases not handled exhaustively"); 5087 5088 int Status = opOK; 5089 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 5090 // t = a * c 5091 APFloat T = A; 5092 Status |= T.multiply(C, RM); 5093 if (!T.isFiniteNonZero()) { 5094 Floats[0] = T; 5095 Floats[1].makeZero(/* Neg = */ false); 5096 return (opStatus)Status; 5097 } 5098 5099 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 
5100 APFloat Tau = A; 5101 T.changeSign(); 5102 Status |= Tau.fusedMultiplyAdd(C, T, RM); 5103 T.changeSign(); 5104 { 5105 // v = a * d 5106 APFloat V = A; 5107 Status |= V.multiply(D, RM); 5108 // w = b * c 5109 APFloat W = B; 5110 Status |= W.multiply(C, RM); 5111 Status |= V.add(W, RM); 5112 // tau += v + w 5113 Status |= Tau.add(V, RM); 5114 } 5115 // u = t + tau 5116 APFloat U = T; 5117 Status |= U.add(Tau, RM); 5118 5119 Floats[0] = U; 5120 if (!U.isFinite()) { 5121 Floats[1].makeZero(/* Neg = */ false); 5122 } else { 5123 // Floats[1] = (t - u) + tau 5124 Status |= T.subtract(U, RM); 5125 Status |= T.add(Tau, RM); 5126 Floats[1] = T; 5127 } 5128 return (opStatus)Status; 5129 } 5130 5131 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 5132 APFloat::roundingMode RM) { 5133 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5134 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5135 auto Ret = 5136 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 5137 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5138 return Ret; 5139 } 5140 5141 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 5142 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5143 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5144 auto Ret = 5145 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5146 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5147 return Ret; 5148 } 5149 5150 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 5151 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5152 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5153 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5154 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5155 return Ret; 5156 } 5157 5158 APFloat::opStatus 5159 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat 
&Multiplicand, 5160 const DoubleAPFloat &Addend, 5161 APFloat::roundingMode RM) { 5162 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5163 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5164 auto Ret = Tmp.fusedMultiplyAdd( 5165 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 5166 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 5167 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5168 return Ret; 5169 } 5170 5171 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 5172 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5173 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5174 auto Ret = Tmp.roundToIntegral(RM); 5175 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5176 return Ret; 5177 } 5178 5179 void DoubleAPFloat::changeSign() { 5180 Floats[0].changeSign(); 5181 Floats[1].changeSign(); 5182 } 5183 5184 APFloat::cmpResult 5185 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 5186 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 5187 if (Result != cmpEqual) 5188 return Result; 5189 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 5190 if (Result == cmpLessThan || Result == cmpGreaterThan) { 5191 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 5192 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 5193 if (Against && !RHSAgainst) 5194 return cmpLessThan; 5195 if (!Against && RHSAgainst) 5196 return cmpGreaterThan; 5197 if (!Against && !RHSAgainst) 5198 return Result; 5199 if (Against && RHSAgainst) 5200 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 5201 } 5202 return Result; 5203 } 5204 5205 APFloat::fltCategory DoubleAPFloat::getCategory() const { 5206 return Floats[0].getCategory(); 5207 } 5208 5209 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); } 5210 5211 void DoubleAPFloat::makeInf(bool Neg) { 
5212 Floats[0].makeInf(Neg); 5213 Floats[1].makeZero(/* Neg = */ false); 5214 } 5215 5216 void DoubleAPFloat::makeZero(bool Neg) { 5217 Floats[0].makeZero(Neg); 5218 Floats[1].makeZero(/* Neg = */ false); 5219 } 5220 5221 void DoubleAPFloat::makeLargest(bool Neg) { 5222 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5223 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 5224 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 5225 if (Neg) 5226 changeSign(); 5227 } 5228 5229 void DoubleAPFloat::makeSmallest(bool Neg) { 5230 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5231 Floats[0].makeSmallest(Neg); 5232 Floats[1].makeZero(/* Neg = */ false); 5233 } 5234 5235 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 5236 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5237 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 5238 if (Neg) 5239 Floats[0].changeSign(); 5240 Floats[1].makeZero(/* Neg = */ false); 5241 } 5242 5243 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 5244 Floats[0].makeNaN(SNaN, Neg, fill); 5245 Floats[1].makeZero(/* Neg = */ false); 5246 } 5247 5248 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 5249 auto Result = Floats[0].compare(RHS.Floats[0]); 5250 // |Float[0]| > |Float[1]| 5251 if (Result == APFloat::cmpEqual) 5252 return Floats[1].compare(RHS.Floats[1]); 5253 return Result; 5254 } 5255 5256 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 5257 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 5258 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 5259 } 5260 5261 hash_code hash_value(const DoubleAPFloat &Arg) { 5262 if (Arg.Floats) 5263 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 5264 return hash_combine(Arg.Semantics); 5265 } 5266 5267 APInt DoubleAPFloat::bitcastToAPInt() const { 5268 assert(Semantics == &semPPCDoubleDouble 
&& "Unexpected Semantics"); 5269 uint64_t Data[] = { 5270 Floats[0].bitcastToAPInt().getRawData()[0], 5271 Floats[1].bitcastToAPInt().getRawData()[0], 5272 }; 5273 return APInt(128, 2, Data); 5274 } 5275 5276 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 5277 roundingMode RM) { 5278 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5279 APFloat Tmp(semPPCDoubleDoubleLegacy); 5280 auto Ret = Tmp.convertFromString(S, RM); 5281 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5282 return Ret; 5283 } 5284 5285 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5286 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5287 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5288 auto Ret = Tmp.next(nextDown); 5289 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5290 return Ret; 5291 } 5292 5293 APFloat::opStatus 5294 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5295 unsigned int Width, bool IsSigned, 5296 roundingMode RM, bool *IsExact) const { 5297 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5298 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5299 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5300 } 5301 5302 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5303 bool IsSigned, 5304 roundingMode RM) { 5305 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5306 APFloat Tmp(semPPCDoubleDoubleLegacy); 5307 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5308 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5309 return Ret; 5310 } 5311 5312 APFloat::opStatus 5313 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5314 unsigned int InputSize, 5315 bool IsSigned, roundingMode RM) { 5316 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5317 APFloat Tmp(semPPCDoubleDoubleLegacy); 5318 auto Ret = 
Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5319 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5320 return Ret; 5321 } 5322 5323 APFloat::opStatus 5324 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5325 unsigned int InputSize, 5326 bool IsSigned, roundingMode RM) { 5327 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5328 APFloat Tmp(semPPCDoubleDoubleLegacy); 5329 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5330 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5331 return Ret; 5332 } 5333 5334 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5335 unsigned int HexDigits, 5336 bool UpperCase, 5337 roundingMode RM) const { 5338 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5339 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5340 .convertToHexString(DST, HexDigits, UpperCase, RM); 5341 } 5342 5343 bool DoubleAPFloat::isDenormal() const { 5344 return getCategory() == fcNormal && 5345 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5346 // (double)(Hi + Lo) == Hi defines a normal number. 
5347 Floats[0] != Floats[0] + Floats[1]); 5348 } 5349 5350 bool DoubleAPFloat::isSmallest() const { 5351 if (getCategory() != fcNormal) 5352 return false; 5353 DoubleAPFloat Tmp(*this); 5354 Tmp.makeSmallest(this->isNegative()); 5355 return Tmp.compare(*this) == cmpEqual; 5356 } 5357 5358 bool DoubleAPFloat::isSmallestNormalized() const { 5359 if (getCategory() != fcNormal) 5360 return false; 5361 5362 DoubleAPFloat Tmp(*this); 5363 Tmp.makeSmallestNormalized(this->isNegative()); 5364 return Tmp.compare(*this) == cmpEqual; 5365 } 5366 5367 bool DoubleAPFloat::isLargest() const { 5368 if (getCategory() != fcNormal) 5369 return false; 5370 DoubleAPFloat Tmp(*this); 5371 Tmp.makeLargest(this->isNegative()); 5372 return Tmp.compare(*this) == cmpEqual; 5373 } 5374 5375 bool DoubleAPFloat::isInteger() const { 5376 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5377 return Floats[0].isInteger() && Floats[1].isInteger(); 5378 } 5379 5380 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5381 unsigned FormatPrecision, 5382 unsigned FormatMaxPadding, 5383 bool TruncateZero) const { 5384 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5385 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5386 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5387 } 5388 5389 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5390 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5391 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5392 if (!inv) 5393 return Tmp.getExactInverse(nullptr); 5394 APFloat Inv(semPPCDoubleDoubleLegacy); 5395 auto Ret = Tmp.getExactInverse(&Inv); 5396 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5397 return Ret; 5398 } 5399 5400 int DoubleAPFloat::getExactLog2() const { 5401 // TODO: Implement me 5402 return INT_MIN; 5403 } 5404 5405 int DoubleAPFloat::getExactLog2Abs() const { 5406 // TODO: Implement me 5407 return INT_MIN; 5408 } 5409 5410 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5411 APFloat::roundingMode RM) { 5412 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5413 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5414 scalbn(Arg.Floats[1], Exp, RM)); 5415 } 5416 5417 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5418 APFloat::roundingMode RM) { 5419 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5420 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5421 APFloat Second = Arg.Floats[1]; 5422 if (Arg.getCategory() == APFloat::fcNormal) 5423 Second = scalbn(Second, -Exp, RM); 5424 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5425 } 5426 5427 } // namespace detail 5428 5429 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5430 if (usesLayout<IEEEFloat>(Semantics)) { 5431 new (&IEEE) IEEEFloat(std::move(F)); 5432 return; 5433 } 5434 if (usesLayout<DoubleAPFloat>(Semantics)) { 5435 const fltSemantics& S = F.getSemantics(); 5436 new (&Double) 5437 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5438 APFloat(semIEEEdouble)); 5439 return; 5440 } 5441 llvm_unreachable("Unexpected semantics"); 5442 } 5443 5444 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5445 roundingMode RM) { 5446 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5447 } 5448 5449 hash_code hash_value(const APFloat &Arg) { 5450 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5451 return hash_value(Arg.U.IEEE); 5452 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5453 return hash_value(Arg.U.Double); 5454 llvm_unreachable("Unexpected semantics"); 5455 } 5456 5457 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5458 : APFloat(Semantics) { 5459 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5460 assert(StatusOrErr && "Invalid floating point representation"); 5461 consumeError(StatusOrErr.takeError()); 5462 } 
5463 5464 FPClassTest APFloat::classify() const { 5465 if (isZero()) 5466 return isNegative() ? fcNegZero : fcPosZero; 5467 if (isNormal()) 5468 return isNegative() ? fcNegNormal : fcPosNormal; 5469 if (isDenormal()) 5470 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5471 if (isInfinity()) 5472 return isNegative() ? fcNegInf : fcPosInf; 5473 assert(isNaN() && "Other class of FP constant"); 5474 return isSignaling() ? fcSNan : fcQNan; 5475 } 5476 5477 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5478 roundingMode RM, bool *losesInfo) { 5479 if (&getSemantics() == &ToSemantics) { 5480 *losesInfo = false; 5481 return opOK; 5482 } 5483 if (usesLayout<IEEEFloat>(getSemantics()) && 5484 usesLayout<IEEEFloat>(ToSemantics)) 5485 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5486 if (usesLayout<IEEEFloat>(getSemantics()) && 5487 usesLayout<DoubleAPFloat>(ToSemantics)) { 5488 assert(&ToSemantics == &semPPCDoubleDouble); 5489 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5490 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5491 return Ret; 5492 } 5493 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5494 usesLayout<IEEEFloat>(ToSemantics)) { 5495 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5496 *this = APFloat(std::move(getIEEE()), ToSemantics); 5497 return Ret; 5498 } 5499 llvm_unreachable("Unexpected semantics"); 5500 } 5501 5502 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5503 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5504 } 5505 5506 void APFloat::print(raw_ostream &OS) const { 5507 SmallVector<char, 16> Buffer; 5508 toString(Buffer); 5509 OS << Buffer; 5510 } 5511 5512 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5513 LLVM_DUMP_METHOD void APFloat::dump() const { 5514 print(dbgs()); 5515 dbgs() << '\n'; 5516 } 5517 #endif 5518 5519 void APFloat::Profile(FoldingSetNodeID &NID) const { 5520 NID.Add(bitcastToAPInt()); 5521 } 5522 5523 /* Same 
as convertToInteger(integerPart*, ...), except the result is returned in 5524 an APSInt, whose initial bit-width and signed-ness are used to determine the 5525 precision of the conversion. 5526 */ 5527 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5528 roundingMode rounding_mode, 5529 bool *isExact) const { 5530 unsigned bitWidth = result.getBitWidth(); 5531 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5532 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5533 rounding_mode, isExact); 5534 // Keeps the original signed-ness. 5535 result = APInt(bitWidth, parts); 5536 return status; 5537 } 5538 5539 double APFloat::convertToDouble() const { 5540 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5541 return getIEEE().convertToDouble(); 5542 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5543 "Float semantics is not representable by IEEEdouble"); 5544 APFloat Temp = *this; 5545 bool LosesInfo; 5546 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5547 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5548 (void)St; 5549 return Temp.getIEEE().convertToDouble(); 5550 } 5551 5552 #ifdef HAS_IEE754_FLOAT128 5553 float128 APFloat::convertToQuad() const { 5554 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) 5555 return getIEEE().convertToQuad(); 5556 assert(getSemantics().isRepresentableBy(semIEEEquad) && 5557 "Float semantics is not representable by IEEEquad"); 5558 APFloat Temp = *this; 5559 bool LosesInfo; 5560 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); 5561 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5562 (void)St; 5563 return Temp.getIEEE().convertToQuad(); 5564 } 5565 #endif 5566 5567 float APFloat::convertToFloat() const { 5568 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5569 return getIEEE().convertToFloat(); 5570 
assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5571 "Float semantics is not representable by IEEEsingle"); 5572 APFloat Temp = *this; 5573 bool LosesInfo; 5574 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5575 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5576 (void)St; 5577 return Temp.getIEEE().convertToFloat(); 5578 } 5579 5580 } // namespace llvm 5581 5582 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5583