1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision floating 10 // point values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/FloatingPointMode.h" 18 #include "llvm/ADT/FoldingSet.h" 19 #include "llvm/ADT/Hashing.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/StringExtras.h" 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/Config/llvm-config.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/Error.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cstring> 29 #include <limits.h> 30 31 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \ 32 do { \ 33 if (usesLayout<IEEEFloat>(getSemantics())) \ 34 return U.IEEE.METHOD_CALL; \ 35 if (usesLayout<DoubleAPFloat>(getSemantics())) \ 36 return U.Double.METHOD_CALL; \ 37 llvm_unreachable("Unexpected semantics"); \ 38 } while (false) 39 40 using namespace llvm; 41 42 /// A macro used to combine two fcCategory enums into one key which can be used 43 /// in a switch statement to classify how the interaction of two APFloat's 44 /// categories affects an operation. 45 /// 46 /// TODO: If clang source code is ever allowed to use constexpr in its own 47 /// codebase, change this into a static inline function. 
48 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 49 50 /* Assumed in hexadecimal significand parsing, and conversion to 51 hexadecimal strings. */ 52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 53 54 namespace llvm { 55 56 // How the nonfinite values Inf and NaN are represented. 57 enum class fltNonfiniteBehavior { 58 // Represents standard IEEE 754 behavior. A value is nonfinite if the 59 // exponent field is all 1s. In such cases, a value is Inf if the 60 // significand bits are all zero, and NaN otherwise 61 IEEE754, 62 63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN, 64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no 65 // representation for Inf, and operations that would ordinarily produce Inf 66 // produce NaN instead. 67 // The details of the NaN representation(s) in this form are determined by the 68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available 69 // encodings do not distinguish between signalling and quiet NaN. 70 NanOnly, 71 72 // This behavior is present in Float6E3M2FN, Float6E2M3FN, and 73 // Float4E2M1FN types, which do not support Inf or NaN values. 74 FiniteOnly, 75 }; 76 77 // How NaN values are represented. This is curently only used in combination 78 // with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE 79 // while having IEEE non-finite behavior is liable to lead to unexpected 80 // results. 81 enum class fltNanEncoding { 82 // Represents the standard IEEE behavior where a value is NaN if its 83 // exponent is all 1s and the significand is non-zero. 84 IEEE, 85 86 // Represents the behavior in the Float8E4M3FN floating point type where NaN 87 // is represented by having the exponent and mantissa set to all 1s. 88 // This behavior matches the FP8 E4M3 type described in 89 // https://arxiv.org/abs/2209.05433. 
We treat both signed and unsigned NaNs 90 // as non-signalling, although the paper does not state whether the NaN 91 // values are signalling or not. 92 AllOnes, 93 94 // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types 95 // where NaN is represented by a sign bit of 1 and all 0s in the exponent 96 // and mantissa (i.e. the negative zero encoding in a IEEE float). Since 97 // there is only one NaN value, it is treated as quiet NaN. This matches the 98 // behavior described in https://arxiv.org/abs/2206.02915 . 99 NegativeZero, 100 }; 101 102 /* Represents floating point arithmetic semantics. */ 103 struct fltSemantics { 104 /* The largest E such that 2^E is representable; this matches the 105 definition of IEEE 754. */ 106 APFloatBase::ExponentType maxExponent; 107 108 /* The smallest E such that 2^E is a normalized number; this 109 matches the definition of IEEE 754. */ 110 APFloatBase::ExponentType minExponent; 111 112 /* Number of bits in the significand. This includes the integer 113 bit. */ 114 unsigned int precision; 115 116 /* Number of bits actually used in the semantics. */ 117 unsigned int sizeInBits; 118 119 fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; 120 121 fltNanEncoding nanEncoding = fltNanEncoding::IEEE; 122 // Returns true if any number described by this semantics can be precisely 123 // represented by the specified semantics. Does not take into account 124 // the value of fltNonfiniteBehavior. 
125 bool isRepresentableBy(const fltSemantics &S) const { 126 return maxExponent <= S.maxExponent && minExponent >= S.minExponent && 127 precision <= S.precision; 128 } 129 }; 130 131 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16}; 132 static constexpr fltSemantics semBFloat = {127, -126, 8, 16}; 133 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32}; 134 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; 135 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128}; 136 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; 137 static constexpr fltSemantics semFloat8E5M2FNUZ = { 138 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 139 static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8}; 140 static constexpr fltSemantics semFloat8E4M3FN = { 141 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes}; 142 static constexpr fltSemantics semFloat8E4M3FNUZ = { 143 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 144 static constexpr fltSemantics semFloat8E4M3B11FNUZ = { 145 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; 146 static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8}; 147 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; 148 static constexpr fltSemantics semFloat6E3M2FN = { 149 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly}; 150 static constexpr fltSemantics semFloat6E2M3FN = { 151 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly}; 152 static constexpr fltSemantics semFloat4E2M1FN = { 153 2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly}; 154 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 155 static constexpr fltSemantics semBogus = {0, 0, 0, 0}; 156 157 /* The IBM double-double semantics. Such a number consists of a pair of IEEE 158 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, 159 (double)(Hi + Lo) == Hi. 
The numeric value it's modeling is Hi + Lo. 160 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent 161 to each other, and two 11-bit exponents. 162 163 Note: we need to make the value different from semBogus as otherwise 164 an unsafe optimization may collapse both values to a single address, 165 and we heavily rely on them having distinct addresses. */ 166 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; 167 168 /* These are legacy semantics for the fallback, inaccrurate implementation of 169 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the 170 operation. It's equivalent to having an IEEE number with consecutive 106 171 bits of mantissa and 11 bits of exponent. 172 173 It's not equivalent to IBM double-double. For example, a legit IBM 174 double-double, 1 + epsilon: 175 176 1 + epsilon = 1 + (1 >> 1076) 177 178 is not representable by a consecutive 106 bits of mantissa. 179 180 Currently, these semantics are used in the following way: 181 182 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> 183 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> 184 semPPCDoubleDoubleLegacy -> IEEE operations 185 186 We use bitcastToAPInt() to get the bit representation (in APInt) of the 187 underlying IEEEdouble, then use the APInt constructor to construct the 188 legacy IEEE float. 189 190 TODO: Implement all operations in semPPCDoubleDouble, and delete these 191 semantics. 
*/ 192 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 193 53 + 53, 128}; 194 195 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 196 switch (S) { 197 case S_IEEEhalf: 198 return IEEEhalf(); 199 case S_BFloat: 200 return BFloat(); 201 case S_IEEEsingle: 202 return IEEEsingle(); 203 case S_IEEEdouble: 204 return IEEEdouble(); 205 case S_IEEEquad: 206 return IEEEquad(); 207 case S_PPCDoubleDouble: 208 return PPCDoubleDouble(); 209 case S_Float8E5M2: 210 return Float8E5M2(); 211 case S_Float8E5M2FNUZ: 212 return Float8E5M2FNUZ(); 213 case S_Float8E4M3: 214 return Float8E4M3(); 215 case S_Float8E4M3FN: 216 return Float8E4M3FN(); 217 case S_Float8E4M3FNUZ: 218 return Float8E4M3FNUZ(); 219 case S_Float8E4M3B11FNUZ: 220 return Float8E4M3B11FNUZ(); 221 case S_Float8E3M4: 222 return Float8E3M4(); 223 case S_FloatTF32: 224 return FloatTF32(); 225 case S_Float6E3M2FN: 226 return Float6E3M2FN(); 227 case S_Float6E2M3FN: 228 return Float6E2M3FN(); 229 case S_Float4E2M1FN: 230 return Float4E2M1FN(); 231 case S_x87DoubleExtended: 232 return x87DoubleExtended(); 233 } 234 llvm_unreachable("Unrecognised floating semantics"); 235 } 236 237 APFloatBase::Semantics 238 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 239 if (&Sem == &llvm::APFloat::IEEEhalf()) 240 return S_IEEEhalf; 241 else if (&Sem == &llvm::APFloat::BFloat()) 242 return S_BFloat; 243 else if (&Sem == &llvm::APFloat::IEEEsingle()) 244 return S_IEEEsingle; 245 else if (&Sem == &llvm::APFloat::IEEEdouble()) 246 return S_IEEEdouble; 247 else if (&Sem == &llvm::APFloat::IEEEquad()) 248 return S_IEEEquad; 249 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 250 return S_PPCDoubleDouble; 251 else if (&Sem == &llvm::APFloat::Float8E5M2()) 252 return S_Float8E5M2; 253 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 254 return S_Float8E5M2FNUZ; 255 else if (&Sem == &llvm::APFloat::Float8E4M3()) 256 return S_Float8E4M3; 257 else if (&Sem == 
&llvm::APFloat::Float8E4M3FN()) 258 return S_Float8E4M3FN; 259 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 260 return S_Float8E4M3FNUZ; 261 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 262 return S_Float8E4M3B11FNUZ; 263 else if (&Sem == &llvm::APFloat::Float8E3M4()) 264 return S_Float8E3M4; 265 else if (&Sem == &llvm::APFloat::FloatTF32()) 266 return S_FloatTF32; 267 else if (&Sem == &llvm::APFloat::Float6E3M2FN()) 268 return S_Float6E3M2FN; 269 else if (&Sem == &llvm::APFloat::Float6E2M3FN()) 270 return S_Float6E2M3FN; 271 else if (&Sem == &llvm::APFloat::Float4E2M1FN()) 272 return S_Float4E2M1FN; 273 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 274 return S_x87DoubleExtended; 275 else 276 llvm_unreachable("Unknown floating semantics"); 277 } 278 279 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 280 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 281 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 282 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 283 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 284 const fltSemantics &APFloatBase::PPCDoubleDouble() { 285 return semPPCDoubleDouble; 286 } 287 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 288 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 289 const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; } 290 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 291 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 292 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 293 return semFloat8E4M3B11FNUZ; 294 } 295 const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; } 296 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } 297 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } 298 const fltSemantics 
&APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } 299 const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; } 300 const fltSemantics &APFloatBase::x87DoubleExtended() { 301 return semX87DoubleExtended; 302 } 303 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 304 305 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 306 constexpr RoundingMode APFloatBase::rmTowardPositive; 307 constexpr RoundingMode APFloatBase::rmTowardNegative; 308 constexpr RoundingMode APFloatBase::rmTowardZero; 309 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 310 311 /* A tight upper bound on number of parts required to hold the value 312 pow(5, power) is 313 314 power * 815 / (351 * integerPartWidth) + 1 315 316 However, whilst the result may require only this many parts, 317 because we are multiplying two values to get it, the 318 multiplication may require an extra part with the excess part 319 being zero (consider the trivial case of 1 * 1, tcFullMultiply 320 requires two parts to hold the single-part result). So we add an 321 extra one to guarantee enough space whilst multiplying. 
*/ 322 const unsigned int maxExponent = 16383; 323 const unsigned int maxPrecision = 113; 324 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 325 const unsigned int maxPowerOfFiveParts = 326 2 + 327 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 328 329 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 330 return semantics.precision; 331 } 332 APFloatBase::ExponentType 333 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 334 return semantics.maxExponent; 335 } 336 APFloatBase::ExponentType 337 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 338 return semantics.minExponent; 339 } 340 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 341 return semantics.sizeInBits; 342 } 343 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 344 bool isSigned) { 345 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 346 // at least one more bit than the MaxExponent to hold the max FP value. 347 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 348 // Extra sign bit needed. 349 if (isSigned) 350 ++MinBitWidth; 351 return MinBitWidth; 352 } 353 354 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 355 const fltSemantics &Dst) { 356 // Exponent range must be larger. 357 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 358 return false; 359 360 // If the mantissa is long enough, the result value could still be denormal 361 // with a larger exponent range. 362 // 363 // FIXME: This condition is probably not accurate but also shouldn't be a 364 // practical concern with existing types. 
365 return Dst.precision >= Src.precision; 366 } 367 368 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 369 return Sem.sizeInBits; 370 } 371 372 static constexpr APFloatBase::ExponentType 373 exponentZero(const fltSemantics &semantics) { 374 return semantics.minExponent - 1; 375 } 376 377 static constexpr APFloatBase::ExponentType 378 exponentInf(const fltSemantics &semantics) { 379 return semantics.maxExponent + 1; 380 } 381 382 static constexpr APFloatBase::ExponentType 383 exponentNaN(const fltSemantics &semantics) { 384 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 385 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 386 return exponentZero(semantics); 387 return semantics.maxExponent; 388 } 389 return semantics.maxExponent + 1; 390 } 391 392 /* A bunch of private, handy routines. */ 393 394 static inline Error createError(const Twine &Err) { 395 return make_error<StringError>(Err, inconvertibleErrorCode()); 396 } 397 398 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 399 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; 400 } 401 402 /* Returns 0U-9U. Return values >= 10U are not digits. */ 403 static inline unsigned int 404 decDigitValue(unsigned int c) 405 { 406 return c - '0'; 407 } 408 409 /* Return the value of a decimal exponent of the form 410 [+-]ddddddd. 411 412 If the exponent overflows, returns a large exponent with the 413 appropriate sign. */ 414 static Expected<int> readExponent(StringRef::iterator begin, 415 StringRef::iterator end) { 416 bool isNegative; 417 unsigned int absExponent; 418 const unsigned int overlargeExponent = 24000; /* FIXME. 
*/ 419 StringRef::iterator p = begin; 420 421 // Treat no exponent as 0 to match binutils 422 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 423 return 0; 424 } 425 426 isNegative = (*p == '-'); 427 if (*p == '-' || *p == '+') { 428 p++; 429 if (p == end) 430 return createError("Exponent has no digits"); 431 } 432 433 absExponent = decDigitValue(*p++); 434 if (absExponent >= 10U) 435 return createError("Invalid character in exponent"); 436 437 for (; p != end; ++p) { 438 unsigned int value; 439 440 value = decDigitValue(*p); 441 if (value >= 10U) 442 return createError("Invalid character in exponent"); 443 444 absExponent = absExponent * 10U + value; 445 if (absExponent >= overlargeExponent) { 446 absExponent = overlargeExponent; 447 break; 448 } 449 } 450 451 if (isNegative) 452 return -(int) absExponent; 453 else 454 return (int) absExponent; 455 } 456 457 /* This is ugly and needs cleaning up, but I don't immediately see 458 how whilst remaining safe. */ 459 static Expected<int> totalExponent(StringRef::iterator p, 460 StringRef::iterator end, 461 int exponentAdjustment) { 462 int unsignedExponent; 463 bool negative, overflow; 464 int exponent = 0; 465 466 if (p == end) 467 return createError("Exponent has no digits"); 468 469 negative = *p == '-'; 470 if (*p == '-' || *p == '+') { 471 p++; 472 if (p == end) 473 return createError("Exponent has no digits"); 474 } 475 476 unsignedExponent = 0; 477 overflow = false; 478 for (; p != end; ++p) { 479 unsigned int value; 480 481 value = decDigitValue(*p); 482 if (value >= 10U) 483 return createError("Invalid character in exponent"); 484 485 unsignedExponent = unsignedExponent * 10 + value; 486 if (unsignedExponent > 32767) { 487 overflow = true; 488 break; 489 } 490 } 491 492 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 493 overflow = true; 494 495 if (!overflow) { 496 exponent = unsignedExponent; 497 if (negative) 498 exponent = -exponent; 499 exponent += exponentAdjustment; 500 if 
(exponent > 32767 || exponent < -32768) 501 overflow = true; 502 } 503 504 if (overflow) 505 exponent = negative ? -32768: 32767; 506 507 return exponent; 508 } 509 510 static Expected<StringRef::iterator> 511 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 512 StringRef::iterator *dot) { 513 StringRef::iterator p = begin; 514 *dot = end; 515 while (p != end && *p == '0') 516 p++; 517 518 if (p != end && *p == '.') { 519 *dot = p++; 520 521 if (end - begin == 1) 522 return createError("Significand has no digits"); 523 524 while (p != end && *p == '0') 525 p++; 526 } 527 528 return p; 529 } 530 531 /* Given a normal decimal floating point number of the form 532 533 dddd.dddd[eE][+-]ddd 534 535 where the decimal point and exponent are optional, fill out the 536 structure D. Exponent is appropriate if the significand is 537 treated as an integer, and normalizedExponent if the significand 538 is taken to have the decimal point after a single leading 539 non-zero digit. 540 541 If the value is zero, V->firstSigDigit points to a non-digit, and 542 the return exponent is zero. 
543 */ 544 struct decimalInfo { 545 const char *firstSigDigit; 546 const char *lastSigDigit; 547 int exponent; 548 int normalizedExponent; 549 }; 550 551 static Error interpretDecimal(StringRef::iterator begin, 552 StringRef::iterator end, decimalInfo *D) { 553 StringRef::iterator dot = end; 554 555 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 556 if (!PtrOrErr) 557 return PtrOrErr.takeError(); 558 StringRef::iterator p = *PtrOrErr; 559 560 D->firstSigDigit = p; 561 D->exponent = 0; 562 D->normalizedExponent = 0; 563 564 for (; p != end; ++p) { 565 if (*p == '.') { 566 if (dot != end) 567 return createError("String contains multiple dots"); 568 dot = p++; 569 if (p == end) 570 break; 571 } 572 if (decDigitValue(*p) >= 10U) 573 break; 574 } 575 576 if (p != end) { 577 if (*p != 'e' && *p != 'E') 578 return createError("Invalid character in significand"); 579 if (p == begin) 580 return createError("Significand has no digits"); 581 if (dot != end && p - begin == 1) 582 return createError("Significand has no digits"); 583 584 /* p points to the first non-digit in the string */ 585 auto ExpOrErr = readExponent(p + 1, end); 586 if (!ExpOrErr) 587 return ExpOrErr.takeError(); 588 D->exponent = *ExpOrErr; 589 590 /* Implied decimal point? */ 591 if (dot == end) 592 dot = p; 593 } 594 595 /* If number is all zeroes accept any exponent. */ 596 if (p != D->firstSigDigit) { 597 /* Drop insignificant trailing zeroes. */ 598 if (p != begin) { 599 do 600 do 601 p--; 602 while (p != begin && *p == '0'); 603 while (p != begin && *p == '.'); 604 } 605 606 /* Adjust the exponents for any decimal point. */ 607 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 608 D->normalizedExponent = (D->exponent + 609 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 610 - (dot > D->firstSigDigit && dot < p))); 611 } 612 613 D->lastSigDigit = p; 614 return Error::success(); 615 } 616 617 /* Return the trailing fraction of a hexadecimal number. 
618 DIGITVALUE is the first hex digit of the fraction, P points to 619 the next digit. */ 620 static Expected<lostFraction> 621 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 622 unsigned int digitValue) { 623 unsigned int hexDigit; 624 625 /* If the first trailing digit isn't 0 or 8 we can work out the 626 fraction immediately. */ 627 if (digitValue > 8) 628 return lfMoreThanHalf; 629 else if (digitValue < 8 && digitValue > 0) 630 return lfLessThanHalf; 631 632 // Otherwise we need to find the first non-zero digit. 633 while (p != end && (*p == '0' || *p == '.')) 634 p++; 635 636 if (p == end) 637 return createError("Invalid trailing hexadecimal fraction!"); 638 639 hexDigit = hexDigitValue(*p); 640 641 /* If we ran off the end it is exactly zero or one-half, otherwise 642 a little more. */ 643 if (hexDigit == UINT_MAX) 644 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 645 else 646 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 647 } 648 649 /* Return the fraction lost were a bignum truncated losing the least 650 significant BITS bits. */ 651 static lostFraction 652 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 653 unsigned int partCount, 654 unsigned int bits) 655 { 656 unsigned int lsb; 657 658 lsb = APInt::tcLSB(parts, partCount); 659 660 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ 661 if (bits <= lsb) 662 return lfExactlyZero; 663 if (bits == lsb + 1) 664 return lfExactlyHalf; 665 if (bits <= partCount * APFloatBase::integerPartWidth && 666 APInt::tcExtractBit(parts, bits - 1)) 667 return lfMoreThanHalf; 668 669 return lfLessThanHalf; 670 } 671 672 /* Shift DST right BITS bits noting lost fraction. 
*/ 673 static lostFraction 674 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 675 { 676 lostFraction lost_fraction; 677 678 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 679 680 APInt::tcShiftRight(dst, parts, bits); 681 682 return lost_fraction; 683 } 684 685 /* Combine the effect of two lost fractions. */ 686 static lostFraction 687 combineLostFractions(lostFraction moreSignificant, 688 lostFraction lessSignificant) 689 { 690 if (lessSignificant != lfExactlyZero) { 691 if (moreSignificant == lfExactlyZero) 692 moreSignificant = lfLessThanHalf; 693 else if (moreSignificant == lfExactlyHalf) 694 moreSignificant = lfMoreThanHalf; 695 } 696 697 return moreSignificant; 698 } 699 700 /* The error from the true value, in half-ulps, on multiplying two 701 floating point numbers, which differ from the value they 702 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 703 than the returned value. 704 705 See "How to Read Floating Point Numbers Accurately" by William D 706 Clinger. */ 707 static unsigned int 708 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 709 { 710 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 711 712 if (HUerr1 + HUerr2 == 0) 713 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 714 else 715 return inexactMultiply + 2 * (HUerr1 + HUerr2); 716 } 717 718 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 719 when the least significant BITS are truncated. BITS cannot be 720 zero. 
*/ 721 static APFloatBase::integerPart 722 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, 723 bool isNearest) { 724 unsigned int count, partBits; 725 APFloatBase::integerPart part, boundary; 726 727 assert(bits != 0); 728 729 bits--; 730 count = bits / APFloatBase::integerPartWidth; 731 partBits = bits % APFloatBase::integerPartWidth + 1; 732 733 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits)); 734 735 if (isNearest) 736 boundary = (APFloatBase::integerPart) 1 << (partBits - 1); 737 else 738 boundary = 0; 739 740 if (count == 0) { 741 if (part - boundary <= boundary - part) 742 return part - boundary; 743 else 744 return boundary - part; 745 } 746 747 if (part == boundary) { 748 while (--count) 749 if (parts[count]) 750 return ~(APFloatBase::integerPart) 0; /* A lot. */ 751 752 return parts[0]; 753 } else if (part == boundary - 1) { 754 while (--count) 755 if (~parts[count]) 756 return ~(APFloatBase::integerPart) 0; /* A lot. */ 757 758 return -parts[0]; 759 } 760 761 return ~(APFloatBase::integerPart) 0; /* A lot. */ 762 } 763 764 /* Place pow(5, power) in DST, and return the number of parts used. 765 DST must be at least one part larger than size of the answer. */ 766 static unsigned int 767 powerOf5(APFloatBase::integerPart *dst, unsigned int power) { 768 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 }; 769 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 770 pow5s[0] = 78125 * 5; 771 772 unsigned int partsCount = 1; 773 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 774 unsigned int result; 775 assert(power <= maxExponent); 776 777 p1 = dst; 778 p2 = scratch; 779 780 *p1 = firstEightPowers[power & 7]; 781 power >>= 3; 782 783 result = 1; 784 pow5 = pow5s; 785 786 for (unsigned int n = 0; power; power >>= 1, n++) { 787 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. 
*/ 788 if (n != 0) { 789 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount, 790 partsCount, partsCount); 791 partsCount *= 2; 792 if (pow5[partsCount - 1] == 0) 793 partsCount--; 794 } 795 796 if (power & 1) { 797 APFloatBase::integerPart *tmp; 798 799 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount); 800 result += partsCount; 801 if (p2[result - 1] == 0) 802 result--; 803 804 /* Now result is in p1 with partsCount parts and p2 is scratch 805 space. */ 806 tmp = p1; 807 p1 = p2; 808 p2 = tmp; 809 } 810 811 pow5 += partsCount; 812 } 813 814 if (p1 != dst) 815 APInt::tcAssign(dst, p1, result); 816 817 return result; 818 } 819 820 /* Zero at the end to avoid modular arithmetic when adding one; used 821 when rounding up during hexadecimal output. */ 822 static const char hexDigitsLower[] = "0123456789abcdef0"; 823 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 824 static const char infinityL[] = "infinity"; 825 static const char infinityU[] = "INFINITY"; 826 static const char NaNL[] = "nan"; 827 static const char NaNU[] = "NAN"; 828 829 /* Write out an integerPart in hexadecimal, starting with the most 830 significant nibble. Write out exactly COUNT hexdigits, return 831 COUNT. */ 832 static unsigned int 833 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count, 834 const char *hexDigitChars) 835 { 836 unsigned int result = count; 837 838 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4); 839 840 part >>= (APFloatBase::integerPartWidth - 4 * count); 841 while (count--) { 842 dst[count] = hexDigitChars[part & 0xf]; 843 part >>= 4; 844 } 845 846 return result; 847 } 848 849 /* Write out an unsigned decimal integer. 
*/ 850 static char * 851 writeUnsignedDecimal (char *dst, unsigned int n) 852 { 853 char buff[40], *p; 854 855 p = buff; 856 do 857 *p++ = '0' + n % 10; 858 while (n /= 10); 859 860 do 861 *dst++ = *--p; 862 while (p != buff); 863 864 return dst; 865 } 866 867 /* Write out a signed decimal integer. */ 868 static char * 869 writeSignedDecimal (char *dst, int value) 870 { 871 if (value < 0) { 872 *dst++ = '-'; 873 dst = writeUnsignedDecimal(dst, -(unsigned) value); 874 } else 875 dst = writeUnsignedDecimal(dst, value); 876 877 return dst; 878 } 879 880 namespace detail { 881 /* Constructors. */ 882 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 883 unsigned int count; 884 885 semantics = ourSemantics; 886 count = partCount(); 887 if (count > 1) 888 significand.parts = new integerPart[count]; 889 } 890 891 void IEEEFloat::freeSignificand() { 892 if (needsCleanup()) 893 delete [] significand.parts; 894 } 895 896 void IEEEFloat::assign(const IEEEFloat &rhs) { 897 assert(semantics == rhs.semantics); 898 899 sign = rhs.sign; 900 category = rhs.category; 901 exponent = rhs.exponent; 902 if (isFiniteNonZero() || category == fcNaN) 903 copySignificand(rhs); 904 } 905 906 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 907 assert(isFiniteNonZero() || category == fcNaN); 908 assert(rhs.partCount() >= partCount()); 909 910 APInt::tcAssign(significandParts(), rhs.significandParts(), 911 partCount()); 912 } 913 914 /* Make this number a NaN, with an arbitrary but deterministic value 915 for the significand. If double or longer, this is a signalling NaN, 916 which may not be ideal. If float, this is QNaN(0). 
*/ 917 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 918 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 919 llvm_unreachable("This floating point format does not support NaN"); 920 921 category = fcNaN; 922 sign = Negative; 923 exponent = exponentNaN(); 924 925 integerPart *significand = significandParts(); 926 unsigned numParts = partCount(); 927 928 APInt fill_storage; 929 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 930 // Finite-only types do not distinguish signalling and quiet NaN, so 931 // make them all signalling. 932 SNaN = false; 933 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 934 sign = true; 935 fill_storage = APInt::getZero(semantics->precision - 1); 936 } else { 937 fill_storage = APInt::getAllOnes(semantics->precision - 1); 938 } 939 fill = &fill_storage; 940 } 941 942 // Set the significand bits to the fill. 943 if (!fill || fill->getNumWords() < numParts) 944 APInt::tcSet(significand, 0, numParts); 945 if (fill) { 946 APInt::tcAssign(significand, fill->getRawData(), 947 std::min(fill->getNumWords(), numParts)); 948 949 // Zero out the excess bits of the significand. 950 unsigned bitsToPreserve = semantics->precision - 1; 951 unsigned part = bitsToPreserve / 64; 952 bitsToPreserve %= 64; 953 significand[part] &= ((1ULL << bitsToPreserve) - 1); 954 for (part++; part != numParts; ++part) 955 significand[part] = 0; 956 } 957 958 unsigned QNaNBit = semantics->precision - 2; 959 960 if (SNaN) { 961 // We always have to clear the QNaN bit to make it an SNaN. 962 APInt::tcClearBit(significand, QNaNBit); 963 964 // If there are no bits set in the payload, we have to set 965 // *something* to make it a NaN instead of an infinity; 966 // conventionally, this is the next bit down from the QNaN bit. 
967 if (APInt::tcIsZero(significand, numParts)) 968 APInt::tcSetBit(significand, QNaNBit - 1); 969 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 970 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 971 // Do nothing. 972 } else { 973 // We always have to set the QNaN bit to make it a QNaN. 974 APInt::tcSetBit(significand, QNaNBit); 975 } 976 977 // For x87 extended precision, we want to make a NaN, not a 978 // pseudo-NaN. Maybe we should expose the ability to make 979 // pseudo-NaNs? 980 if (semantics == &semX87DoubleExtended) 981 APInt::tcSetBit(significand, QNaNBit + 1); 982 } 983 984 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 985 if (this != &rhs) { 986 if (semantics != rhs.semantics) { 987 freeSignificand(); 988 initialize(rhs.semantics); 989 } 990 assign(rhs); 991 } 992 993 return *this; 994 } 995 996 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 997 freeSignificand(); 998 999 semantics = rhs.semantics; 1000 significand = rhs.significand; 1001 exponent = rhs.exponent; 1002 category = rhs.category; 1003 sign = rhs.sign; 1004 1005 rhs.semantics = &semBogus; 1006 return *this; 1007 } 1008 1009 bool IEEEFloat::isDenormal() const { 1010 return isFiniteNonZero() && (exponent == semantics->minExponent) && 1011 (APInt::tcExtractBit(significandParts(), 1012 semantics->precision - 1) == 0); 1013 } 1014 1015 bool IEEEFloat::isSmallest() const { 1016 // The smallest number by magnitude in our format will be the smallest 1017 // denormal, i.e. the floating point number with exponent being minimum 1018 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 
1019 return isFiniteNonZero() && exponent == semantics->minExponent && 1020 significandMSB() == 0; 1021 } 1022 1023 bool IEEEFloat::isSmallestNormalized() const { 1024 return getCategory() == fcNormal && exponent == semantics->minExponent && 1025 isSignificandAllZerosExceptMSB(); 1026 } 1027 1028 bool IEEEFloat::isSignificandAllOnes() const { 1029 // Test if the significand excluding the integral bit is all ones. This allows 1030 // us to test for binade boundaries. 1031 const integerPart *Parts = significandParts(); 1032 const unsigned PartCount = partCountForBits(semantics->precision); 1033 for (unsigned i = 0; i < PartCount - 1; i++) 1034 if (~Parts[i]) 1035 return false; 1036 1037 // Set the unused high bits to all ones when we compare. 1038 const unsigned NumHighBits = 1039 PartCount*integerPartWidth - semantics->precision + 1; 1040 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1041 "Can not have more high bits to fill than integerPartWidth"); 1042 const integerPart HighBitFill = 1043 ~integerPart(0) << (integerPartWidth - NumHighBits); 1044 if (~(Parts[PartCount - 1] | HighBitFill)) 1045 return false; 1046 1047 return true; 1048 } 1049 1050 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1051 // Test if the significand excluding the integral bit is all ones except for 1052 // the least significant bit. 1053 const integerPart *Parts = significandParts(); 1054 1055 if (Parts[0] & 1) 1056 return false; 1057 1058 const unsigned PartCount = partCountForBits(semantics->precision); 1059 for (unsigned i = 0; i < PartCount - 1; i++) { 1060 if (~Parts[i] & ~unsigned{!i}) 1061 return false; 1062 } 1063 1064 // Set the unused high bits to all ones when we compare. 
1065 const unsigned NumHighBits = 1066 PartCount * integerPartWidth - semantics->precision + 1; 1067 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1068 "Can not have more high bits to fill than integerPartWidth"); 1069 const integerPart HighBitFill = ~integerPart(0) 1070 << (integerPartWidth - NumHighBits); 1071 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1072 return false; 1073 1074 return true; 1075 } 1076 1077 bool IEEEFloat::isSignificandAllZeros() const { 1078 // Test if the significand excluding the integral bit is all zeros. This 1079 // allows us to test for binade boundaries. 1080 const integerPart *Parts = significandParts(); 1081 const unsigned PartCount = partCountForBits(semantics->precision); 1082 1083 for (unsigned i = 0; i < PartCount - 1; i++) 1084 if (Parts[i]) 1085 return false; 1086 1087 // Compute how many bits are used in the final word. 1088 const unsigned NumHighBits = 1089 PartCount*integerPartWidth - semantics->precision + 1; 1090 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1091 "clear than integerPartWidth"); 1092 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1093 1094 if (Parts[PartCount - 1] & HighBitMask) 1095 return false; 1096 1097 return true; 1098 } 1099 1100 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1101 const integerPart *Parts = significandParts(); 1102 const unsigned PartCount = partCountForBits(semantics->precision); 1103 1104 for (unsigned i = 0; i < PartCount - 1; i++) { 1105 if (Parts[i]) 1106 return false; 1107 } 1108 1109 const unsigned NumHighBits = 1110 PartCount * integerPartWidth - semantics->precision + 1; 1111 return Parts[PartCount - 1] == integerPart(1) 1112 << (integerPartWidth - NumHighBits); 1113 } 1114 1115 bool IEEEFloat::isLargest() const { 1116 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1117 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1118 // The largest number by magnitude in our format 
will be the floating point 1119 // number with maximum exponent and with significand that is all ones except 1120 // the LSB. 1121 return isFiniteNonZero() && exponent == semantics->maxExponent && 1122 isSignificandAllOnesExceptLSB(); 1123 } else { 1124 // The largest number by magnitude in our format will be the floating point 1125 // number with maximum exponent and with significand that is all ones. 1126 return isFiniteNonZero() && exponent == semantics->maxExponent && 1127 isSignificandAllOnes(); 1128 } 1129 } 1130 1131 bool IEEEFloat::isInteger() const { 1132 // This could be made more efficient; I'm going for obviously correct. 1133 if (!isFinite()) return false; 1134 IEEEFloat truncated = *this; 1135 truncated.roundToIntegral(rmTowardZero); 1136 return compare(truncated) == cmpEqual; 1137 } 1138 1139 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1140 if (this == &rhs) 1141 return true; 1142 if (semantics != rhs.semantics || 1143 category != rhs.category || 1144 sign != rhs.sign) 1145 return false; 1146 if (category==fcZero || category==fcInfinity) 1147 return true; 1148 1149 if (isFiniteNonZero() && exponent != rhs.exponent) 1150 return false; 1151 1152 return std::equal(significandParts(), significandParts() + partCount(), 1153 rhs.significandParts()); 1154 } 1155 1156 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1157 initialize(&ourSemantics); 1158 sign = 0; 1159 category = fcNormal; 1160 zeroSignificand(); 1161 exponent = ourSemantics.precision - 1; 1162 significandParts()[0] = value; 1163 normalize(rmNearestTiesToEven, lfExactlyZero); 1164 } 1165 1166 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1167 initialize(&ourSemantics); 1168 makeZero(false); 1169 } 1170 1171 // Delegate to the previous constructor, because later copy constructor may 1172 // actually inspects category, which can't be garbage. 
1173 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1174 : IEEEFloat(ourSemantics) {} 1175 1176 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1177 initialize(rhs.semantics); 1178 assign(rhs); 1179 } 1180 1181 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1182 *this = std::move(rhs); 1183 } 1184 1185 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1186 1187 unsigned int IEEEFloat::partCount() const { 1188 return partCountForBits(semantics->precision + 1); 1189 } 1190 1191 const IEEEFloat::integerPart *IEEEFloat::significandParts() const { 1192 return const_cast<IEEEFloat *>(this)->significandParts(); 1193 } 1194 1195 IEEEFloat::integerPart *IEEEFloat::significandParts() { 1196 if (partCount() > 1) 1197 return significand.parts; 1198 else 1199 return &significand.part; 1200 } 1201 1202 void IEEEFloat::zeroSignificand() { 1203 APInt::tcSet(significandParts(), 0, partCount()); 1204 } 1205 1206 /* Increment an fcNormal floating point number's significand. */ 1207 void IEEEFloat::incrementSignificand() { 1208 integerPart carry; 1209 1210 carry = APInt::tcIncrement(significandParts(), partCount()); 1211 1212 /* Our callers should never cause us to overflow. */ 1213 assert(carry == 0); 1214 (void)carry; 1215 } 1216 1217 /* Add the significand of the RHS. Returns the carry flag. */ 1218 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1219 integerPart *parts; 1220 1221 parts = significandParts(); 1222 1223 assert(semantics == rhs.semantics); 1224 assert(exponent == rhs.exponent); 1225 1226 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1227 } 1228 1229 /* Subtract the significand of the RHS with a borrow flag. Returns 1230 the borrow flag. 
*/
// Both operands are asserted to share semantics and exponent.
IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
                                                      integerPart borrow) {
  integerPart *parts;

  parts = significandParts();

  assert(semantics == rhs.semantics);
  assert(exponent == rhs.exponent);

  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
                           partCount());
}

/* Multiply the significand of the RHS.  If ADDEND is non-zero, add it
   on to the full-precision result of the multiplication.  Returns the
   lost fraction.  The exponent is updated to match; the result is not
   necessarily normalized (see the note before the final shift).  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Use the on-stack scratch buffer when the full product fits in it.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Temporarily make *this refer to the full-width product under the
    // extended semantics so the addition below operates on it.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  // Only free the buffer if it was heap-allocated above.
  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}

// Plain multiplication: forwards to the overload above with a
// default-constructed (zero) addend.
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  return multiplySignificand(rhs, IEEEFloat(*semantics));
}

/* Divide this value's significand by that of RHS, leaving the quotient in
   this value's significand and adjusting the exponent.  Returns the lost
   fraction.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  // One buffer holds both working copies: dividend first, divisor after it.
  // The on-stack scratch array is used when both fit in it.
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.
     Our own significand is cleared to receive the quotient bits.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division: produce one quotient bit per iteration, from the
     integer bit downwards.  */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction by comparing the (already doubled)
     remainder with the divisor.  */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  // Only free the buffer if it was heap-allocated above.
  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}

// Result of APInt::tcMSB over the whole significand.
unsigned int IEEEFloat::significandMSB() const {
  return APInt::tcMSB(significandParts(), partCount());
}

// Result of APInt::tcLSB over the whole significand.
unsigned int IEEEFloat::significandLSB() const {
  return APInt::tcLSB(significandParts(), partCount());
}

/* Shift the significand right BITS bits and add BITS to the exponent.
   Returns the fraction shifted out.
   Note that a zero result is NOT normalized to fcZero.  */
lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
  /* Our exponent should not overflow.  */
  assert((ExponentType) (exponent + bits) >= exponent);

  exponent += bits;

  return shiftRight(significandParts(), partCount(), bits);
}

/* Shift the significand left BITS bits, subtract BITS from its exponent.
   BITS must be less than the precision, and the shifted significand is
   asserted to be non-zero.  */
void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
  assert(bits < semantics->precision);

  if (bits) {
    unsigned int partsCount = partCount();

    APInt::tcShiftLeft(significandParts(), partsCount, bits);
    exponent -= bits;

    assert(!APInt::tcIsZero(significandParts(), partsCount));
  }
}

// Compare the magnitudes of two finite non-zero values with the same
// semantics, ignoring sign: exponents first, then significands.
IEEEFloat::cmpResult
IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
  int compare;

  assert(semantics == rhs.semantics);
  assert(isFiniteNonZero());
  assert(rhs.isFiniteNonZero());

  compare = exponent - rhs.exponent;

  /* If exponents are equal, do an unsigned bignum comparison of the
     significands.  */
  if (compare == 0)
    compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
                               partCount());

  if (compare > 0)
    return cmpGreaterThan;
  else if (compare < 0)
    return cmpLessThan;
  else
    return cmpEqual;
}

/* Set the least significant BITS bits of a bignum, clear the
   rest.  DST has PARTS words.  */
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
                                      unsigned bits) {
  unsigned i = 0;
  // Fill whole words while more than one word's worth of bits remains.
  while (bits > APInt::APINT_BITS_PER_WORD) {
    dst[i++] = ~(APInt::WordType)0;
    bits -= APInt::APINT_BITS_PER_WORD;
  }

  // Partially (or exactly) fill the next word with the remaining bits.
  if (bits)
    dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);

  while (i < parts)
    dst[i++] = 0;
}

/* Handle overflow.  Sign is preserved.  We either become infinity or
   the largest finite number.
*/
// Formats whose non-finite behavior is NanOnly become NaN where an IEEE
// format would become infinity; FiniteOnly formats always saturate to the
// largest finite number.  Returns opOverflow | opInexact when a non-finite
// result is produced and opInexact when saturating.
IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
    /* Infinity?  */
    if (rounding_mode == rmNearestTiesToEven ||
        rounding_mode == rmNearestTiesToAway ||
        (rounding_mode == rmTowardPositive && !sign) ||
        (rounding_mode == rmTowardNegative && sign)) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
        makeNaN(false, sign);
      else
        category = fcInfinity;
      return static_cast<opStatus>(opOverflow | opInexact);
    }
  }

  /* Otherwise we become the largest finite number.  */
  category = fcNormal;
  exponent = semantics->maxExponent;
  tcSetLeastSignificantBits(significandParts(), partCount(),
                            semantics->precision);
  // With the all-ones NaN encoding the all-ones significand is not a finite
  // value, so the largest finite number has its LSB clear.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    APInt::tcClearBit(significandParts(), 0);

  return opInexact;
}

/* Returns TRUE if, when truncating the current number, with BIT the
   new LSB, with the given lost fraction and rounding mode, the result
   would need to be rounded away from zero (i.e., by increasing the
   significand).  This routine must work for fcZero of both signs, and
   fcNormal numbers.  */
bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
                                  lostFraction lost_fraction,
                                  unsigned int bit) const {
  /* NaNs and infinities should not have lost fractions.  */
  assert(isFiniteNonZero() || category == fcZero);

  /* Current callers never pass this so we don't handle it.  */
  assert(lost_fraction != lfExactlyZero);

  switch (rounding_mode) {
  case rmNearestTiesToAway:
    return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;

  case rmNearestTiesToEven:
    if (lost_fraction == lfMoreThanHalf)
      return true;

    /* Our zeroes don't have a significand to test.  On an exact tie, round
       away only if the new LSB is set, so the result ends up even.  */
    if (lost_fraction == lfExactlyHalf && category != fcZero)
      return APInt::tcExtractBit(significandParts(), bit);

    return false;

  case rmTowardZero:
    return false;

  case rmTowardPositive:
    return !sign;

  case rmTowardNegative:
    return sign;

  default:
    break;
  }
  llvm_unreachable("Invalid rounding mode found");
}

// Bring a finite non-zero value into canonical form: place the significand's
// MSB at the integer bit where the exponent range allows it, then round
// according to ROUNDING_MODE using LOST_FRACTION (the fraction already
// discarded by the caller).  Returns the resulting status flags.  Values
// that are not finite non-zero are left untouched and yield opOK.
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones values is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  With the NegativeZero NaN encoding the sign is
       cleared as well.  */
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones values is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}

// Handle addition/subtraction when the outcome is decided by the operands'
// categories alone.  SUBTRACT is true for subtraction.  Updates *this in
// place and returns the status.
// NOTE(review): the opDivByZero returned for two fcNormal operands looks
// like a sentinel meaning "not a special case" (remainderSpecials documents
// the same value that way) -- confirm against the caller.
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // A NaN on the right replaces *this; any NaN operand then makes the
  // result a quiet NaN, with opInvalidOp if either was signalling.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The left operand is already the result.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  // The result is the right-hand infinity, sign flipped when subtracting.
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  // The result is the right operand, sign flipped when subtracting.
  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted.  */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;
  }
}

/* Add or subtract two normal numbers.
*/
// The exponents are first aligned by shifting one of the significands;
// SUBTRACT is true for subtraction.  Returns the fraction lost during the
// alignment.  The sign of *this is flipped when the subtraction has to be
// reversed.
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
                                                 bool subtract) {
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction.  */
  subtract ^= static_cast<bool>(sign ^ rhs.sign);

  /* Are we bigger exponent-wise than the RHS?  */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect.  */
  if (subtract) {
    IEEEFloat temp_rhs(rhs);

    // Align the exponents.  The operand with the smaller exponent is shifted
    // right by one bit less than the difference and the other is shifted
    // left by one.
    if (bits == 0)
      lost_fraction = lfExactlyZero;
    else if (bits > 0) {
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
    } else {
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
    }

    // Should we reverse the subtraction.
    // A non-zero lost fraction is passed in as the initial borrow.
    if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted.  */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary.  */
    assert(!carry);
    (void)carry;
  } else {
    // Addition: shift whichever operand has the smaller exponent right so
    // the exponents match, then add the significands.
    if (bits > 0) {
      IEEEFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen.  */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}

// Handle multiplication when the outcome is decided by the operands'
// categories alone.  Updates *this in place and returns the status; two
// fcNormal operands are left untouched and yield opOK.
IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // A NaN on the right replaces *this.  The sign is cleared here so that the
  // XOR in the shared code below leaves the result with RHS's sign.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    category = fcInfinity;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcZero, fcZero):
    category = fcZero;
    return opOK;

  // Zero times infinity is invalid.
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Handle division when the outcome is decided by the operands' categories
// alone.  Updates *this in place and returns the status; two fcNormal
// operands are left untouched and yield opOK.
IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // A NaN on the right replaces *this.  The sign is cleared here so that the
  // XOR in the shared code below leaves the result with RHS's sign.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The category of the left operand is already that of the result.
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  // Finite non-zero divided by zero: infinity, or NaN for formats that
  // have no infinity.
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return opDivByZero;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Handle the mod operation when the outcome is decided by the operands'
// categories alone.  Updates *this in place and returns the status; two
// fcNormal operands are left untouched and yield opOK.
IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The left operand is already the result.
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    return opOK;

  // A zero divisor or an infinite dividend is invalid.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

// Handle the remainder operation when the outcome is decided by the
// operands' categories alone.  Updates *this in place and returns the
// status; two fcNormal operands are left untouched and yield the fake
// opDivByZero status described below.
IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The left operand is already the result.
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    return opOK;

  // A zero divisor or an infinite dividend is invalid.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero; // fake status, indicating this is not a special case
  }
}

/* Change sign.
*/ 2033 void IEEEFloat::changeSign() { 2034 // With NaN-as-negative-zero, neither NaN or negative zero can change 2035 // their signs. 2036 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 2037 (isZero() || isNaN())) 2038 return; 2039 /* Look mummy, this one's easy. */ 2040 sign = !sign; 2041 } 2042 2043 /* Normalized addition or subtraction. */ 2044 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2045 roundingMode rounding_mode, 2046 bool subtract) { 2047 opStatus fs; 2048 2049 fs = addOrSubtractSpecials(rhs, subtract); 2050 2051 /* This return code means it was not a simple case. */ 2052 if (fs == opDivByZero) { 2053 lostFraction lost_fraction; 2054 2055 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2056 fs = normalize(rounding_mode, lost_fraction); 2057 2058 /* Can only be zero if we lost no fraction. */ 2059 assert(category != fcZero || lost_fraction == lfExactlyZero); 2060 } 2061 2062 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2063 positive zero unless rounding to minus infinity, except that 2064 adding two like-signed zeroes gives that zero. */ 2065 if (category == fcZero) { 2066 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2067 sign = (rounding_mode == rmTowardNegative); 2068 // NaN-in-negative-zero means zeros need to be normalized to +0. 2069 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2070 sign = false; 2071 } 2072 2073 return fs; 2074 } 2075 2076 /* Normalized addition. */ 2077 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2078 roundingMode rounding_mode) { 2079 return addOrSubtract(rhs, rounding_mode, false); 2080 } 2081 2082 /* Normalized subtraction. */ 2083 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2084 roundingMode rounding_mode) { 2085 return addOrSubtract(rhs, rounding_mode, true); 2086 } 2087 2088 /* Normalized multiply. 
*/ 2089 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 2090 roundingMode rounding_mode) { 2091 opStatus fs; 2092 2093 sign ^= rhs.sign; 2094 fs = multiplySpecials(rhs); 2095 2096 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2097 sign = false; 2098 if (isFiniteNonZero()) { 2099 lostFraction lost_fraction = multiplySignificand(rhs); 2100 fs = normalize(rounding_mode, lost_fraction); 2101 if (lost_fraction != lfExactlyZero) 2102 fs = (opStatus) (fs | opInexact); 2103 } 2104 2105 return fs; 2106 } 2107 2108 /* Normalized divide. */ 2109 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 2110 roundingMode rounding_mode) { 2111 opStatus fs; 2112 2113 sign ^= rhs.sign; 2114 fs = divideSpecials(rhs); 2115 2116 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2117 sign = false; 2118 if (isFiniteNonZero()) { 2119 lostFraction lost_fraction = divideSignificand(rhs); 2120 fs = normalize(rounding_mode, lost_fraction); 2121 if (lost_fraction != lfExactlyZero) 2122 fs = (opStatus) (fs | opInexact); 2123 } 2124 2125 return fs; 2126 } 2127 2128 /* Normalized remainder. */ 2129 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 2130 opStatus fs; 2131 unsigned int origSign = sign; 2132 2133 // First handle the special cases. 2134 fs = remainderSpecials(rhs); 2135 if (fs != opDivByZero) 2136 return fs; 2137 2138 fs = opOK; 2139 2140 // Make sure the current value is less than twice the denom. If the addition 2141 // did not succeed (an overflow has happened), which means that the finite 2142 // value we currently posses must be less than twice the denom (as we are 2143 // using the same semantics). 2144 IEEEFloat P2 = rhs; 2145 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 2146 fs = mod(P2); 2147 assert(fs == opOK); 2148 } 2149 2150 // Lets work with absolute numbers. 
2151 IEEEFloat P = rhs; 2152 P.sign = false; 2153 sign = false; 2154 2155 // 2156 // To calculate the remainder we use the following scheme. 2157 // 2158 // The remainder is defained as follows: 2159 // 2160 // remainder = numer - rquot * denom = x - r * p 2161 // 2162 // Where r is the result of: x/p, rounded toward the nearest integral value 2163 // (with halfway cases rounded toward the even number). 2164 // 2165 // Currently, (after x mod 2p): 2166 // r is the number of 2p's present inside x, which is inherently, an even 2167 // number of p's. 2168 // 2169 // We may split the remaining calculation into 4 options: 2170 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2171 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2172 // are done as well. 2173 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2174 // to subtract 1p at least once. 2175 // - if x >= p then we must subtract p at least once, as x must be a 2176 // remainder. 2177 // 2178 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2179 // 2180 // We can now split the remaining calculation to the following 3 options: 2181 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2182 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2183 // must round up to the next even number. so we must subtract p once more. 2184 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2185 // integral, and subtract p once more. 2186 // 2187 2188 // Extend the semantics to prevent an overflow/underflow or inexact result. 
2189 bool losesInfo; 2190 fltSemantics extendedSemantics = *semantics; 2191 extendedSemantics.maxExponent++; 2192 extendedSemantics.minExponent--; 2193 extendedSemantics.precision += 2; 2194 2195 IEEEFloat VEx = *this; 2196 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2197 assert(fs == opOK && !losesInfo); 2198 IEEEFloat PEx = P; 2199 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2200 assert(fs == opOK && !losesInfo); 2201 2202 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2203 // any fraction. 2204 fs = VEx.add(VEx, rmNearestTiesToEven); 2205 assert(fs == opOK); 2206 2207 if (VEx.compare(PEx) == cmpGreaterThan) { 2208 fs = subtract(P, rmNearestTiesToEven); 2209 assert(fs == opOK); 2210 2211 // Make VEx = this.add(this), but because we have different semantics, we do 2212 // not want to `convert` again, so we just subtract PEx twice (which equals 2213 // to the desired value). 2214 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2215 assert(fs == opOK); 2216 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2217 assert(fs == opOK); 2218 2219 cmpResult result = VEx.compare(PEx); 2220 if (result == cmpGreaterThan || result == cmpEqual) { 2221 fs = subtract(P, rmNearestTiesToEven); 2222 assert(fs == opOK); 2223 } 2224 } 2225 2226 if (isZero()) { 2227 sign = origSign; // IEEE754 requires this 2228 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2229 // But some 8-bit floats only have positive 0. 2230 sign = false; 2231 } 2232 2233 else 2234 sign ^= origSign; 2235 return fs; 2236 } 2237 2238 /* Normalized llvm frem (C fmod). 
*/
// Replaces *this with the C-fmod-style remainder of *this and RHS. Special
// operand categories are resolved by modSpecials; otherwise a scaled copy of
// RHS is subtracted repeatedly until |*this| < |RHS|. Returns the status from
// modSpecials, or from the last subtraction if the loop ran.
IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
  opStatus fs;
  fs = modSpecials(rhs);
  // Captured after modSpecials, so this is the sign of whatever value
  // modSpecials left in *this.
  unsigned int origSign = sign;

  while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
         compareAbsoluteValue(rhs) != cmpLessThan) {
    // Scale rhs by the difference of the two ilogb values; if that scaled
    // value is larger in magnitude than *this, use one power of two less.
    int Exp = ilogb(*this) - ilogb(rhs);
    IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
    // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
    // check for it.
    if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
      V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
    // Give V our sign so that the subtraction reduces our magnitude.
    V.sign = sign;

    fs = subtract(V, rmNearestTiesToEven);
    assert(fs==opOK);
  }
  if (isZero()) {
    sign = origSign; // fmod requires this
    // Formats that encode NaN as negative zero only have a positive zero.
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }
  return fs;
}

/* Normalized fused-multiply-add.  Computes *this * MULTIPLICAND + ADDEND in
   place and returns the resulting status.  */
IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
                                                const IEEEFloat &addend,
                                                roundingMode rounding_mode) {
  opStatus fs;

  /* Post-multiplication sign, before addition.  */
  sign ^= multiplicand.sign;

  /* If and only if all arguments are normal do we need to do an
     extended-precision calculation.  */
  if (isFiniteNonZero() &&
      multiplicand.isFiniteNonZero() &&
      addend.isFinite()) {
    lostFraction lost_fraction;

    // The addend is passed to multiplySignificand together with the
    // multiplicand; the result is rounded once by normalize below.
    lost_fraction = multiplySignificand(multiplicand, addend);
    fs = normalize(rounding_mode, lost_fraction);
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);

    /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
       positive zero unless rounding to minus infinity, except that
       adding two like-signed zeroes gives that zero.  */
    if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
      sign = (rounding_mode == rmTowardNegative);
      // Formats that encode NaN as negative zero only have a positive zero.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }
  } else {
    fs = multiplySpecials(multiplicand);

    /* FS can only be opOK or opInvalidOp.  There is no more work
       to do in the latter case.  The IEEE-754R standard says it is
       implementation-defined in this case whether, if ADDEND is a
       quiet NaN, we raise invalid op; this implementation does so.

       If we need to do the addition we can do so with normal
       precision.  */
    if (fs == opOK)
      fs = addOrSubtract(addend, rounding_mode, false);
  }

  return fs;
}

/* Rounding-mode correct round to integral value.  Rounds *this in place to
   an integral value according to ROUNDING_MODE; infinities, zeros, quiet
   NaNs and values whose exponent is large enough are left unchanged.  */
IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
  opStatus fs;

  if (isInfinity())
    // [IEEE Std 754-2008 6.1]:
    // The behavior of infinity in floating-point arithmetic is derived from the
    // limiting cases of real arithmetic with operands of arbitrarily
    // large magnitude, when such a limit exists.
    // ...
    // Operations on infinite operands are usually exact and therefore signal no
    // exceptions ...
    return opOK;

  if (isNaN()) {
    if (isSignaling()) {
      // [IEEE Std 754-2008 6.2]:
      // Under default exception handling, any operation signaling an invalid
      // operation exception and for which a floating-point result is to be
      // delivered shall deliver a quiet NaN.
      makeQuiet();
      // [IEEE Std 754-2008 6.2]:
      // Signaling NaNs shall be reserved operands that, under default exception
      // handling, signal the invalid operation exception(see 7.2) for every
      // general-computational and signaling-computational operation except for
      // the conversions described in 5.12.
      return opInvalidOp;
    } else {
      // [IEEE Std 754-2008 6.2]:
      // For an operation with quiet NaN inputs, other than maximum and minimum
      // operations, if a floating-point result is to be delivered the result
      // shall be a quiet NaN which should be one of the input NaNs.
      // ...
      // Every general-computational and quiet-computational operation involving
      // one or more input NaNs, none of them signaling, shall signal no
      // exception, except fusedMultiplyAdd might signal the invalid operation
      // exception(see 7.2).
      return opOK;
    }
  }

  if (isZero()) {
    // [IEEE Std 754-2008 6.3]:
    // ... the sign of the result of conversions, the quantize operation, the
    // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
    // the sign of the first or only operand.
    return opOK;
  }

  // If the exponent is large enough, we know that this value is already
  // integral, and the arithmetic below would potentially cause it to saturate
  // to +/-Inf.  Bail out early instead.
  if (exponent+1 >= (int)semanticsPrecision(*semantics))
    return opOK;

  // The algorithm here is quite simple: we add 2^(p-1), where p is the
  // precision of our format, and then subtract it back off again.  The choice
  // of rounding modes for the addition/subtraction determines the rounding mode
  // for our integral rounding as well.
  // NOTE: When the input value is negative, we do subtraction followed by
  // addition instead.
  APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
  IntegerConstant <<= semanticsPrecision(*semantics)-1;
  IEEEFloat MagicConstant(*semantics);
  fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
                                      rmNearestTiesToEven);
  assert(fs == opOK);
  // Giving the constant our sign is what turns the add/subtract pair into
  // subtract/add for negative inputs.
  MagicConstant.sign = sign;

  // Preserve the input sign so that we can handle the case of zero result
  // correctly.
  bool inputSign = isNegative();

  // The status of this addition is the one returned to the caller.
  fs = add(MagicConstant, rounding_mode);

  // Current value and 'MagicConstant' are both integers, so the result of the
  // subtraction is always exact according to Sterbenz' lemma.
  subtract(MagicConstant, rounding_mode);

  // Restore the input sign.
  if (inputSign != isNegative())
    changeSign();

  return fs;
}


/* Comparison requires normalized numbers.  Returns cmpUnordered if either
   operand is a NaN; zeros compare equal regardless of sign.  Both operands
   must have the same semantics.  */
IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
  cmpResult result;

  assert(semantics == rhs.semantics);

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    return cmpUnordered;

  // The left-hand side has the larger magnitude, so only its sign matters.
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  // The right-hand side has the larger magnitude, so only its sign matters.
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    if (rhs.sign)
      return cmpGreaterThan;
    else
      return cmpLessThan;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    if (sign == rhs.sign)
      return cmpEqual;
    else if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  case PackCategoriesIntoKey(fcZero, fcZero):
    return cmpEqual;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    break;
  }

  /* Two normal numbers.  Do they have the same sign?  */
  if (sign != rhs.sign) {
    if (sign)
      result = cmpLessThan;
    else
      result = cmpGreaterThan;
  } else {
    /* Compare absolute values; invert result if negative.  */
    result = compareAbsoluteValue(rhs);

    if (sign) {
      if (result == cmpLessThan)
        result = cmpGreaterThan;
      else if (result == cmpGreaterThan)
        result = cmpLessThan;
    }
  }

  return result;
}

/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value == opOK, but there
/// are edge cases where this is not so).
///
/// The conversion happens in place: the significand storage is resized if the
/// part count changes, and \c semantics is switched to \p toSemantics.
/// \p losesInfo is written on every path and must not be null.

IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
                                       roundingMode rounding_mode,
                                       bool *losesInfo) {
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;
  int shift;
  const fltSemantics &fromSemantics = *semantics;
  // Sampled up front: the significand and semantics are modified below.
  bool is_signaling = isSignaling();

  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();
  // Positive for an extension, negative for a truncation of the precision.
  shift = toSemantics.precision - fromSemantics.precision;

  bool X86SpecialNan = false;
  if (&fromSemantics == &semX87DoubleExtended &&
      &toSemantics != &semX87DoubleExtended && category == fcNaN &&
      (!(*significandParts() & 0x8000000000000000ULL) ||
       !(*significandParts() & 0x4000000000000000ULL))) {
    // x86 has some unusual NaNs which cannot be represented in any other
    // format; note them here.
    X86SpecialNan = true;
  }

  // If this is a truncation of a denormal number, and the target semantics
  // has larger exponent range than the source semantics (this can happen
  // when truncating from PowerPC double-double to double format), the
  // right shift could lose result mantissa bits.  Adjust exponent instead
  // of performing excessive shift.
  // Also do a similar trick in case shifting denormal would produce zero
  // significand as this case isn't handled correctly by normalize.
  if (shift < 0 && isFiniteNonZero()) {
    int omsb = significandMSB() + 1;
    int exponentChange = omsb - fromSemantics.precision;
    if (exponent + exponentChange < toSemantics.minExponent)
      exponentChange = toSemantics.minExponent - exponent;
    if (exponentChange < shift)
      exponentChange = shift;
    if (exponentChange < 0) {
      shift -= exponentChange;
      exponent += exponentChange;
    } else if (omsb <= -shift) {
      exponentChange = omsb + shift - 1; // leave at least one bit set
      shift -= exponentChange;
      exponent += exponentChange;
    }
  }

  // If this is a truncation, perform the shift before we narrow the storage.
  if (shift < 0 && (isFiniteNonZero() ||
                    (category == fcNaN && semantics->nonFiniteBehavior !=
                                              fltNonfiniteBehavior::NanOnly)))
    lostFraction = shiftRight(significandParts(), oldPartCount, -shift);

  // Fix the storage so it can hold the new value.
  if (newPartCount > oldPartCount) {
    // The new type requires more storage; make it available.
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    if (isFiniteNonZero() || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount == 1 && oldPartCount != 1) {
    // Switch to built-in storage for a single part.
    integerPart newPart = 0;
    if (isFiniteNonZero() || category==fcNaN)
      newPart = significandParts()[0];
    freeSignificand();
    significand.part = newPart;
  }

  // Now that we have the right storage, switch the semantics.
  semantics = &toSemantics;

  // If this is an extension, perform the shift now that the storage is
  // available.
  if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
    APInt::tcShiftLeft(significandParts(), newPartCount, shift);

  if (isFiniteNonZero()) {
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
      // The NaN is rebuilt for the target format with only its sign kept;
      // information is reported lost unless the source format was NanOnly
      // as well.
      *losesInfo =
          fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
      makeNaN(false, sign);
      return is_signaling ? opInvalidOp : opOK;
    }

    // If NaN is negative zero, we need to create a new NaN to avoid converting
    // NaN to -Inf.
    if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
        semantics->nanEncoding != fltNanEncoding::NegativeZero)
      makeNaN(false, false);

    *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

    // For x87 extended precision, we want to make a NaN, not a special NaN if
    // the input wasn't special either.
    if (!X86SpecialNan && semantics == &semX87DoubleExtended)
      APInt::tcSetBit(significandParts(), semantics->precision - 1);

    // Convert of sNaN creates qNaN and raises an exception (invalid op).
    // This also guarantees that a sNaN does not become Inf on a truncation
    // that loses all payload bits.
    if (is_signaling) {
      makeQuiet();
      fs = opInvalidOp;
    } else {
      fs = opOK;
    }
  } else if (category == fcInfinity &&
             semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // An infinity converted to a NanOnly format becomes a NaN with the same
    // sign, and the conversion is reported as inexact.
    makeNaN(false, sign);
    *losesInfo = true;
    fs = opInexact;
  } else if (category == fcZero &&
             semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Negative zero loses info, but positive zero doesn't.
    *losesInfo =
        fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
    fs = *losesInfo ? opInexact : opOK;
    // NaN is negative zero means -0 -> +0, which can lose information
    sign = false;
  } else {
    *losesInfo = false;
    fs = opOK;
  }

  return fs;
}

/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.

   PARTS must provide at least partCountForBits(WIDTH) parts.  *ISEXACT
   is set to true only when opOK is returned for a value other than
   negative zero.  */
IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
    MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
    roundingMode rounding_mode, bool *isExact) const {
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  *isExact = false;

  /* Handle the three special cases first.  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);
  assert(dstPartsCount <= parts.size() && "Integer too big");

  if (category == fcZero) {
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0. */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available.  */
      APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
                       0);
      APInt::tcShiftLeft(parts.data(), dstPartsCount,
                         bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      if (APInt::tcIncrement(parts.data(), dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  */
  unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case.  */
      if (omsb == width &&
          APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    APInt::tcNegate (parts.data(), dstPartsCount);
  } else {
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}

/* Same as convertToSignExtendedInteger, except we provide
   deterministic values in case of an invalid operation exception,
   namely zero for NaNs and the minimal or maximal value respectively
   for underflow or overflow.
   The *isExact output tells whether the result is exact, in the sense
   that converting it back to the original floating point type produces
   the original value.  This is almost equivalent to result==opOK,
   except for negative zeroes.
2734 */ 2735 IEEEFloat::opStatus 2736 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2737 unsigned int width, bool isSigned, 2738 roundingMode rounding_mode, bool *isExact) const { 2739 opStatus fs; 2740 2741 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2742 isExact); 2743 2744 if (fs == opInvalidOp) { 2745 unsigned int bits, dstPartsCount; 2746 2747 dstPartsCount = partCountForBits(width); 2748 assert(dstPartsCount <= parts.size() && "Integer too big"); 2749 2750 if (category == fcNaN) 2751 bits = 0; 2752 else if (sign) 2753 bits = isSigned; 2754 else 2755 bits = width - isSigned; 2756 2757 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2758 if (sign && isSigned) 2759 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2760 } 2761 2762 return fs; 2763 } 2764 2765 /* Convert an unsigned integer SRC to a floating point number, 2766 rounding according to ROUNDING_MODE. The sign of the floating 2767 point number is not modified. */ 2768 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2769 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2770 unsigned int omsb, precision, dstCount; 2771 integerPart *dst; 2772 lostFraction lost_fraction; 2773 2774 category = fcNormal; 2775 omsb = APInt::tcMSB(src, srcCount) + 1; 2776 dst = significandParts(); 2777 dstCount = partCount(); 2778 precision = semantics->precision; 2779 2780 /* We want the most significant PRECISION bits of SRC. There may not 2781 be that many; extract what we can. 
*/ 2782 if (precision <= omsb) { 2783 exponent = omsb - 1; 2784 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2785 omsb - precision); 2786 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2787 } else { 2788 exponent = precision - 1; 2789 lost_fraction = lfExactlyZero; 2790 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2791 } 2792 2793 return normalize(rounding_mode, lost_fraction); 2794 } 2795 2796 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2797 roundingMode rounding_mode) { 2798 unsigned int partCount = Val.getNumWords(); 2799 APInt api = Val; 2800 2801 sign = false; 2802 if (isSigned && api.isNegative()) { 2803 sign = true; 2804 api = -api; 2805 } 2806 2807 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2808 } 2809 2810 /* Convert a two's complement integer SRC to a floating point number, 2811 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2812 integer is signed, in which case it must be sign-extended. */ 2813 IEEEFloat::opStatus 2814 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2815 unsigned int srcCount, bool isSigned, 2816 roundingMode rounding_mode) { 2817 opStatus status; 2818 2819 if (isSigned && 2820 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2821 integerPart *copy; 2822 2823 /* If we're signed and negative negate a copy. */ 2824 sign = true; 2825 copy = new integerPart[srcCount]; 2826 APInt::tcAssign(copy, src, srcCount); 2827 APInt::tcNegate(copy, srcCount); 2828 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2829 delete [] copy; 2830 } else { 2831 sign = false; 2832 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2833 } 2834 2835 return status; 2836 } 2837 2838 /* FIXME: should this just take a const APInt reference? 
*/
/* Convert the WIDTH-bit integer stored in PARTS to floating point,
   rounding according to ROUNDING_MODE.  If ISSIGNED is true and bit
   WIDTH - 1 is set, the sign is set and the negated value is
   converted.  */
IEEEFloat::opStatus
IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
                                          unsigned int width, bool isSigned,
                                          roundingMode rounding_mode) {
  unsigned int partCount = partCountForBits(width);
  APInt api = APInt(width, ArrayRef(parts, partCount));

  sign = false;
  if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
    sign = true;
    api = -api;
  }

  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}

/* Parse the hexadecimal significand and binary exponent in S into this
   value, rounding according to ROUNDING_MODE.  convertFromString passes
   the text that follows the "0x" prefix.  Returns an Error for
   malformed input, otherwise the status reported by normalize().  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromHexadecimalString(StringRef s,
                                        roundingMode rounding_mode) {
  lostFraction lost_fraction = lfExactlyZero;

  category = fcNormal;
  zeroSignificand();
  exponent = 0;

  integerPart *significand = significandParts();
  unsigned partsCount = partCount();
  // Nibbles are written from the top of the significand downwards;
  // bitPos is the bit index just above the next nibble to be stored.
  unsigned bitPos = partsCount * integerPartWidth;
  bool computedTrailingFraction = false;

  // Skip leading zeroes and any (hexa)decimal point.
  StringRef::iterator begin = s.begin();
  StringRef::iterator end = s.end();
  StringRef::iterator dot;
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;
  StringRef::iterator firstSignificantDigit = p;

  while (p != end) {
    integerPart hex_value;

    if (*p == '.') {
      // dot == end is used below to mean "no point seen yet".
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      continue;
    }

    // Any non-hex character ends the significand.
    hex_value = hexDigitValue(*p);
    if (hex_value == UINT_MAX)
      break;

    p++;

    // Store the number while we have space.
    if (bitPos) {
      bitPos -= 4;
      hex_value <<= bitPos % integerPartWidth;
      significand[bitPos / integerPartWidth] |= hex_value;
    } else if (!computedTrailingFraction) {
      // The significand is full: classify the remaining digits once as
      // a lost fraction; later digits are only skipped over.
      auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
      if (!FractOrErr)
        return FractOrErr.takeError();
      lost_fraction = *FractOrErr;
      computedTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  if (p == end)
    return createError("Hex strings require an exponent");
  if (*p != 'p' && *p != 'P')
    return createError("Invalid character in significand");
  if (p == begin)
    return createError("Significand has no digits");
  if (dot != end && p - begin == 1)
    return createError("Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    int expAdjustment;

    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= partsCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    exponent = *ExpOrErr;
  }

  return normalize(rounding_mode, lost_fraction);
}

/* Set this value to the decimal significand DECSIGPARTS (SIGPARTCOUNT
   parts) scaled by 10^EXP, rounded according to ROUNDING_MODE.  The
   product or quotient with 5^|EXP| is computed in a wider scratch
   format; the scratch width is doubled until the error bound shows
   that truncating gives the correctly rounded result.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  /* Scratch semantics; the precision field is assigned on every
     iteration of the loop below.  */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    /* A status other than opOK from either conversion counts towards
       the error bound computed below.  */
    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}

/* Parse the decimal floating point text STR (any sign has already been
   removed by convertFromString) into this value, rounding according to
   ROUNDING_MODE.  Returns an Error for malformed input.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null terminators
  // and zero decimals with non-zero exponents.
  //
  // We computed firstSigDigit by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there can
  // be at most one dot. On the other hand, if we have a zero with a non-zero
  // exponent, then we know that D.firstSigDigit will be non-numeric.
  if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    fs = opOK;
    // With the NegativeZero NaN encoding the sign of a zero is cleared.
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;

    /* Check whether the normalized exponent is high enough to overflow
       max during the log-rebasing in the max-exponent check below. */
  } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
    fs = handleOverflow(rounding_mode);

    /* If it wasn't, then it also wasn't high enough to overflow max
       during the log-rebasing in the min-exponent check.  Check that it
       won't overflow min in either check, then perform the min-exponent
       check. */
  } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
             (D.normalizedExponent + 1) * 28738 <=
               8651 * (semantics->minExponent - (int) semantics->precision)) {
    /* Underflow to zero and round.  */
    category = fcNormal;
    zeroSignificand();
    fs = normalize(rounding_mode, lfLessThanHalf);

    /* We can finally safely perform the max-exponent check. */
  } else if ((D.normalizedExponent - 1) * 42039
             >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    fs = handleOverflow(rounding_mode);
  } else {
    integerPart *decSignificand;
    unsigned int partCount;

    /* A tight upper bound on number of bits required to hold an
       N-digit decimal integer is N * 196 / 59.  Allocate enough space
       to hold the full significand, and an extra part required by
       tcMultiplyPart.  */
    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
    partCount = partCountForBits(1 + 196 * partCount / 59);
    decSignificand = new integerPart[partCount + 1];
    /* From here on partCount counts the parts of decSignificand that
       are actually in use.  */
    partCount = 0;

    /* Convert to binary efficiently - we do almost all multiplication
       in an integerPart.  When this would overflow we do a single
       bignum multiplication, and then revert again to multiplication
       in an integerPart.  */
    do {
      integerPart decValue, val, multiplier;

      val = 0;
      multiplier = 1;

      do {
        /* Step over the decimal point; it may be the last character.  */
        if (*p == '.') {
          p++;
          if (p == str.end()) {
            break;
          }
        }
        decValue = decDigitValue(*p++);
        if (decValue >= 10U) {
          /* Release the buffer before reporting the error.  */
          delete[] decSignificand;
          return createError("Invalid character in significand");
        }
        multiplier *= 10;
        val = val * 10 + decValue;
        /* The maximum number that can be multiplied by ten with any
           digit added without overflowing an integerPart.  */
      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);

      /* Multiply out the current part.  */
      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
                            partCount, partCount + 1, false);

      /* If we used another part (likely but not guaranteed), increase
         the count.  */
      if (decSignificand[partCount])
        partCount++;
    } while (p <= D.lastSigDigit);

    category = fcNormal;
    fs = roundSignificandWithExponent(decSignificand, partCount,
                                      D.exponent, rounding_mode);

    delete [] decSignificand;
  }

  return fs;
}

/* Recognise the textual spellings of infinity and NaN in STR.  On a
   match this value is set accordingly and true is returned; otherwise
   false is returned.  A NaN may be followed by an integer payload,
   optionally wrapped in parentheses.  */
bool IEEEFloat::convertFromStringSpecials(StringRef str) {
  const size_t MIN_NAME_SIZE = 3;

  if (str.size() < MIN_NAME_SIZE)
    return false;

  if (str == "inf" || str == "INFINITY" || str == "+Inf") {
    makeInf(false);
    return true;
  }

  bool IsNegative = str.front() == '-';
  if (IsNegative) {
    str = str.drop_front();
    if (str.size() < MIN_NAME_SIZE)
      return false;

    if (str == "inf" || str == "INFINITY" || str == "Inf") {
      makeInf(true);
      return true;
    }
  }

  // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
  bool IsSignaling = str.front() == 's' || str.front() == 'S';
  if (IsSignaling) {
    str = str.drop_front();
    if (str.size() < MIN_NAME_SIZE)
      return false;
  }

  if (str.starts_with("nan") || str.starts_with("NaN")) {
    str = str.drop_front(3);

    // A NaN without payload.
    if (str.empty()) {
      makeNaN(IsSignaling, IsNegative);
      return true;
    }

    // Allow the payload to be inside parentheses.
    if (str.front() == '(') {
      // Parentheses should be balanced (and not empty).
      if (str.size() <= 2 || str.back() != ')')
        return false;

      str = str.slice(1, str.size() - 1);
    }

    // Determine the payload number's radix.
    // A leading "0x"/"0X" selects hexadecimal, any other leading '0'
    // selects octal, and everything else is decimal.
    unsigned Radix = 10;
    if (str[0] == '0') {
      if (str.size() > 1 && tolower(str[1]) == 'x') {
        str = str.drop_front(2);
        Radix = 16;
      } else
        Radix = 8;
    }

    // Parse the payload and make the NaN.
    // The NaN is only created when getAsInteger returns false.
    APInt Payload;
    if (!str.getAsInteger(Radix, Payload)) {
      makeNaN(IsSignaling, IsNegative, &Payload);
      return true;
    }
  }

  return false;
}

/* Parse STR into this value, rounding according to ROUNDING_MODE.
   Special names are tried first; otherwise an optional sign is consumed
   and the remainder is handed to the hexadecimal parser (after a
   "0x"/"0X" prefix) or to the decimal parser.  Returns an Error for
   empty or malformed input.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
  if (str.empty())
    return createError("Invalid string length");

  // Handle special cases.
  if (convertFromStringSpecials(str))
    return opOK;

  /* Handle a leading minus sign.  */
  StringRef::iterator p = str.begin();
  size_t slen = str.size();
  sign = *p == '-' ? 1 : 0;
  if (*p == '-' || *p == '+') {
    p++;
    slen--;
    if (!slen)
      return createError("String has no digits");
  }

  if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
    /* A bare "0x" with nothing after it is rejected.  */
    if (slen == 2)
      return createError("Invalid string");
    return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
                                        rounding_mode);
  }

  return convertFromDecimalString(StringRef(p, slen), rounding_mode);
}

/* Write out a hexadecimal representation of the floating point value
   to DST, which must be of sufficient size, in the C99 form
   [-]0xh.hhhhp[+-]d.  Return the number of characters written,
   excluding the terminating NUL.

   If UPPERCASE, the output is in upper case, otherwise in lower case.

   HEXDIGITS digits appear altogether, rounding the value if
   necessary.  If HEXDIGITS is 0, the minimal precision to display the
   number precisely is used instead.  If nothing would appear after
   the decimal point it is suppressed.

   The decimal exponent is always printed and has at least one digit.
   Zero values display an exponent of zero.  Infinities and NaNs
   appear as "infinity" or "nan" respectively.

   The above rules are as specified by C99.  There is ambiguity about
   what the leading hexadecimal digit should be.
This implementation
   uses whatever is necessary so that the exponent is displayed as
   stored.  This implies the exponent will fall within the IEEE format
   range, and the leading hexadecimal digit will be 0 (for denormals),
   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
   any other digits zero).
*/
unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
                                           bool upperCase,
                                           roundingMode rounding_mode) const {
  char *p;

  /* Remember the start of the buffer so the length can be returned.  */
  p = dst;
  if (sign)
    *dst++ = '-';

  switch (category) {
  case fcInfinity:
    /* NOTE(review): the copy length uses sizeof infinityU while the
       advance uses sizeof infinityL; presumably both strings have the
       same length -- confirm.  */
    memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
    dst += sizeof infinityL - 1;
    break;

  case fcNaN:
    memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
    dst += sizeof NaNU - 1;
    break;

  case fcZero:
    /* "0x0", then HEXDIGITS - 1 zero digits after a point when more
       than one digit was requested, then "p0".  */
    *dst++ = '0';
    *dst++ = upperCase ? 'X': 'x';
    *dst++ = '0';
    if (hexDigits > 1) {
      *dst++ = '.';
      memset (dst, '0', hexDigits - 1);
      dst += hexDigits - 1;
    }
    *dst++ = upperCase ? 'P': 'p';
    *dst++ = '0';
    break;

  case fcNormal:
    dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
    break;
  }

  /* NUL-terminate; the terminator is not counted in the result.  */
  *dst = 0;

  return static_cast<unsigned int>(dst - p);
}

/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.  Returns
   the position in DST just past the last character written.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  /* Number of integerPart-sized pieces needed to cover valueBits.  */
  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Pull in the high bits of the next lower part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Note that hexDigitChars has a trailing '0'.  */
    /* Increment the last digit written; a digit that becomes '0'
       carries into the digit before it.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}

// Hash an IEEEFloat.  Values that are not finite non-zero hash only their
// category, sign and precision; finite non-zero values additionally hash
// the exponent and every significand part.
hash_code hash_value(const IEEEFloat &Arg) {
  if (!Arg.isFiniteNonZero())
    return hash_combine((uint8_t)Arg.category,
                        // NaN has no sign, fix it at zero.
                        Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
                        Arg.semantics->precision);

  // Normal floats need their exponent and significand hashed.
  return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
                      Arg.semantics->precision, Arg.exponent,
                      hash_combine_range(
                        Arg.significandParts(),
                        Arg.significandParts() + Arg.partCount()));
}

// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.
3450 3451 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3452 // the actual IEEE respresentations. We compensate for that here. 3453 3454 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3455 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3456 assert(partCount()==2); 3457 3458 uint64_t myexponent, mysignificand; 3459 3460 if (isFiniteNonZero()) { 3461 myexponent = exponent+16383; //bias 3462 mysignificand = significandParts()[0]; 3463 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3464 myexponent = 0; // denormal 3465 } else if (category==fcZero) { 3466 myexponent = 0; 3467 mysignificand = 0; 3468 } else if (category==fcInfinity) { 3469 myexponent = 0x7fff; 3470 mysignificand = 0x8000000000000000ULL; 3471 } else { 3472 assert(category == fcNaN && "Unknown category"); 3473 myexponent = 0x7fff; 3474 mysignificand = significandParts()[0]; 3475 } 3476 3477 uint64_t words[2]; 3478 words[0] = mysignificand; 3479 words[1] = ((uint64_t)(sign & 1) << 15) | 3480 (myexponent & 0x7fffLL); 3481 return APInt(80, words); 3482 } 3483 3484 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3485 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3486 assert(partCount()==2); 3487 3488 uint64_t words[2]; 3489 opStatus fs; 3490 bool losesInfo; 3491 3492 // Convert number to double. To avoid spurious underflows, we re- 3493 // normalize against the "double" minExponent first, and only *then* 3494 // truncate the mantissa. The result of that second conversion 3495 // may be inexact, but should never underflow. 3496 // Declare fltSemantics before APFloat that uses it (and 3497 // saves pointer to it) to ensure correct destruction order. 
3498 fltSemantics extendedSemantics = *semantics; 3499 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3500 IEEEFloat extended(*this); 3501 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3502 assert(fs == opOK && !losesInfo); 3503 (void)fs; 3504 3505 IEEEFloat u(extended); 3506 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3507 assert(fs == opOK || fs == opInexact); 3508 (void)fs; 3509 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3510 3511 // If conversion was exact or resulted in a special case, we're done; 3512 // just set the second double to zero. Otherwise, re-convert back to 3513 // the extended format and compute the difference. This now should 3514 // convert exactly to double. 3515 if (u.isFiniteNonZero() && losesInfo) { 3516 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3517 assert(fs == opOK && !losesInfo); 3518 (void)fs; 3519 3520 IEEEFloat v(extended); 3521 v.subtract(u, rmNearestTiesToEven); 3522 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3523 assert(fs == opOK && !losesInfo); 3524 (void)fs; 3525 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3526 } else { 3527 words[1] = 0; 3528 } 3529 3530 return APInt(128, words); 3531 } 3532 3533 template <const fltSemantics &S> 3534 APInt IEEEFloat::convertIEEEFloatToAPInt() const { 3535 assert(semantics == &S); 3536 3537 constexpr int bias = -(S.minExponent - 1); 3538 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3539 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; 3540 constexpr integerPart integer_bit = 3541 integerPart{1} << (trailing_significand_bits % integerPartWidth); 3542 constexpr uint64_t significand_mask = integer_bit - 1; 3543 constexpr unsigned int exponent_bits = 3544 S.sizeInBits - 1 - trailing_significand_bits; 3545 static_assert(exponent_bits < 64); 3546 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 
1; 3547 3548 uint64_t myexponent; 3549 std::array<integerPart, partCountForBits(trailing_significand_bits)> 3550 mysignificand; 3551 3552 if (isFiniteNonZero()) { 3553 myexponent = exponent + bias; 3554 std::copy_n(significandParts(), mysignificand.size(), 3555 mysignificand.begin()); 3556 if (myexponent == 1 && 3557 !(significandParts()[integer_bit_part] & integer_bit)) 3558 myexponent = 0; // denormal 3559 } else if (category == fcZero) { 3560 myexponent = ::exponentZero(S) + bias; 3561 mysignificand.fill(0); 3562 } else if (category == fcInfinity) { 3563 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || 3564 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 3565 llvm_unreachable("semantics don't support inf!"); 3566 myexponent = ::exponentInf(S) + bias; 3567 mysignificand.fill(0); 3568 } else { 3569 assert(category == fcNaN && "Unknown category!"); 3570 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 3571 llvm_unreachable("semantics don't support NaN!"); 3572 myexponent = ::exponentNaN(S) + bias; 3573 std::copy_n(significandParts(), mysignificand.size(), 3574 mysignificand.begin()); 3575 } 3576 std::array<uint64_t, (S.sizeInBits + 63) / 64> words; 3577 auto words_iter = 3578 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); 3579 if constexpr (significand_mask != 0) { 3580 // Clear the integer bit. 
3581 words[mysignificand.size() - 1] &= significand_mask; 3582 } 3583 std::fill(words_iter, words.end(), uint64_t{0}); 3584 constexpr size_t last_word = words.size() - 1; 3585 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1) 3586 << ((S.sizeInBits - 1) % 64); 3587 words[last_word] |= shifted_sign; 3588 uint64_t shifted_exponent = (myexponent & exponent_mask) 3589 << (trailing_significand_bits % 64); 3590 words[last_word] |= shifted_exponent; 3591 if constexpr (last_word == 0) { 3592 return APInt(S.sizeInBits, words[0]); 3593 } 3594 return APInt(S.sizeInBits, words); 3595 } 3596 3597 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3598 assert(partCount() == 2); 3599 return convertIEEEFloatToAPInt<semIEEEquad>(); 3600 } 3601 3602 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3603 assert(partCount()==1); 3604 return convertIEEEFloatToAPInt<semIEEEdouble>(); 3605 } 3606 3607 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3608 assert(partCount()==1); 3609 return convertIEEEFloatToAPInt<semIEEEsingle>(); 3610 } 3611 3612 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3613 assert(partCount() == 1); 3614 return convertIEEEFloatToAPInt<semBFloat>(); 3615 } 3616 3617 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3618 assert(partCount()==1); 3619 return convertIEEEFloatToAPInt<semIEEEhalf>(); 3620 } 3621 3622 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { 3623 assert(partCount() == 1); 3624 return convertIEEEFloatToAPInt<semFloat8E5M2>(); 3625 } 3626 3627 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { 3628 assert(partCount() == 1); 3629 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); 3630 } 3631 3632 APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const { 3633 assert(partCount() == 1); 3634 return convertIEEEFloatToAPInt<semFloat8E4M3>(); 3635 } 3636 3637 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { 3638 assert(partCount() == 1); 3639 return 
convertIEEEFloatToAPInt<semFloat8E4M3FN>(); 3640 } 3641 3642 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { 3643 assert(partCount() == 1); 3644 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); 3645 } 3646 3647 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { 3648 assert(partCount() == 1); 3649 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); 3650 } 3651 3652 APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const { 3653 assert(partCount() == 1); 3654 return convertIEEEFloatToAPInt<semFloat8E3M4>(); 3655 } 3656 3657 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { 3658 assert(partCount() == 1); 3659 return convertIEEEFloatToAPInt<semFloatTF32>(); 3660 } 3661 3662 APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const { 3663 assert(partCount() == 1); 3664 return convertIEEEFloatToAPInt<semFloat6E3M2FN>(); 3665 } 3666 3667 APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const { 3668 assert(partCount() == 1); 3669 return convertIEEEFloatToAPInt<semFloat6E2M3FN>(); 3670 } 3671 3672 APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const { 3673 assert(partCount() == 1); 3674 return convertIEEEFloatToAPInt<semFloat4E2M1FN>(); 3675 } 3676 3677 // This function creates an APInt that is just a bit map of the floating 3678 // point constant as it would appear in memory. It is not a conversion, 3679 // and treating the result as a normal integer is unlikely to be useful. 
3680 3681 APInt IEEEFloat::bitcastToAPInt() const { 3682 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3683 return convertHalfAPFloatToAPInt(); 3684 3685 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3686 return convertBFloatAPFloatToAPInt(); 3687 3688 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3689 return convertFloatAPFloatToAPInt(); 3690 3691 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3692 return convertDoubleAPFloatToAPInt(); 3693 3694 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3695 return convertQuadrupleAPFloatToAPInt(); 3696 3697 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3698 return convertPPCDoubleDoubleAPFloatToAPInt(); 3699 3700 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3701 return convertFloat8E5M2APFloatToAPInt(); 3702 3703 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3704 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3705 3706 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3) 3707 return convertFloat8E4M3APFloatToAPInt(); 3708 3709 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3710 return convertFloat8E4M3FNAPFloatToAPInt(); 3711 3712 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3713 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3714 3715 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3716 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3717 3718 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4) 3719 return convertFloat8E3M4APFloatToAPInt(); 3720 3721 if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3722 return convertFloatTF32APFloatToAPInt(); 3723 3724 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) 3725 return convertFloat6E3M2FNAPFloatToAPInt(); 3726 3727 if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN) 3728 return convertFloat6E2M3FNAPFloatToAPInt(); 3729 3730 if (semantics == (const 
llvm::fltSemantics *)&semFloat4E2M1FN) 3731 return convertFloat4E2M1FNAPFloatToAPInt(); 3732 3733 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3734 "unknown format!"); 3735 return convertF80LongDoubleAPFloatToAPInt(); 3736 } 3737 3738 float IEEEFloat::convertToFloat() const { 3739 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3740 "Float semantics are not IEEEsingle"); 3741 APInt api = bitcastToAPInt(); 3742 return api.bitsToFloat(); 3743 } 3744 3745 double IEEEFloat::convertToDouble() const { 3746 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3747 "Float semantics are not IEEEdouble"); 3748 APInt api = bitcastToAPInt(); 3749 return api.bitsToDouble(); 3750 } 3751 3752 #ifdef HAS_IEE754_FLOAT128 3753 float128 IEEEFloat::convertToQuad() const { 3754 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && 3755 "Float semantics are not IEEEquads"); 3756 APInt api = bitcastToAPInt(); 3757 return api.bitsToQuad(); 3758 } 3759 #endif 3760 3761 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3762 /// does not support these bit patterns: 3763 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3764 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3765 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3766 /// exponent = 0, integer bit 1 ("pseudodenormal") 3767 /// At the moment, the first three are treated as NaNs, the last one as Normal. 
void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
  // Word 0 is the full 64-bit significand (explicit integer bit at bit 63);
  // word 1 carries the 15-bit exponent in its low bits and the sign at bit 15.
  uint64_t i1 = api.getRawData()[0];
  uint64_t i2 = api.getRawData()[1];
  uint64_t myexponent = (i2 & 0x7fff);
  uint64_t mysignificand = i1;
  uint8_t myintegerbit = mysignificand >> 63;

  initialize(&semX87DoubleExtended);
  assert(partCount()==2);

  sign = static_cast<unsigned int>(i2>>15);
  if (myexponent == 0 && mysignificand == 0) {
    makeZero(sign);
  } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
    makeInf(sign);
  } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
             (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
    // Max exponent with any other significand, or a non-extreme exponent with
    // a clear integer bit: classified as NaN, significand kept verbatim.
    category = fcNaN;
    exponent = exponentNaN();
    significandParts()[0] = mysignificand;
    significandParts()[1] = 0;
  } else {
    category = fcNormal;
    exponent = myexponent - 16383;
    significandParts()[0] = mysignificand;
    significandParts()[1] = 0;
    if (myexponent==0)          // denormal
      exponent = -16382;
  }
}

/// Initialize from a two-word bit pattern: word 0 and word 1 are each
/// interpreted as the bits of an IEEE double, converted to
/// semPPCDoubleDoubleLegacy, and summed (the second word is only added when
/// the first yields a finite non-zero value).
void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
  uint64_t i1 = api.getRawData()[0];
  uint64_t i2 = api.getRawData()[1];
  opStatus fs;
  bool losesInfo;

  // Get the first double and convert to our format.
  initFromDoubleAPInt(APInt(64, i1));
  fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs;

  // Unless we have a special case, add in second double.
  if (isFiniteNonZero()) {
    IEEEFloat v(semIEEEdouble, APInt(64, i2));
    fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    add(v, rmNearestTiesToEven);
  }
}

/// Generic decoder for formats laid out as sign | exponent | trailing
/// significand. All field widths, masks and the bias are derived at compile
/// time from the semantics `S`; the Inf/NaN/zero classification follows
/// `S.nonFiniteBehavior` and `S.nanEncoding`.
template <const fltSemantics &S>
void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
  assert(api.getBitWidth() == S.sizeInBits);
  // Position of the (implicit) integer bit within the top significand part.
  constexpr integerPart integer_bit = integerPart{1}
                                      << ((S.precision - 1) % integerPartWidth);
  // Mask of the trailing-significand bits in the top part; zero when the
  // integer bit sits at bit 0 of that part.
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  constexpr unsigned int stored_significand_parts =
      partCountForBits(trailing_significand_bits);
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
  constexpr int bias = -(S.minExponent - 1);

  // Copy the bits of the significand. We need to clear out the exponent and
  // sign bit in the last word.
  std::array<integerPart, stored_significand_parts> mysignificand;
  std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
  if constexpr (significand_mask != 0) {
    mysignificand[mysignificand.size() - 1] &= significand_mask;
  }

  // We assume the last word holds the sign bit, the exponent, and potentially
  // some of the trailing significand field.
  uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
  uint64_t myexponent =
      (last_word >> (trailing_significand_bits % 64)) & exponent_mask;

  initialize(&S);
  assert(partCount() == mysignificand.size());

  sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));

  bool all_zero_significand =
      llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });

  bool is_zero = myexponent == 0 && all_zero_significand;

  // Infinity only exists for formats with IEEE754 non-finite behavior.
  if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
    if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
      makeInf(sign);
      return;
    }
  }

  bool is_nan = false;

  if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
    is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
    // Every lower part must be all ones and the top part must equal the
    // trailing-significand mask.
    bool all_ones_significand =
        std::all_of(mysignificand.begin(), mysignificand.end() - 1,
                    [](integerPart bits) { return bits == ~integerPart{0}; }) &&
        (!significand_mask ||
         mysignificand[mysignificand.size() - 1] == significand_mask);
    is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
    // The bit pattern that would be negative zero encodes NaN.
    is_nan = is_zero && sign;
  }

  if (is_nan) {
    category = fcNaN;
    exponent = ::exponentNaN(S);
    std::copy_n(mysignificand.begin(), mysignificand.size(),
                significandParts());
    return;
  }

  if (is_zero) {
    makeZero(sign);
    return;
  }

  category = fcNormal;
  exponent = myexponent - bias;
  std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
  if (myexponent == 0) // denormal
    exponent = S.minExponent;
  else
    significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
}

// Per-format entry points: each forwards to initFromIEEEAPInt instantiated
// with the corresponding semantics object.

void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEquad>(api);
}

void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEdouble>(api);
}

void IEEEFloat::initFromFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEsingle>(api);
}

void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semBFloat>(api);
}

void IEEEFloat::initFromHalfAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEhalf>(api);
}

void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2>(api);
}

void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}

void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3>(api);
}

void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FN>(api);
}

void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
}

void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}

void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E3M4>(api);
}

void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
  initFromIEEEAPInt<semFloatTF32>(api);
}

void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E3M2FN>(api);
}

void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E2M3FN>(api);
}

void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat4E2M1FN>(api);
}

/// Treat api as containing the bits of a floating point number.
/// Dispatches on the identity of `Sem` to the matching per-format
/// initializer; the bit width of `api` must equal `Sem->sizeInBits`.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
  assert(api.getBitWidth() == Sem->sizeInBits);
  if (Sem == &semIEEEhalf)
    return initFromHalfAPInt(api);
  if (Sem == &semBFloat)
    return initFromBFloatAPInt(api);
  if (Sem == &semIEEEsingle)
    return initFromFloatAPInt(api);
  if (Sem == &semIEEEdouble)
    return initFromDoubleAPInt(api);
  if (Sem == &semX87DoubleExtended)
    return initFromF80LongDoubleAPInt(api);
  if (Sem == &semIEEEquad)
    return initFromQuadrupleAPInt(api);
  if (Sem == &semPPCDoubleDoubleLegacy)
    return initFromPPCDoubleDoubleAPInt(api);
  if (Sem == &semFloat8E5M2)
    return initFromFloat8E5M2APInt(api);
  if (Sem == &semFloat8E5M2FNUZ)
    return initFromFloat8E5M2FNUZAPInt(api);
  if (Sem == &semFloat8E4M3)
    return initFromFloat8E4M3APInt(api);
  if (Sem == &semFloat8E4M3FN)
    return initFromFloat8E4M3FNAPInt(api);
  if (Sem == &semFloat8E4M3FNUZ)
    return initFromFloat8E4M3FNUZAPInt(api);
  if (Sem == &semFloat8E4M3B11FNUZ)
    return initFromFloat8E4M3B11FNUZAPInt(api);
  if (Sem == &semFloat8E3M4)
    return initFromFloat8E3M4APInt(api);
  if (Sem == &semFloatTF32)
    return initFromFloatTF32APInt(api);
  if (Sem == &semFloat6E3M2FN)
    return initFromFloat6E3M2FNAPInt(api);
  if (Sem == &semFloat6E2M3FN)
    return initFromFloat6E2M3FNAPInt(api);
  if (Sem == &semFloat4E2M1FN)
    return initFromFloat4E2M1FNAPInt(api);

  // No known semantics matched.
  llvm_unreachable(nullptr);
}

/// Make this number the largest magnitude normal number in the given
/// semantics.
void IEEEFloat::makeLargest(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 1..10
  //   significand = 1..1
  category = fcNormal;
  sign = Negative;
  exponent = semantics->maxExponent;

  // Use memset to set all but the highest integerPart to all ones.
  integerPart *significand = significandParts();
  unsigned PartCount = partCount();
  memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));

  // Set the high integerPart especially setting all unused top bits for
  // internal consistency.
  const unsigned NumUnusedHighBits =
    PartCount*integerPartWidth - semantics->precision;
  significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
                                   ? (~integerPart(0) >> NumUnusedHighBits)
                                   : 0;

  // For NanOnly formats with the AllOnes NaN encoding, clear the lowest
  // significand bit so the result is not the all-ones significand.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    significand[0] &= ~integerPart(1);
}

/// Make this number the smallest magnitude denormal number in the given
/// semantics.
void IEEEFloat::makeSmallest(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 0..01
  category = fcNormal;
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSet(significandParts(), 1, partCount());
}

/// Make this number the value at the minimum exponent whose significand has
/// only the integer bit (bit precision - 1) set.
void IEEEFloat::makeSmallestNormalized(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 10..0

  category = fcNormal;
  zeroSignificand();
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSetBit(significandParts(), semantics->precision - 1);
}

/// Construct from the raw bit pattern `API` interpreted under `Sem`.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}

/// Construct an IEEEsingle value from the bits of a host float.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}

/// Construct an IEEEdouble value from the bits of a host double.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}

namespace {
/// Append the characters of Str to Buffer.
void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
  Buffer.append(Str.begin(), Str.end());
}

/// Removes data from the given significand until it is no more
/// precise than is required for the desired precision.
/// Divides out a power of ten from `significand` and adds the removed power
/// to `exp`.
void AdjustToPrecision(APInt &significand,
                       int &exp, unsigned FormatPrecision) {
  unsigned bits = significand.getActiveBits();

  // 196/59 is a very slight overestimate of lg_2(10).
  unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;

  if (bits <= bitsRequired) return;

  unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
  if (!tensRemovable) return;

  exp += tensRemovable;

  // Compute divisor = 10^tensRemovable by repeated squaring.
  APInt divisor(significand.getBitWidth(), 1);
  APInt powten(significand.getBitWidth(), 10);
  while (true) {
    if (tensRemovable & 1)
      divisor *= powten;
    tensRemovable >>= 1;
    if (!tensRemovable) break;
    powten *= powten;
  }

  significand = significand.udiv(divisor);

  // Truncate the significand down to its active bit count.
  significand = significand.trunc(significand.getActiveBits());
}


/// Rounds the decimal digit buffer (least significant digit first) down to
/// at most FormatPrecision digits, adding the number of dropped digits to
/// `exp`.
void AdjustToPrecision(SmallVectorImpl<char> &buffer,
                       int &exp, unsigned FormatPrecision) {
  unsigned N = buffer.size();
  if (N <= FormatPrecision) return;

  // The most significant figures are the last ones in the buffer.
  unsigned FirstSignificant = N - FormatPrecision;

  // Round.
  // FIXME: this probably shouldn't use 'round half up'.

  // Rounding down is just a truncation, except we also want to drop
  // trailing zeros from the new result.
  if (buffer[FirstSignificant - 1] < '5') {
    while (FirstSignificant < N && buffer[FirstSignificant] == '0')
      FirstSignificant++;

    exp += FirstSignificant;
    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
    return;
  }

  // Rounding up requires a decimal add-with-carry.  If we continue
  // the carry, the newly-introduced zeros will just be truncated.
  for (unsigned I = FirstSignificant; I != N; ++I) {
    if (buffer[I] == '9') {
      FirstSignificant++;
    } else {
      buffer[I]++;
      break;
    }
  }

  // If we carried through, we have exactly one digit of precision.
  if (FirstSignificant == N) {
    exp += FirstSignificant;
    buffer.clear();
    buffer.push_back('1');
    return;
  }

  exp += FirstSignificant;
  buffer.erase(&buffer[0], &buffer[FirstSignificant]);
}

/// Appends a decimal rendering of significand * 2^exp to Str.
///
/// \param isNeg             emit a leading '-'.
/// \param FormatPrecision   maximum number of significant digits; when 0 a
///                          value is derived from the significand bit width.
/// \param FormatMaxPadding  when 0, scientific notation is always used;
///                          otherwise it bounds the zero padding allowed
///                          before switching to scientific notation.
/// \param TruncateZero      when true uses 'E' and a minimal exponent; when
///                          false uses 'e', pads the fraction with zeros up
///                          to FormatPrecision and prints at least two
///                          exponent digits.
void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
                  APInt significand, unsigned FormatPrecision,
                  unsigned FormatMaxPadding, bool TruncateZero) {
  const int semanticsPrecision = significand.getBitWidth();

  if (isNeg)
    Str.push_back('-');

  // Set FormatPrecision if zero.  We want to do this before we
  // truncate trailing zeros, as those are part of the precision.
  if (!FormatPrecision) {
    // We use enough digits so the number can be round-tripped back to an
    // APFloat. The formula comes from "How to Print Floating-Point Numbers
    // Accurately" by Steele and White.
    // FIXME: Using a formula based purely on the precision is conservative;
    // we can print fewer digits depending on the actual value being printed.

    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
    FormatPrecision = 2 + semanticsPrecision * 59 / 196;
  }

  // Ignore trailing binary zeros.
  int trailingZeros = significand.countr_zero();
  exp += trailingZeros;
  significand.lshrInPlace(trailingZeros);

  // Change the exponent from 2^e to 10^e.
  if (exp == 0) {
    // Nothing to do.
  } else if (exp > 0) {
    // Just shift left.
    significand = significand.zext(semanticsPrecision + exp);
    significand <<= exp;
    exp = 0;
  } else { /* exp < 0 */
    int texp = -exp;

    // We transform this using the identity:
    //   (N)(2^-e) == (N)(5^e)(10^-e)
    // This means we have to multiply N (the significand) by 5^e.
    // To avoid overflow, we have to operate on numbers large
    // enough to store N * 5^e:
    //   log2(N * 5^e) == log2(N) + e * log2(5)
    //                 <= semantics->precision + e * 137 / 59
    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

    unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;

    // Multiply significand by 5^e.
    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
    significand = significand.zext(precision);
    APInt five_to_the_i(precision, 5);
    while (true) {
      if (texp & 1)
        significand *= five_to_the_i;

      texp >>= 1;
      if (!texp)
        break;
      five_to_the_i *= five_to_the_i;
    }
  }

  AdjustToPrecision(significand, exp, FormatPrecision);

  SmallVector<char, 256> buffer;

  // Fill the buffer.
  unsigned precision = significand.getBitWidth();
  if (precision < 4) {
    // We need enough precision to store the value 10.
    precision = 4;
    significand = significand.zext(precision);
  }
  APInt ten(precision, 10);
  APInt digit(precision, 0);

  // Digits are produced least significant first, so buffer[0] is the lowest
  // retained digit.
  bool inTrail = true;
  while (significand != 0) {
    // digit <- significand % 10
    // significand <- significand / 10
    APInt::udivrem(significand, ten, significand, digit);

    unsigned d = digit.getZExtValue();

    // Drop trailing zeros.
    if (inTrail && !d)
      exp++;
    else {
      buffer.push_back((char) ('0' + d));
      inTrail = false;
    }
  }

  assert(!buffer.empty() && "no characters in buffer!");

  // Drop down to FormatPrecision.
  // TODO: don't do more precise calculations above than are required.
  AdjustToPrecision(buffer, exp, FormatPrecision);

  unsigned NDigits = buffer.size();

  // Check whether we should use scientific notation.
  bool FormatScientific;
  if (!FormatMaxPadding)
    FormatScientific = true;
  else {
    if (exp >= 0) {
      // 765e3 --> 765000
      //              ^^^
      // But we shouldn't make the number look more precise than it is.
      FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                          NDigits + (unsigned) exp > FormatPrecision);
    } else {
      // Power of the most significant digit.
      int MSD = exp + (int) (NDigits - 1);
      if (MSD >= 0) {
        // 765e-2 == 7.65
        FormatScientific = false;
      } else {
        // 765e-5 == 0.00765
        //           ^ ^^
        FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
      }
    }
  }

  // Scientific formatting is pretty straightforward.
  if (FormatScientific) {
    exp += (NDigits - 1);

    Str.push_back(buffer[NDigits-1]);
    Str.push_back('.');
    if (NDigits == 1 && TruncateZero)
      Str.push_back('0');
    else
      for (unsigned I = 1; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
    // Fill with zeros up to FormatPrecision.
    if (!TruncateZero && FormatPrecision > NDigits - 1)
      Str.append(FormatPrecision - NDigits + 1, '0');
    // For !TruncateZero we use lower 'e'.
    Str.push_back(TruncateZero ? 'E' : 'e');

    Str.push_back(exp >= 0 ? '+' : '-');
    if (exp < 0)
      exp = -exp;
    // Exponent digits are generated in reverse and emitted back to front.
    SmallVector<char, 6> expbuf;
    do {
      expbuf.push_back((char) ('0' + (exp % 10)));
      exp /= 10;
    } while (exp);
    // Exponent always at least two digits if we do not truncate zeros.
    if (!TruncateZero && expbuf.size() < 2)
      expbuf.push_back('0');
    for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
      Str.push_back(expbuf[E-1-I]);
    return;
  }

  // Non-scientific, positive exponents.
  if (exp >= 0) {
    for (unsigned I = 0; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-1-I]);
    for (unsigned I = 0; I != (unsigned) exp; ++I)
      Str.push_back('0');
    return;
  }

  // Non-scientific, negative exponents.

  // The number of digits to the left of the decimal point.
  int NWholeDigits = exp + (int) NDigits;

  unsigned I = 0;
  if (NWholeDigits > 0) {
    for (; I != (unsigned) NWholeDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);
    Str.push_back('.');
  } else {
    unsigned NZeros = 1 + (unsigned) -NWholeDigits;

    Str.push_back('0');
    Str.push_back('.');
    for (unsigned Z = 1; Z != NZeros; ++Z)
      Str.push_back('0');
  }

  for (; I != NDigits; ++I)
    Str.push_back(buffer[NDigits-I-1]);

}
} // namespace

/// Append a textual form of this value to Str. Infinities print as
/// "+Inf"/"-Inf", NaN as "NaN", zeros are handled inline, and normal values
/// are delegated to toStringImpl.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    // FormatMaxPadding == 0 selects scientific notation for zero as well.
    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
               FormatMaxPadding, TruncateZero);

}

/// Returns true if 1/(*this) is exactly representable as a non-denormal
/// value; when `inv` is non-null the reciprocal is stored through it.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  if (significandLSB() != semantics->precision - 1)
    return false;

  // Get the inverse.
  IEEEFloat reciprocal(*semantics, 1ULL);
  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (reciprocal.isDenormal())
    return false;

  assert(reciprocal.isFiniteNonZero() &&
         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(reciprocal, *semantics);

  return true;
}

/// Returns INT_MIN when the value is zero, non-finite, or has more than one
/// significand bit set. Otherwise returns the stored exponent, adjusted by
/// the position of the single set bit when the exponent equals minExponent.
int IEEEFloat::getExactLog2Abs() const {
  if (!isFinite() || isZero())
    return INT_MIN;

  const integerPart *Parts = significandParts();
  const int PartCount = partCountForBits(semantics->precision);

  // Bail out as soon as a second set bit is seen.
  int PopCount = 0;
  for (int i = 0; i < PartCount; ++i) {
    PopCount += llvm::popcount(Parts[i]);
    if (PopCount > 1)
      return INT_MIN;
  }

  if (exponent != semantics->minExponent)
    return exponent;

  // At the minimum exponent the set bit may be below the integer bit; locate
  // it and offset the exponent by its distance from bit precision - 1.
  int CountrParts = 0;
  for (int i = 0; i < PartCount;
       ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
    if (Parts[i] != 0) {
      return exponent - semantics->precision + CountrParts +
             llvm::countr_zero(Parts[i]) + 1;
    }
  }

  llvm_unreachable("didn't find the set bit");
}

/// Returns true only for NaNs whose bit at position precision - 2 is clear,
/// in formats whose non-finite behavior is neither NanOnly nor FiniteOnly.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
      semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats that use the negative-zero encoding for NaN get +0 instead.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else if (semantics->nonFiniteBehavior ==
                 fltNonfiniteBehavior::FiniteOnly) {
        // nextUp(getLargest()) == getLargest()
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}

// Member shorthands for the file-level exponentNaN/exponentInf/exponentZero
// helpers, applied to this value's semantics.

APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}

APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}

APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}

/// Make this value an infinity with the given sign. FiniteOnly formats hit
/// llvm_unreachable; NanOnly formats produce a NaN instead.
void IEEEFloat::makeInf(bool Negative) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support Inf");

  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // There is no Inf, so make NaN instead.
    makeNaN(false, Negative);
    return;
  }
  category = fcInfinity;
  sign = Negative;
  exponent = exponentInf();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Make this value a zero with the given sign (always positive for formats
/// using the NegativeZero NaN encoding).
void IEEEFloat::makeZero(bool Negative) {
  category = fcZero;
  sign = Negative;
  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Merge negative zero to positive because 0b10000...000 is used for NaN
    sign = false;
  }
  exponent = exponentZero();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Set the bit at position precision - 2 of a NaN's significand; a no-op for
/// NanOnly formats. Asserts that this value is a NaN.
void IEEEFloat::makeQuiet() {
  assert(isNaN());
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    APInt::tcSetBit(significandParts(), semantics->precision - 2);
}

/// Returns IEK_NaN, IEK_Zero or IEK_Inf for those categories, the stored
/// exponent for non-denormal values, and for denormals the exponent obtained
/// after normalizing a copy.
int ilogb(const IEEEFloat &Arg) {
  if (Arg.isNaN())
    return IEEEFloat::IEK_NaN;
  if (Arg.isZero())
    return IEEEFloat::IEK_Zero;
  if (Arg.isInfinity())
    return IEEEFloat::IEK_Inf;
  if (!Arg.isDenormal())
    return Arg.exponent;

  // Raise the exponent of a copy so that normalize() can shift the leading
  // bit into place, then undo the offset.
  IEEEFloat Normalized(Arg);
  int SignificandBits = Arg.getSemantics().precision - 1;

  Normalized.exponent += SignificandBits;
  Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
  return Normalized.exponent - SignificandBits;
}

/// Returns X with Exp (clamped) added to its exponent and then normalized
/// under RoundingMode; a NaN result is quieted.
IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
  auto MaxExp = X.getSemantics().maxExponent;
  auto MinExp = X.getSemantics().minExponent;

  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
  // overflow; clamp it to a safe range before adding, but ensure that the range
  // is large enough that the clamp does not change the result. The range we
  // need to support is the difference between the largest possible exponent and
  // the normalized exponent of half the smallest denormal.

  int SignificandBits = X.getSemantics().precision - 1;
  int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;

  // Clamp to one past the range ends to let normalize handle overflow.
  X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
  X.normalize(RoundingMode, lfExactlyZero);
  if (X.isNaN())
    X.makeQuiet();
  return X;
}

/// Splits Val into a fraction (returned) and an exponent (stored in Exp).
/// NaN inputs are returned quieted and infinities unchanged; in those cases
/// Exp holds the corresponding ilogb sentinel. Zero yields Exp == 0.
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
  Exp = ilogb(Val);

  // Quiet signalling nans.
  if (Exp == IEEEFloat::IEK_NaN) {
    IEEEFloat Quiet(Val);
    Quiet.makeQuiet();
    return Quiet;
  }

  if (Exp == IEEEFloat::IEK_Inf)
    return Val;

  // 1 is added because frexp is defined to return a normalized fraction in
  // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
  Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
  return scalbn(Val, -Exp, RM);
}

/// Construct with two default semIEEEdouble components. S must be
/// semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct with two components built via the `uninitialized` tag. S must be
/// semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct with the first component built from I and a default second
/// component. S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct from a bit pattern: raw word 0 of I becomes the first double and
/// raw word 1 the second. S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics == &semPPCDoubleDouble);
}

DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First, 4731 APFloat &&Second) 4732 : Semantics(&S), 4733 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4734 assert(Semantics == &semPPCDoubleDouble); 4735 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4736 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4737 } 4738 4739 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4740 : Semantics(RHS.Semantics), 4741 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4742 APFloat(RHS.Floats[1])} 4743 : nullptr) { 4744 assert(Semantics == &semPPCDoubleDouble); 4745 } 4746 4747 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4748 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4749 RHS.Semantics = &semBogus; 4750 assert(Semantics == &semPPCDoubleDouble); 4751 } 4752 4753 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4754 if (Semantics == RHS.Semantics && RHS.Floats) { 4755 Floats[0] = RHS.Floats[0]; 4756 Floats[1] = RHS.Floats[1]; 4757 } else if (this != &RHS) { 4758 this->~DoubleAPFloat(); 4759 new (this) DoubleAPFloat(RHS); 4760 } 4761 return *this; 4762 } 4763 4764 // Implement addition, subtraction, multiplication and division based on: 4765 // "Software for Doubled-Precision Floating-Point Computations", 4766 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 
4767 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4768 const APFloat &c, const APFloat &cc, 4769 roundingMode RM) { 4770 int Status = opOK; 4771 APFloat z = a; 4772 Status |= z.add(c, RM); 4773 if (!z.isFinite()) { 4774 if (!z.isInfinity()) { 4775 Floats[0] = std::move(z); 4776 Floats[1].makeZero(/* Neg = */ false); 4777 return (opStatus)Status; 4778 } 4779 Status = opOK; 4780 auto AComparedToC = a.compareAbsoluteValue(c); 4781 z = cc; 4782 Status |= z.add(aa, RM); 4783 if (AComparedToC == APFloat::cmpGreaterThan) { 4784 // z = cc + aa + c + a; 4785 Status |= z.add(c, RM); 4786 Status |= z.add(a, RM); 4787 } else { 4788 // z = cc + aa + a + c; 4789 Status |= z.add(a, RM); 4790 Status |= z.add(c, RM); 4791 } 4792 if (!z.isFinite()) { 4793 Floats[0] = std::move(z); 4794 Floats[1].makeZero(/* Neg = */ false); 4795 return (opStatus)Status; 4796 } 4797 Floats[0] = z; 4798 APFloat zz = aa; 4799 Status |= zz.add(cc, RM); 4800 if (AComparedToC == APFloat::cmpGreaterThan) { 4801 // Floats[1] = a - z + c + zz; 4802 Floats[1] = a; 4803 Status |= Floats[1].subtract(z, RM); 4804 Status |= Floats[1].add(c, RM); 4805 Status |= Floats[1].add(zz, RM); 4806 } else { 4807 // Floats[1] = c - z + a + zz; 4808 Floats[1] = c; 4809 Status |= Floats[1].subtract(z, RM); 4810 Status |= Floats[1].add(a, RM); 4811 Status |= Floats[1].add(zz, RM); 4812 } 4813 } else { 4814 // q = a - z; 4815 APFloat q = a; 4816 Status |= q.subtract(z, RM); 4817 4818 // zz = q + c + (a - (q + z)) + aa + cc; 4819 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 
4820 auto zz = q; 4821 Status |= zz.add(c, RM); 4822 Status |= q.add(z, RM); 4823 Status |= q.subtract(a, RM); 4824 q.changeSign(); 4825 Status |= zz.add(q, RM); 4826 Status |= zz.add(aa, RM); 4827 Status |= zz.add(cc, RM); 4828 if (zz.isZero() && !zz.isNegative()) { 4829 Floats[0] = std::move(z); 4830 Floats[1].makeZero(/* Neg = */ false); 4831 return opOK; 4832 } 4833 Floats[0] = z; 4834 Status |= Floats[0].add(zz, RM); 4835 if (!Floats[0].isFinite()) { 4836 Floats[1].makeZero(/* Neg = */ false); 4837 return (opStatus)Status; 4838 } 4839 Floats[1] = std::move(z); 4840 Status |= Floats[1].subtract(Floats[0], RM); 4841 Status |= Floats[1].add(zz, RM); 4842 } 4843 return (opStatus)Status; 4844 } 4845 4846 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4847 const DoubleAPFloat &RHS, 4848 DoubleAPFloat &Out, 4849 roundingMode RM) { 4850 if (LHS.getCategory() == fcNaN) { 4851 Out = LHS; 4852 return opOK; 4853 } 4854 if (RHS.getCategory() == fcNaN) { 4855 Out = RHS; 4856 return opOK; 4857 } 4858 if (LHS.getCategory() == fcZero) { 4859 Out = RHS; 4860 return opOK; 4861 } 4862 if (RHS.getCategory() == fcZero) { 4863 Out = LHS; 4864 return opOK; 4865 } 4866 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 4867 LHS.isNegative() != RHS.isNegative()) { 4868 Out.makeNaN(false, Out.isNegative(), nullptr); 4869 return opInvalidOp; 4870 } 4871 if (LHS.getCategory() == fcInfinity) { 4872 Out = LHS; 4873 return opOK; 4874 } 4875 if (RHS.getCategory() == fcInfinity) { 4876 Out = RHS; 4877 return opOK; 4878 } 4879 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 4880 4881 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 4882 CC(RHS.Floats[1]); 4883 assert(&A.getSemantics() == &semIEEEdouble); 4884 assert(&AA.getSemantics() == &semIEEEdouble); 4885 assert(&C.getSemantics() == &semIEEEdouble); 4886 assert(&CC.getSemantics() == &semIEEEdouble); 4887 assert(&Out.Floats[0].getSemantics() == 
&semIEEEdouble); 4888 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 4889 return Out.addImpl(A, AA, C, CC, RM); 4890 } 4891 4892 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 4893 roundingMode RM) { 4894 return addWithSpecial(*this, RHS, *this, RM); 4895 } 4896 4897 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 4898 roundingMode RM) { 4899 changeSign(); 4900 auto Ret = add(RHS, RM); 4901 changeSign(); 4902 return Ret; 4903 } 4904 4905 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 4906 APFloat::roundingMode RM) { 4907 const auto &LHS = *this; 4908 auto &Out = *this; 4909 /* Interesting observation: For special categories, finding the lowest 4910 common ancestor of the following layered graph gives the correct 4911 return category: 4912 4913 NaN 4914 / \ 4915 Zero Inf 4916 \ / 4917 Normal 4918 4919 e.g. NaN * NaN = NaN 4920 Zero * Inf = NaN 4921 Normal * Zero = Zero 4922 Normal * Inf = Inf 4923 */ 4924 if (LHS.getCategory() == fcNaN) { 4925 Out = LHS; 4926 return opOK; 4927 } 4928 if (RHS.getCategory() == fcNaN) { 4929 Out = RHS; 4930 return opOK; 4931 } 4932 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 4933 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 4934 Out.makeNaN(false, false, nullptr); 4935 return opOK; 4936 } 4937 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 4938 Out = LHS; 4939 return opOK; 4940 } 4941 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 4942 Out = RHS; 4943 return opOK; 4944 } 4945 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 4946 "Special cases not handled exhaustively"); 4947 4948 int Status = opOK; 4949 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 4950 // t = a * c 4951 APFloat T = A; 4952 Status |= T.multiply(C, RM); 4953 if (!T.isFiniteNonZero()) { 4954 Floats[0] = T; 4955 Floats[1].makeZero(/* Neg = */ false); 4956 
return (opStatus)Status; 4957 } 4958 4959 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 4960 APFloat Tau = A; 4961 T.changeSign(); 4962 Status |= Tau.fusedMultiplyAdd(C, T, RM); 4963 T.changeSign(); 4964 { 4965 // v = a * d 4966 APFloat V = A; 4967 Status |= V.multiply(D, RM); 4968 // w = b * c 4969 APFloat W = B; 4970 Status |= W.multiply(C, RM); 4971 Status |= V.add(W, RM); 4972 // tau += v + w 4973 Status |= Tau.add(V, RM); 4974 } 4975 // u = t + tau 4976 APFloat U = T; 4977 Status |= U.add(Tau, RM); 4978 4979 Floats[0] = U; 4980 if (!U.isFinite()) { 4981 Floats[1].makeZero(/* Neg = */ false); 4982 } else { 4983 // Floats[1] = (t - u) + tau 4984 Status |= T.subtract(U, RM); 4985 Status |= T.add(Tau, RM); 4986 Floats[1] = T; 4987 } 4988 return (opStatus)Status; 4989 } 4990 4991 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 4992 APFloat::roundingMode RM) { 4993 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 4994 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 4995 auto Ret = 4996 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 4997 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 4998 return Ret; 4999 } 5000 5001 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 5002 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5003 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5004 auto Ret = 5005 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5006 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5007 return Ret; 5008 } 5009 5010 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 5011 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5012 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5013 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5014 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5015 return Ret; 
5016 } 5017 5018 APFloat::opStatus 5019 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 5020 const DoubleAPFloat &Addend, 5021 APFloat::roundingMode RM) { 5022 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5023 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5024 auto Ret = Tmp.fusedMultiplyAdd( 5025 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 5026 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 5027 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5028 return Ret; 5029 } 5030 5031 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 5032 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5033 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5034 auto Ret = Tmp.roundToIntegral(RM); 5035 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5036 return Ret; 5037 } 5038 5039 void DoubleAPFloat::changeSign() { 5040 Floats[0].changeSign(); 5041 Floats[1].changeSign(); 5042 } 5043 5044 APFloat::cmpResult 5045 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 5046 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 5047 if (Result != cmpEqual) 5048 return Result; 5049 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 5050 if (Result == cmpLessThan || Result == cmpGreaterThan) { 5051 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 5052 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 5053 if (Against && !RHSAgainst) 5054 return cmpLessThan; 5055 if (!Against && RHSAgainst) 5056 return cmpGreaterThan; 5057 if (!Against && !RHSAgainst) 5058 return Result; 5059 if (Against && RHSAgainst) 5060 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 5061 } 5062 return Result; 5063 } 5064 5065 APFloat::fltCategory DoubleAPFloat::getCategory() const { 5066 return Floats[0].getCategory(); 5067 } 5068 5069 bool DoubleAPFloat::isNegative() 
const { return Floats[0].isNegative(); } 5070 5071 void DoubleAPFloat::makeInf(bool Neg) { 5072 Floats[0].makeInf(Neg); 5073 Floats[1].makeZero(/* Neg = */ false); 5074 } 5075 5076 void DoubleAPFloat::makeZero(bool Neg) { 5077 Floats[0].makeZero(Neg); 5078 Floats[1].makeZero(/* Neg = */ false); 5079 } 5080 5081 void DoubleAPFloat::makeLargest(bool Neg) { 5082 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5083 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 5084 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 5085 if (Neg) 5086 changeSign(); 5087 } 5088 5089 void DoubleAPFloat::makeSmallest(bool Neg) { 5090 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5091 Floats[0].makeSmallest(Neg); 5092 Floats[1].makeZero(/* Neg = */ false); 5093 } 5094 5095 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 5096 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5097 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 5098 if (Neg) 5099 Floats[0].changeSign(); 5100 Floats[1].makeZero(/* Neg = */ false); 5101 } 5102 5103 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 5104 Floats[0].makeNaN(SNaN, Neg, fill); 5105 Floats[1].makeZero(/* Neg = */ false); 5106 } 5107 5108 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 5109 auto Result = Floats[0].compare(RHS.Floats[0]); 5110 // |Float[0]| > |Float[1]| 5111 if (Result == APFloat::cmpEqual) 5112 return Floats[1].compare(RHS.Floats[1]); 5113 return Result; 5114 } 5115 5116 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 5117 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 5118 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 5119 } 5120 5121 hash_code hash_value(const DoubleAPFloat &Arg) { 5122 if (Arg.Floats) 5123 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 5124 return hash_combine(Arg.Semantics); 5125 } 5126 5127 
APInt DoubleAPFloat::bitcastToAPInt() const { 5128 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5129 uint64_t Data[] = { 5130 Floats[0].bitcastToAPInt().getRawData()[0], 5131 Floats[1].bitcastToAPInt().getRawData()[0], 5132 }; 5133 return APInt(128, 2, Data); 5134 } 5135 5136 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 5137 roundingMode RM) { 5138 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5139 APFloat Tmp(semPPCDoubleDoubleLegacy); 5140 auto Ret = Tmp.convertFromString(S, RM); 5141 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5142 return Ret; 5143 } 5144 5145 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5146 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5147 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5148 auto Ret = Tmp.next(nextDown); 5149 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5150 return Ret; 5151 } 5152 5153 APFloat::opStatus 5154 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5155 unsigned int Width, bool IsSigned, 5156 roundingMode RM, bool *IsExact) const { 5157 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5158 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5159 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5160 } 5161 5162 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5163 bool IsSigned, 5164 roundingMode RM) { 5165 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5166 APFloat Tmp(semPPCDoubleDoubleLegacy); 5167 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5168 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5169 return Ret; 5170 } 5171 5172 APFloat::opStatus 5173 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5174 unsigned int InputSize, 5175 bool IsSigned, roundingMode RM) { 5176 assert(Semantics == &semPPCDoubleDouble && 
"Unexpected Semantics"); 5177 APFloat Tmp(semPPCDoubleDoubleLegacy); 5178 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5179 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5180 return Ret; 5181 } 5182 5183 APFloat::opStatus 5184 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5185 unsigned int InputSize, 5186 bool IsSigned, roundingMode RM) { 5187 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5188 APFloat Tmp(semPPCDoubleDoubleLegacy); 5189 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5190 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5191 return Ret; 5192 } 5193 5194 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5195 unsigned int HexDigits, 5196 bool UpperCase, 5197 roundingMode RM) const { 5198 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5199 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5200 .convertToHexString(DST, HexDigits, UpperCase, RM); 5201 } 5202 5203 bool DoubleAPFloat::isDenormal() const { 5204 return getCategory() == fcNormal && 5205 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5206 // (double)(Hi + Lo) == Hi defines a normal number. 
5207 Floats[0] != Floats[0] + Floats[1]); 5208 } 5209 5210 bool DoubleAPFloat::isSmallest() const { 5211 if (getCategory() != fcNormal) 5212 return false; 5213 DoubleAPFloat Tmp(*this); 5214 Tmp.makeSmallest(this->isNegative()); 5215 return Tmp.compare(*this) == cmpEqual; 5216 } 5217 5218 bool DoubleAPFloat::isSmallestNormalized() const { 5219 if (getCategory() != fcNormal) 5220 return false; 5221 5222 DoubleAPFloat Tmp(*this); 5223 Tmp.makeSmallestNormalized(this->isNegative()); 5224 return Tmp.compare(*this) == cmpEqual; 5225 } 5226 5227 bool DoubleAPFloat::isLargest() const { 5228 if (getCategory() != fcNormal) 5229 return false; 5230 DoubleAPFloat Tmp(*this); 5231 Tmp.makeLargest(this->isNegative()); 5232 return Tmp.compare(*this) == cmpEqual; 5233 } 5234 5235 bool DoubleAPFloat::isInteger() const { 5236 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5237 return Floats[0].isInteger() && Floats[1].isInteger(); 5238 } 5239 5240 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5241 unsigned FormatPrecision, 5242 unsigned FormatMaxPadding, 5243 bool TruncateZero) const { 5244 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5245 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5246 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5247 } 5248 5249 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5250 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5251 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5252 if (!inv) 5253 return Tmp.getExactInverse(nullptr); 5254 APFloat Inv(semPPCDoubleDoubleLegacy); 5255 auto Ret = Tmp.getExactInverse(&Inv); 5256 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5257 return Ret; 5258 } 5259 5260 int DoubleAPFloat::getExactLog2() const { 5261 // TODO: Implement me 5262 return INT_MIN; 5263 } 5264 5265 int DoubleAPFloat::getExactLog2Abs() const { 5266 // TODO: Implement me 5267 return INT_MIN; 5268 } 5269 5270 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5271 APFloat::roundingMode RM) { 5272 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5273 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5274 scalbn(Arg.Floats[1], Exp, RM)); 5275 } 5276 5277 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5278 APFloat::roundingMode RM) { 5279 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5280 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5281 APFloat Second = Arg.Floats[1]; 5282 if (Arg.getCategory() == APFloat::fcNormal) 5283 Second = scalbn(Second, -Exp, RM); 5284 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5285 } 5286 5287 } // namespace detail 5288 5289 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5290 if (usesLayout<IEEEFloat>(Semantics)) { 5291 new (&IEEE) IEEEFloat(std::move(F)); 5292 return; 5293 } 5294 if (usesLayout<DoubleAPFloat>(Semantics)) { 5295 const fltSemantics& S = F.getSemantics(); 5296 new (&Double) 5297 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5298 APFloat(semIEEEdouble)); 5299 return; 5300 } 5301 llvm_unreachable("Unexpected semantics"); 5302 } 5303 5304 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5305 roundingMode RM) { 5306 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5307 } 5308 5309 hash_code hash_value(const APFloat &Arg) { 5310 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5311 return hash_value(Arg.U.IEEE); 5312 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5313 return hash_value(Arg.U.Double); 5314 llvm_unreachable("Unexpected semantics"); 5315 } 5316 5317 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5318 : APFloat(Semantics) { 5319 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5320 assert(StatusOrErr && "Invalid floating point representation"); 5321 consumeError(StatusOrErr.takeError()); 5322 } 
5323 5324 FPClassTest APFloat::classify() const { 5325 if (isZero()) 5326 return isNegative() ? fcNegZero : fcPosZero; 5327 if (isNormal()) 5328 return isNegative() ? fcNegNormal : fcPosNormal; 5329 if (isDenormal()) 5330 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5331 if (isInfinity()) 5332 return isNegative() ? fcNegInf : fcPosInf; 5333 assert(isNaN() && "Other class of FP constant"); 5334 return isSignaling() ? fcSNan : fcQNan; 5335 } 5336 5337 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5338 roundingMode RM, bool *losesInfo) { 5339 if (&getSemantics() == &ToSemantics) { 5340 *losesInfo = false; 5341 return opOK; 5342 } 5343 if (usesLayout<IEEEFloat>(getSemantics()) && 5344 usesLayout<IEEEFloat>(ToSemantics)) 5345 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5346 if (usesLayout<IEEEFloat>(getSemantics()) && 5347 usesLayout<DoubleAPFloat>(ToSemantics)) { 5348 assert(&ToSemantics == &semPPCDoubleDouble); 5349 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5350 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5351 return Ret; 5352 } 5353 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5354 usesLayout<IEEEFloat>(ToSemantics)) { 5355 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5356 *this = APFloat(std::move(getIEEE()), ToSemantics); 5357 return Ret; 5358 } 5359 llvm_unreachable("Unexpected semantics"); 5360 } 5361 5362 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5363 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5364 } 5365 5366 void APFloat::print(raw_ostream &OS) const { 5367 SmallVector<char, 16> Buffer; 5368 toString(Buffer); 5369 OS << Buffer << "\n"; 5370 } 5371 5372 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5373 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); } 5374 #endif 5375 5376 void APFloat::Profile(FoldingSetNodeID &NID) const { 5377 NID.Add(bitcastToAPInt()); 5378 } 5379 5380 /* Same as 
convertToInteger(integerPart*, ...), except the result is returned in 5381 an APSInt, whose initial bit-width and signed-ness are used to determine the 5382 precision of the conversion. 5383 */ 5384 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5385 roundingMode rounding_mode, 5386 bool *isExact) const { 5387 unsigned bitWidth = result.getBitWidth(); 5388 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5389 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5390 rounding_mode, isExact); 5391 // Keeps the original signed-ness. 5392 result = APInt(bitWidth, parts); 5393 return status; 5394 } 5395 5396 double APFloat::convertToDouble() const { 5397 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5398 return getIEEE().convertToDouble(); 5399 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5400 "Float semantics is not representable by IEEEdouble"); 5401 APFloat Temp = *this; 5402 bool LosesInfo; 5403 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5404 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5405 (void)St; 5406 return Temp.getIEEE().convertToDouble(); 5407 } 5408 5409 #ifdef HAS_IEE754_FLOAT128 5410 float128 APFloat::convertToQuad() const { 5411 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) 5412 return getIEEE().convertToQuad(); 5413 assert(getSemantics().isRepresentableBy(semIEEEquad) && 5414 "Float semantics is not representable by IEEEquad"); 5415 APFloat Temp = *this; 5416 bool LosesInfo; 5417 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); 5418 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5419 (void)St; 5420 return Temp.getIEEE().convertToQuad(); 5421 } 5422 #endif 5423 5424 float APFloat::convertToFloat() const { 5425 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5426 return getIEEE().convertToFloat(); 5427 
assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5428 "Float semantics is not representable by IEEEsingle"); 5429 APFloat Temp = *this; 5430 bool LosesInfo; 5431 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5432 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5433 (void)St; 5434 return Temp.getIEEE().convertToFloat(); 5435 } 5436 5437 } // namespace llvm 5438 5439 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5440