//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class to represent arbitrary precision floating
// point values and provide a variety of arithmetic operations on them.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <limits.h>

/// Expands to a statement that returns METHOD_CALL invoked on U.IEEE when
/// getSemantics() uses the IEEEFloat layout, or on U.Double when it uses the
/// DoubleAPFloat layout. Because every path returns (or hits
/// llvm_unreachable), the macro must be the last statement of the function
/// that uses it.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)

using namespace llvm;

/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
// NOTE(review): the multiplier 4 presumably equals the number of fcCategory
// enumerators, which would give every (lhs, rhs) pair a distinct key --
// confirm against the fcCategory declaration.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");

namespace llvm {

// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};

// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};

/* Represents floating point arithmetic semantics.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* How Inf/NaN are encoded; defaults to plain IEEE 754 behavior.  */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* Which bit pattern(s) denote NaN; defaults to the IEEE encoding.  */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;

  /* Whether this semantics has an encoding for Zero */
  bool hasZero = true;

  /* Whether this semantics can represent signed values */
  bool hasSignedRepr = true;

  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Does not take into account
  // the value of fltNonfiniteBehavior.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};

// Initializer order below is {maxExponent, minExponent, precision, sizeInBits
// [, nonFiniteBehavior [, nanEncoding [, hasZero, hasSignedRepr]]]}, following
// the member order of fltSemantics.
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semFloat8E8M0FNU = {
    127,   -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes,
    false, false};

static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat4E2M1FN = {
    2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static constexpr fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics.
   Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi.  The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.  */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation.  It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double.  For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.
*/ 203 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 204 53 + 53, 128}; 205 206 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 207 switch (S) { 208 case S_IEEEhalf: 209 return IEEEhalf(); 210 case S_BFloat: 211 return BFloat(); 212 case S_IEEEsingle: 213 return IEEEsingle(); 214 case S_IEEEdouble: 215 return IEEEdouble(); 216 case S_IEEEquad: 217 return IEEEquad(); 218 case S_PPCDoubleDouble: 219 return PPCDoubleDouble(); 220 case S_Float8E5M2: 221 return Float8E5M2(); 222 case S_Float8E5M2FNUZ: 223 return Float8E5M2FNUZ(); 224 case S_Float8E4M3: 225 return Float8E4M3(); 226 case S_Float8E4M3FN: 227 return Float8E4M3FN(); 228 case S_Float8E4M3FNUZ: 229 return Float8E4M3FNUZ(); 230 case S_Float8E4M3B11FNUZ: 231 return Float8E4M3B11FNUZ(); 232 case S_Float8E3M4: 233 return Float8E3M4(); 234 case S_FloatTF32: 235 return FloatTF32(); 236 case S_Float8E8M0FNU: 237 return Float8E8M0FNU(); 238 case S_Float6E3M2FN: 239 return Float6E3M2FN(); 240 case S_Float6E2M3FN: 241 return Float6E2M3FN(); 242 case S_Float4E2M1FN: 243 return Float4E2M1FN(); 244 case S_x87DoubleExtended: 245 return x87DoubleExtended(); 246 } 247 llvm_unreachable("Unrecognised floating semantics"); 248 } 249 250 APFloatBase::Semantics 251 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 252 if (&Sem == &llvm::APFloat::IEEEhalf()) 253 return S_IEEEhalf; 254 else if (&Sem == &llvm::APFloat::BFloat()) 255 return S_BFloat; 256 else if (&Sem == &llvm::APFloat::IEEEsingle()) 257 return S_IEEEsingle; 258 else if (&Sem == &llvm::APFloat::IEEEdouble()) 259 return S_IEEEdouble; 260 else if (&Sem == &llvm::APFloat::IEEEquad()) 261 return S_IEEEquad; 262 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 263 return S_PPCDoubleDouble; 264 else if (&Sem == &llvm::APFloat::Float8E5M2()) 265 return S_Float8E5M2; 266 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 267 return S_Float8E5M2FNUZ; 268 else if (&Sem == &llvm::APFloat::Float8E4M3()) 269 return 
S_Float8E4M3; 270 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 271 return S_Float8E4M3FN; 272 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 273 return S_Float8E4M3FNUZ; 274 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 275 return S_Float8E4M3B11FNUZ; 276 else if (&Sem == &llvm::APFloat::Float8E3M4()) 277 return S_Float8E3M4; 278 else if (&Sem == &llvm::APFloat::FloatTF32()) 279 return S_FloatTF32; 280 else if (&Sem == &llvm::APFloat::Float8E8M0FNU()) 281 return S_Float8E8M0FNU; 282 else if (&Sem == &llvm::APFloat::Float6E3M2FN()) 283 return S_Float6E3M2FN; 284 else if (&Sem == &llvm::APFloat::Float6E2M3FN()) 285 return S_Float6E2M3FN; 286 else if (&Sem == &llvm::APFloat::Float4E2M1FN()) 287 return S_Float4E2M1FN; 288 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 289 return S_x87DoubleExtended; 290 else 291 llvm_unreachable("Unknown floating semantics"); 292 } 293 294 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 295 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 296 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 297 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 298 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 299 const fltSemantics &APFloatBase::PPCDoubleDouble() { 300 return semPPCDoubleDouble; 301 } 302 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 303 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 304 const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; } 305 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 306 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 307 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 308 return semFloat8E4M3B11FNUZ; 309 } 310 const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; } 311 const fltSemantics &APFloatBase::FloatTF32() { return 
semFloatTF32; } 312 const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; } 313 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } 314 const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } 315 const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; } 316 const fltSemantics &APFloatBase::x87DoubleExtended() { 317 return semX87DoubleExtended; 318 } 319 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 320 321 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 322 constexpr RoundingMode APFloatBase::rmTowardPositive; 323 constexpr RoundingMode APFloatBase::rmTowardNegative; 324 constexpr RoundingMode APFloatBase::rmTowardZero; 325 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 326 327 /* A tight upper bound on number of parts required to hold the value 328 pow(5, power) is 329 330 power * 815 / (351 * integerPartWidth) + 1 331 332 However, whilst the result may require only this many parts, 333 because we are multiplying two values to get it, the 334 multiplication may require an extra part with the excess part 335 being zero (consider the trivial case of 1 * 1, tcFullMultiply 336 requires two parts to hold the single-part result). So we add an 337 extra one to guarantee enough space whilst multiplying. 
*/ 338 const unsigned int maxExponent = 16383; 339 const unsigned int maxPrecision = 113; 340 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 341 const unsigned int maxPowerOfFiveParts = 342 2 + 343 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 344 345 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 346 return semantics.precision; 347 } 348 APFloatBase::ExponentType 349 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 350 return semantics.maxExponent; 351 } 352 APFloatBase::ExponentType 353 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 354 return semantics.minExponent; 355 } 356 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 357 return semantics.sizeInBits; 358 } 359 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 360 bool isSigned) { 361 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 362 // at least one more bit than the MaxExponent to hold the max FP value. 363 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 364 // Extra sign bit needed. 365 if (isSigned) 366 ++MinBitWidth; 367 return MinBitWidth; 368 } 369 370 bool APFloatBase::semanticsHasZero(const fltSemantics &semantics) { 371 return semantics.hasZero; 372 } 373 374 bool APFloatBase::semanticsHasSignedRepr(const fltSemantics &semantics) { 375 return semantics.hasSignedRepr; 376 } 377 378 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 379 const fltSemantics &Dst) { 380 // Exponent range must be larger. 381 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 382 return false; 383 384 // If the mantissa is long enough, the result value could still be denormal 385 // with a larger exponent range. 386 // 387 // FIXME: This condition is probably not accurate but also shouldn't be a 388 // practical concern with existing types. 
389 return Dst.precision >= Src.precision; 390 } 391 392 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 393 return Sem.sizeInBits; 394 } 395 396 static constexpr APFloatBase::ExponentType 397 exponentZero(const fltSemantics &semantics) { 398 return semantics.minExponent - 1; 399 } 400 401 static constexpr APFloatBase::ExponentType 402 exponentInf(const fltSemantics &semantics) { 403 return semantics.maxExponent + 1; 404 } 405 406 static constexpr APFloatBase::ExponentType 407 exponentNaN(const fltSemantics &semantics) { 408 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 409 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 410 return exponentZero(semantics); 411 return semantics.maxExponent; 412 } 413 return semantics.maxExponent + 1; 414 } 415 416 /* A bunch of private, handy routines. */ 417 418 static inline Error createError(const Twine &Err) { 419 return make_error<StringError>(Err, inconvertibleErrorCode()); 420 } 421 422 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 423 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) / 424 APFloatBase::integerPartWidth); 425 } 426 427 /* Returns 0U-9U. Return values >= 10U are not digits. */ 428 static inline unsigned int 429 decDigitValue(unsigned int c) 430 { 431 return c - '0'; 432 } 433 434 /* Return the value of a decimal exponent of the form 435 [+-]ddddddd. 436 437 If the exponent overflows, returns a large exponent with the 438 appropriate sign. */ 439 static Expected<int> readExponent(StringRef::iterator begin, 440 StringRef::iterator end) { 441 bool isNegative; 442 unsigned int absExponent; 443 const unsigned int overlargeExponent = 24000; /* FIXME. 
*/ 444 StringRef::iterator p = begin; 445 446 // Treat no exponent as 0 to match binutils 447 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 448 return 0; 449 } 450 451 isNegative = (*p == '-'); 452 if (*p == '-' || *p == '+') { 453 p++; 454 if (p == end) 455 return createError("Exponent has no digits"); 456 } 457 458 absExponent = decDigitValue(*p++); 459 if (absExponent >= 10U) 460 return createError("Invalid character in exponent"); 461 462 for (; p != end; ++p) { 463 unsigned int value; 464 465 value = decDigitValue(*p); 466 if (value >= 10U) 467 return createError("Invalid character in exponent"); 468 469 absExponent = absExponent * 10U + value; 470 if (absExponent >= overlargeExponent) { 471 absExponent = overlargeExponent; 472 break; 473 } 474 } 475 476 if (isNegative) 477 return -(int) absExponent; 478 else 479 return (int) absExponent; 480 } 481 482 /* This is ugly and needs cleaning up, but I don't immediately see 483 how whilst remaining safe. */ 484 static Expected<int> totalExponent(StringRef::iterator p, 485 StringRef::iterator end, 486 int exponentAdjustment) { 487 int unsignedExponent; 488 bool negative, overflow; 489 int exponent = 0; 490 491 if (p == end) 492 return createError("Exponent has no digits"); 493 494 negative = *p == '-'; 495 if (*p == '-' || *p == '+') { 496 p++; 497 if (p == end) 498 return createError("Exponent has no digits"); 499 } 500 501 unsignedExponent = 0; 502 overflow = false; 503 for (; p != end; ++p) { 504 unsigned int value; 505 506 value = decDigitValue(*p); 507 if (value >= 10U) 508 return createError("Invalid character in exponent"); 509 510 unsignedExponent = unsignedExponent * 10 + value; 511 if (unsignedExponent > 32767) { 512 overflow = true; 513 break; 514 } 515 } 516 517 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 518 overflow = true; 519 520 if (!overflow) { 521 exponent = unsignedExponent; 522 if (negative) 523 exponent = -exponent; 524 exponent += exponentAdjustment; 525 if 
(exponent > 32767 || exponent < -32768) 526 overflow = true; 527 } 528 529 if (overflow) 530 exponent = negative ? -32768: 32767; 531 532 return exponent; 533 } 534 535 static Expected<StringRef::iterator> 536 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 537 StringRef::iterator *dot) { 538 StringRef::iterator p = begin; 539 *dot = end; 540 while (p != end && *p == '0') 541 p++; 542 543 if (p != end && *p == '.') { 544 *dot = p++; 545 546 if (end - begin == 1) 547 return createError("Significand has no digits"); 548 549 while (p != end && *p == '0') 550 p++; 551 } 552 553 return p; 554 } 555 556 /* Given a normal decimal floating point number of the form 557 558 dddd.dddd[eE][+-]ddd 559 560 where the decimal point and exponent are optional, fill out the 561 structure D. Exponent is appropriate if the significand is 562 treated as an integer, and normalizedExponent if the significand 563 is taken to have the decimal point after a single leading 564 non-zero digit. 565 566 If the value is zero, V->firstSigDigit points to a non-digit, and 567 the return exponent is zero. 
*/
struct decimalInfo {
  const char *firstSigDigit; // first character after leading zeroes / dot
  const char *lastSigDigit;  // last digit kept after trailing zeroes are cut
  int exponent;              // exponent with the significand read as integer
  int normalizedExponent;    // exponent with the point after the first digit
};

/// Scan the decimal number in [begin, end) and fill in *D.
///
/// Returns an Error if the text contains more than one dot, has a character
/// other than 'e'/'E' terminating the significand, has no significand digits
/// before the exponent marker, or has a malformed exponent.
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  // Walk over the significand digits, remembering where the dot is.
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      // Inner loop steps back over '0's; outer loop repeats if we stopped on
      // the dot, so zeroes on both sides of it are removed.
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}

/* Return the trailing fraction of a hexadecimal number.
643 DIGITVALUE is the first hex digit of the fraction, P points to 644 the next digit. */ 645 static Expected<lostFraction> 646 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 647 unsigned int digitValue) { 648 unsigned int hexDigit; 649 650 /* If the first trailing digit isn't 0 or 8 we can work out the 651 fraction immediately. */ 652 if (digitValue > 8) 653 return lfMoreThanHalf; 654 else if (digitValue < 8 && digitValue > 0) 655 return lfLessThanHalf; 656 657 // Otherwise we need to find the first non-zero digit. 658 while (p != end && (*p == '0' || *p == '.')) 659 p++; 660 661 if (p == end) 662 return createError("Invalid trailing hexadecimal fraction!"); 663 664 hexDigit = hexDigitValue(*p); 665 666 /* If we ran off the end it is exactly zero or one-half, otherwise 667 a little more. */ 668 if (hexDigit == UINT_MAX) 669 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 670 else 671 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 672 } 673 674 /* Return the fraction lost were a bignum truncated losing the least 675 significant BITS bits. */ 676 static lostFraction 677 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 678 unsigned int partCount, 679 unsigned int bits) 680 { 681 unsigned int lsb; 682 683 lsb = APInt::tcLSB(parts, partCount); 684 685 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ 686 if (bits <= lsb) 687 return lfExactlyZero; 688 if (bits == lsb + 1) 689 return lfExactlyHalf; 690 if (bits <= partCount * APFloatBase::integerPartWidth && 691 APInt::tcExtractBit(parts, bits - 1)) 692 return lfMoreThanHalf; 693 694 return lfLessThanHalf; 695 } 696 697 /* Shift DST right BITS bits noting lost fraction. 
*/ 698 static lostFraction 699 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 700 { 701 lostFraction lost_fraction; 702 703 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 704 705 APInt::tcShiftRight(dst, parts, bits); 706 707 return lost_fraction; 708 } 709 710 /* Combine the effect of two lost fractions. */ 711 static lostFraction 712 combineLostFractions(lostFraction moreSignificant, 713 lostFraction lessSignificant) 714 { 715 if (lessSignificant != lfExactlyZero) { 716 if (moreSignificant == lfExactlyZero) 717 moreSignificant = lfLessThanHalf; 718 else if (moreSignificant == lfExactlyHalf) 719 moreSignificant = lfMoreThanHalf; 720 } 721 722 return moreSignificant; 723 } 724 725 /* The error from the true value, in half-ulps, on multiplying two 726 floating point numbers, which differ from the value they 727 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 728 than the returned value. 729 730 See "How to Read Floating Point Numbers Accurately" by William D 731 Clinger. */ 732 static unsigned int 733 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 734 { 735 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 736 737 if (HUerr1 + HUerr2 == 0) 738 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 739 else 740 return inexactMultiply + 2 * (HUerr1 + HUerr2); 741 } 742 743 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 744 when the least significant BITS are truncated. BITS cannot be 745 zero. 
*/
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  // Locate the part holding the topmost truncated bit and how many of that
  // part's low bits belong to the truncated region.
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  if (count == 0) {
    // Unsigned subtraction wraps, so the smaller of the two differences is
    // the absolute distance to the boundary.
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    // Just above the boundary: every part strictly between must be zero.
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    // Just below the boundary: every part strictly between must be all ones.
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}

/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  // Successive squarings 5^8, 5^16, 5^32, ... stored back to back.
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  p1 = dst;
  p2 = scratch;

  // The low three bits of POWER are handled by table lookup; the remaining
  // bits select which of the squarings get multiplied in.
  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      // Square the previous entry, which sits partsCount parts before pow5.
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* Swap the buffers: afterwards the running product (RESULT parts)
         is in p1 and p2 is scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    pow5 += partsCount;
  }

  // An odd number of swaps leaves the product in scratch; copy it out.
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}

/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";

/* Write out an integerPart in hexadecimal, starting with the most
   significant nibble.  Write out exactly COUNT hexdigits, return
   COUNT.  */
static unsigned int
partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
           const char *hexDigitChars)
{
  unsigned int result = count;

  assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);

  // Drop the low nibbles that are not wanted, then emit from the end of DST
  // backwards so the most significant nibble lands at dst[0].
  part >>= (APFloatBase::integerPartWidth - 4 * count);
  while (count--) {
    dst[count] = hexDigitChars[part & 0xf];
    part >>= 4;
  }

  return result;
}

/* Write out an unsigned decimal integer.
*/ 875 static char * 876 writeUnsignedDecimal (char *dst, unsigned int n) 877 { 878 char buff[40], *p; 879 880 p = buff; 881 do 882 *p++ = '0' + n % 10; 883 while (n /= 10); 884 885 do 886 *dst++ = *--p; 887 while (p != buff); 888 889 return dst; 890 } 891 892 /* Write out a signed decimal integer. */ 893 static char * 894 writeSignedDecimal (char *dst, int value) 895 { 896 if (value < 0) { 897 *dst++ = '-'; 898 dst = writeUnsignedDecimal(dst, -(unsigned) value); 899 } else 900 dst = writeUnsignedDecimal(dst, value); 901 902 return dst; 903 } 904 905 namespace detail { 906 /* Constructors. */ 907 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 908 unsigned int count; 909 910 semantics = ourSemantics; 911 count = partCount(); 912 if (count > 1) 913 significand.parts = new integerPart[count]; 914 } 915 916 void IEEEFloat::freeSignificand() { 917 if (needsCleanup()) 918 delete [] significand.parts; 919 } 920 921 void IEEEFloat::assign(const IEEEFloat &rhs) { 922 assert(semantics == rhs.semantics); 923 924 sign = rhs.sign; 925 category = rhs.category; 926 exponent = rhs.exponent; 927 if (isFiniteNonZero() || category == fcNaN) 928 copySignificand(rhs); 929 } 930 931 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 932 assert(isFiniteNonZero() || category == fcNaN); 933 assert(rhs.partCount() >= partCount()); 934 935 APInt::tcAssign(significandParts(), rhs.significandParts(), 936 partCount()); 937 } 938 939 /* Make this number a NaN, with an arbitrary but deterministic value 940 for the significand. If double or longer, this is a signalling NaN, 941 which may not be ideal. If float, this is QNaN(0). 
*/
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support NaN");

  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");

  category = fcNaN;
  sign = Negative;
  exponent = exponentNaN();

  integerPart *significand = significandParts();
  unsigned numParts = partCount();

  APInt fill_storage;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // NaN-only types do not distinguish signalling and quiet NaN, so
    // treat them all as quiet. The caller's fill is replaced by the
    // payload the NaN encoding dictates.
    SNaN = false;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
      sign = true;
      fill_storage = APInt::getZero(semantics->precision - 1);
    } else {
      fill_storage = APInt::getAllOnes(semantics->precision - 1);
    }
    fill = &fill_storage;
  }

  // Set the significand bits to the fill.
  if (!fill || fill->getNumWords() < numParts)
    APInt::tcSet(significand, 0, numParts);
  if (fill) {
    APInt::tcAssign(significand, fill->getRawData(),
                    std::min(fill->getNumWords(), numParts));

    // Zero out the excess bits of the significand.
    unsigned bitsToPreserve = semantics->precision - 1;
    unsigned part = bitsToPreserve / 64;
    bitsToPreserve %= 64;
    significand[part] &= ((1ULL << bitsToPreserve) - 1);
    for (part++; part != numParts; ++part)
      significand[part] = 0;
  }

  // The quiet bit is the top fraction bit; clamp to bit 0 for formats with
  // fewer than two bits of precision.
  unsigned QNaNBit =
      (semantics->precision >= 2) ? (semantics->precision - 2) : 0;

  if (SNaN) {
    // We always have to clear the QNaN bit to make it an SNaN.
    APInt::tcClearBit(significand, QNaNBit);

    // If there are no bits set in the payload, we have to set
    // *something* to make it a NaN instead of an infinity;
    // conventionally, this is the next bit down from the QNaN bit.
    if (APInt::tcIsZero(significand, numParts))
      APInt::tcSetBit(significand, QNaNBit - 1);
  } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // The only NaN is a quiet NaN, and it has no bits set in the significand.
    // Do nothing.
  } else {
    // We always have to set the QNaN bit to make it a QNaN.
    APInt::tcSetBit(significand, QNaNBit);
  }

  // For x87 extended precision, we want to make a NaN, not a
  // pseudo-NaN.  Maybe we should expose the ability to make
  // pseudo-NaNs?
  if (semantics == &semX87DoubleExtended)
    APInt::tcSetBit(significand, QNaNBit + 1);
}

// Copy assignment. Storage is re-created only when the semantics differ;
// self-assignment is a no-op.
IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
  if (this != &rhs) {
    if (semantics != rhs.semantics) {
      freeSignificand();
      initialize(rhs.semantics);
    }
    assign(rhs);
  }

  return *this;
}

// Move assignment. Takes over rhs's significand member as-is and leaves rhs
// tagged with semBogus semantics.
IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
  freeSignificand();

  semantics = rhs.semantics;
  significand = rhs.significand;
  exponent = rhs.exponent;
  category = rhs.category;
  sign = rhs.sign;

  rhs.semantics = &semBogus;
  return *this;
}

// A denormal is a finite non-zero value at the minimum exponent whose
// integer bit (bit precision - 1) is clear.
bool IEEEFloat::isDenormal() const {
  return isFiniteNonZero() && (exponent == semantics->minExponent) &&
         (APInt::tcExtractBit(significandParts(),
                              semantics->precision - 1) == 0);
}

bool IEEEFloat::isSmallest() const {
  // The smallest number by magnitude in our format will be the smallest
  // denormal, i.e. the floating point number with exponent being minimum
  // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
  return isFiniteNonZero() && exponent == semantics->minExponent &&
    significandMSB() == 0;
}

// True for a normal value at the minimum exponent whose significand has only
// its most significant bit set.
bool IEEEFloat::isSmallestNormalized() const {
  return getCategory() == fcNormal && exponent == semantics->minExponent &&
         isSignificandAllZerosExceptMSB();
}

// Number of bits in the top significand part that lie above the fraction
// bits (the integer bit is counted among them when precision > 1).
unsigned int IEEEFloat::getNumHighBits() const {
  const unsigned int PartCount = partCountForBits(semantics->precision);
  const unsigned int Bits = PartCount * integerPartWidth;

  // Compute how many bits are used in the final word.
  // When precision is just 1, it represents the 'Pth'
  // Precision bit and not the actual significand bit.
  const unsigned int NumHighBits = (semantics->precision > 1)
                                       ? (Bits - semantics->precision + 1)
                                       : (Bits - semantics->precision);
  return NumHighBits;
}

bool IEEEFloat::isSignificandAllOnes() const {
  // Test if the significand excluding the integral bit is all ones. This allows
  // us to test for binade boundaries.
  const integerPart *Parts = significandParts();
  const unsigned PartCount = partCountForBits(semantics->precision);
  for (unsigned i = 0; i < PartCount - 1; i++)
    if (~Parts[i])
      return false;

  // Set the unused high bits to all ones when we compare.
  const unsigned NumHighBits = getNumHighBits();
  assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
         "Can not have more high bits to fill than integerPartWidth");
  const integerPart HighBitFill =
    ~integerPart(0) << (integerPartWidth - NumHighBits);
  // A precision of 1 leaves no fraction bits, so it never counts as all ones.
  if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
    return false;

  return true;
}

bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
  // Test if the significand excluding the integral bit is all ones except for
  // the least significant bit.
1095 const integerPart *Parts = significandParts(); 1096 1097 if (Parts[0] & 1) 1098 return false; 1099 1100 const unsigned PartCount = partCountForBits(semantics->precision); 1101 for (unsigned i = 0; i < PartCount - 1; i++) { 1102 if (~Parts[i] & ~unsigned{!i}) 1103 return false; 1104 } 1105 1106 // Set the unused high bits to all ones when we compare. 1107 const unsigned NumHighBits = getNumHighBits(); 1108 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1109 "Can not have more high bits to fill than integerPartWidth"); 1110 const integerPart HighBitFill = ~integerPart(0) 1111 << (integerPartWidth - NumHighBits); 1112 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1113 return false; 1114 1115 return true; 1116 } 1117 1118 bool IEEEFloat::isSignificandAllZeros() const { 1119 // Test if the significand excluding the integral bit is all zeros. This 1120 // allows us to test for binade boundaries. 1121 const integerPart *Parts = significandParts(); 1122 const unsigned PartCount = partCountForBits(semantics->precision); 1123 1124 for (unsigned i = 0; i < PartCount - 1; i++) 1125 if (Parts[i]) 1126 return false; 1127 1128 // Compute how many bits are used in the final word. 
1129 const unsigned NumHighBits = getNumHighBits(); 1130 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1131 "clear than integerPartWidth"); 1132 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1133 1134 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask)) 1135 return false; 1136 1137 return true; 1138 } 1139 1140 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1141 const integerPart *Parts = significandParts(); 1142 const unsigned PartCount = partCountForBits(semantics->precision); 1143 1144 for (unsigned i = 0; i < PartCount - 1; i++) { 1145 if (Parts[i]) 1146 return false; 1147 } 1148 1149 const unsigned NumHighBits = getNumHighBits(); 1150 const integerPart MSBMask = integerPart(1) 1151 << (integerPartWidth - NumHighBits); 1152 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask)); 1153 } 1154 1155 bool IEEEFloat::isLargest() const { 1156 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent; 1157 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1158 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1159 // The largest number by magnitude in our format will be the floating point 1160 // number with maximum exponent and with significand that is all ones except 1161 // the LSB. 1162 return (IsMaxExp && APFloat::hasSignificand(*semantics)) 1163 ? isSignificandAllOnesExceptLSB() 1164 : IsMaxExp; 1165 } else { 1166 // The largest number by magnitude in our format will be the floating point 1167 // number with maximum exponent and with significand that is all ones. 1168 return IsMaxExp && isSignificandAllOnes(); 1169 } 1170 } 1171 1172 bool IEEEFloat::isInteger() const { 1173 // This could be made more efficient; I'm going for obviously correct. 
1174 if (!isFinite()) return false; 1175 IEEEFloat truncated = *this; 1176 truncated.roundToIntegral(rmTowardZero); 1177 return compare(truncated) == cmpEqual; 1178 } 1179 1180 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1181 if (this == &rhs) 1182 return true; 1183 if (semantics != rhs.semantics || 1184 category != rhs.category || 1185 sign != rhs.sign) 1186 return false; 1187 if (category==fcZero || category==fcInfinity) 1188 return true; 1189 1190 if (isFiniteNonZero() && exponent != rhs.exponent) 1191 return false; 1192 1193 return std::equal(significandParts(), significandParts() + partCount(), 1194 rhs.significandParts()); 1195 } 1196 1197 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1198 initialize(&ourSemantics); 1199 sign = 0; 1200 category = fcNormal; 1201 zeroSignificand(); 1202 exponent = ourSemantics.precision - 1; 1203 significandParts()[0] = value; 1204 normalize(rmNearestTiesToEven, lfExactlyZero); 1205 } 1206 1207 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1208 initialize(&ourSemantics); 1209 // The Float8E8MOFNU format does not have a representation 1210 // for zero. So, use the closest representation instead. 1211 // Moreover, the all-zero encoding represents a valid 1212 // normal value (which is the smallestNormalized here). 1213 // Hence, we call makeSmallestNormalized (where category is 1214 // 'fcNormal') instead of makeZero (where category is 'fcZero'). 1215 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false); 1216 } 1217 1218 // Delegate to the previous constructor, because later copy constructor may 1219 // actually inspects category, which can't be garbage. 
1220 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1221 : IEEEFloat(ourSemantics) {} 1222 1223 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1224 initialize(rhs.semantics); 1225 assign(rhs); 1226 } 1227 1228 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1229 *this = std::move(rhs); 1230 } 1231 1232 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1233 1234 unsigned int IEEEFloat::partCount() const { 1235 return partCountForBits(semantics->precision + 1); 1236 } 1237 1238 const APFloat::integerPart *IEEEFloat::significandParts() const { 1239 return const_cast<IEEEFloat *>(this)->significandParts(); 1240 } 1241 1242 APFloat::integerPart *IEEEFloat::significandParts() { 1243 if (partCount() > 1) 1244 return significand.parts; 1245 else 1246 return &significand.part; 1247 } 1248 1249 void IEEEFloat::zeroSignificand() { 1250 APInt::tcSet(significandParts(), 0, partCount()); 1251 } 1252 1253 /* Increment an fcNormal floating point number's significand. */ 1254 void IEEEFloat::incrementSignificand() { 1255 integerPart carry; 1256 1257 carry = APInt::tcIncrement(significandParts(), partCount()); 1258 1259 /* Our callers should never cause us to overflow. */ 1260 assert(carry == 0); 1261 (void)carry; 1262 } 1263 1264 /* Add the significand of the RHS. Returns the carry flag. */ 1265 APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1266 integerPart *parts; 1267 1268 parts = significandParts(); 1269 1270 assert(semantics == rhs.semantics); 1271 assert(exponent == rhs.exponent); 1272 1273 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1274 } 1275 1276 /* Subtract the significand of the RHS with a borrow flag. Returns 1277 the borrow flag. 
*/
APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
                                                    integerPart borrow) {
  integerPart *parts;

  parts = significandParts();

  assert(semantics == rhs.semantics);
  assert(exponent == rhs.exponent);

  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
                           partCount());
}

/* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
   on to the full-precision result of the multiplication.  Returns the
   lost fraction.

   ADDEND is only folded in when IGNOREADDEND is false and ADDEND is
   non-zero.  The exponent of this value is updated to match the product;
   the result is not necessarily normalized (see the note near the end).  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend,
                                            bool ignoreAddend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Up to four parts fit in the on-stack scratch buffer; anything larger is
  // heap-allocated and released at the end of this function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (!ignoreAddend && addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Temporarily make *this a value of the extended semantics whose
    // significand is the full product, so the regular add routine can be
    // reused.  The saved state is restored below.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
                                    &ignored);
    assert(status == APFloat::opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  */
    // In the single-part case the sum was written to the inline part, so copy
    // it back into the working buffer first.
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent need to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, not precision;
  // presumably the extra one accounts for the overflow bit -- confirm.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}

lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  // When the given semantics has zero, the addend here is a zero.
  // i.e . it belongs to the 'fcZero' category.
  // But when the semantics does not support zero, we need to
  // explicitly convey that this addend should be ignored
  // for multiplication.
  return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
}

/* Divide the significand of this value by the significand of RHS, leaving
   the quotient in this value's significand and adjusting the exponent.
   Returns the lost fraction.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  // One buffer holds both working copies: the dividend in the first
  // partsCount parts and the divisor in the following partsCount parts.  The
  // on-stack scratch buffer is used when both fit (partsCount <= 2).
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.  */
  // Our own significand is cleared here; quotient bits are set into it below.
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division.  */
  // Produces one quotient bit per iteration, from bit precision - 1 down to
  // bit 0.
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction.  */
  // The remainder has already been doubled by the final shift above, so
  // comparing it with the divisor tells on which side of one half it lies.
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}

/* Result of APInt::tcMSB over all significand parts.  normalize() adds one
   to it and treats a resulting zero as "no bit set".  */
unsigned int IEEEFloat::significandMSB() const {
  return APInt::tcMSB(significandParts(), partCount());
}

/* Result of APInt::tcLSB over all significand parts.  */
unsigned int IEEEFloat::significandLSB() const {
  return APInt::tcLSB(significandParts(), partCount());
}

/* Shift the significand right BITS bits, add BITS to the exponent, and
   return the fraction shifted out.
   Note that a zero result is NOT normalized to fcZero.  */
lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
  /* Our exponent should not overflow.  */
  assert((ExponentType) (exponent + bits) >= exponent);

  exponent += bits;

  return shiftRight(significandParts(), partCount(), bits);
}

/* Shift the significand left BITS bits, subtract BITS from its exponent.  */
void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
  assert(bits < semantics->precision ||
         (semantics->precision == 1 && bits <= 1));

  if (bits) {
    unsigned int partsCount = partCount();

    APInt::tcShiftLeft(significandParts(), partsCount, bits);
    exponent -= bits;

    assert(!APInt::tcIsZero(significandParts(), partsCount));
  }
}

/* Compare the magnitudes of two finite non-zero values with the same
   semantics, ignoring their signs: first by exponent, then by
   significand.  */
APFloat::cmpResult IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
  int compare;

  assert(semantics == rhs.semantics);
  assert(isFiniteNonZero());
  assert(rhs.isFiniteNonZero());

  compare = exponent - rhs.exponent;

  /* If exponents are equal, do an unsigned bignum comparison of the
     significands.  */
  if (compare == 0)
    compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
                               partCount());

  if (compare > 0)
    return cmpGreaterThan;
  else if (compare < 0)
    return cmpLessThan;
  else
    return cmpEqual;
}

/* Set the least significant BITS bits of a bignum, clear the
   rest.  DST has PARTS words.  */
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
                                      unsigned bits) {
  unsigned i = 0;
  // Whole words that are entirely set.
  while (bits > APInt::APINT_BITS_PER_WORD) {
    dst[i++] = ~(APInt::WordType)0;
    bits -= APInt::APINT_BITS_PER_WORD;
  }

  // The final, possibly partial, word.
  if (bits)
    dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);

  // Everything above is cleared.
  while (i < parts)
    dst[i++] = 0;
}

/* Handle overflow.  Sign is preserved.  We either become infinity or
   the largest finite number.
*/
APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
  // Formats that are finite-only skip straight to the largest finite value.
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
    /* Infinity?  */
    if (rounding_mode == rmNearestTiesToEven ||
        rounding_mode == rmNearestTiesToAway ||
        (rounding_mode == rmTowardPositive && !sign) ||
        (rounding_mode == rmTowardNegative && sign)) {
      // NaN-only formats have no infinity, so they produce a NaN instead.
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
        makeNaN(false, sign);
      else
        category = fcInfinity;
      return static_cast<opStatus>(opOverflow | opInexact);
    }
  }

  /* Otherwise we become the largest finite number.  */
  category = fcNormal;
  exponent = semantics->maxExponent;
  tcSetLeastSignificantBits(significandParts(), partCount(),
                            semantics->precision);
  // With the all-ones NaN encoding, the all-ones significand at the maximum
  // exponent is treated as overflow (see normalize()), so the largest finite
  // value has its LSB cleared.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    APInt::tcClearBit(significandParts(), 0);

  return opInexact;
}

/* Returns TRUE if, when truncating the current number, with BIT the
   new LSB, with the given lost fraction and rounding mode, the result
   would need to be rounded away from zero (i.e., by increasing the
   significand).  This routine must work for fcZero of both signs, and
   fcNormal numbers.  */
bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
                                  lostFraction lost_fraction,
                                  unsigned int bit) const {
  /* NaNs and infinities should not have lost fractions.  */
  assert(isFiniteNonZero() || category == fcZero);

  /* Current callers never pass this so we don't handle it.  */
  assert(lost_fraction != lfExactlyZero);

  switch (rounding_mode) {
  case rmNearestTiesToAway:
    return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;

  case rmNearestTiesToEven:
    if (lost_fraction == lfMoreThanHalf)
      return true;

    /* Our zeroes don't have a significand to test.  */
    // On an exact tie, round away only if that makes the new LSB even, i.e.
    // if BIT is currently set.
    if (lost_fraction == lfExactlyHalf && category != fcZero)
      return APInt::tcExtractBit(significandParts(), bit);

    return false;

  case rmTowardZero:
    return false;

  case rmTowardPositive:
    return !sign;

  case rmTowardNegative:
    return sign;

  default:
    break;
  }
  llvm_unreachable("Invalid rounding mode found");
}

/* Bring a finite non-zero value into canonical form: place the MSB of the
   significand at the integer bit where the exponent range allows it, then
   round according to ROUNDING_MODE using LOST_FRACTION (the fraction already
   discarded by the caller).  Values of any other category are returned
   unchanged with opOK.  Returns the resulting status flags.  */
APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                       lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones values is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  */
    if (omsb == 0) {
      category = fcZero;
      // In the NegativeZero encoding the negative-zero bit pattern is the
      // NaN, so a zero result is always made positive.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      if (!semantics->hasZero)
        makeSmallestNormalized(false);
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones values is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
    // This condition handles the case where the semantics
    // does not have zero but uses the all-zero encoding
    // to represent the smallest normal value.
    if (!semantics->hasZero)
      makeSmallestNormalized(false);
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}

/* Handle every add/subtract operand pairing in which at least one side is
   zero, infinity or NaN.  For two normal operands nothing is computed and
   opDivByZero is returned (the fcNormal/fcNormal case below).
   NOTE(review): that value presumably acts as a "not handled here" marker
   for the caller rather than a real status -- confirm against the caller.  */
APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                   bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // NaN on the right only: adopt it, then share the NaN handling below.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    // A signalling NaN in either operand reports opInvalidOp; if the result
    // itself is signalling it is quieted first.
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The left operand is already the result.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted.  */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;
  }
}

/* Add or subtract two normal numbers.
*/
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
                                                 bool subtract) {
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction.  */
  subtract ^= static_cast<bool>(sign ^ rhs.sign);

  /* Are we bigger exponent-wise than the RHS?  */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect.  */
  if (subtract) {
    if ((bits < 0) && !semantics->hasSignedRepr)
      llvm_unreachable(
          "This floating point format does not support signed values");

    IEEEFloat temp_rhs(rhs);

    // Bring both operands to a common exponent.  The operand with the smaller
    // exponent is shifted right by one bit less than the difference and the
    // other is shifted left by one, so both end up one below the larger
    // exponent.
    if (bits == 0)
      lost_fraction = lfExactlyZero;
    else if (bits > 0) {
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
    } else {
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
    }

    // Should we reverse the subtraction.
    // The smaller magnitude is always subtracted from the larger one; a
    // non-zero lost fraction is passed in as the incoming borrow.
    if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted.  */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary.  */
    assert(!carry);
    (void)carry;
  } else {
    // Addition: shift whichever operand has the smaller exponent right so
    // that the exponents agree, then add the significands.
    if (bits > 0) {
      IEEEFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen.  */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}

/* Handle every multiplication operand pairing in which at least one side is
   zero, infinity or NaN.  Two normal operands are left untouched and opOK is
   returned.  */
APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // NaN on the right only: adopt it.  The sign is cleared here so that the
  // xor below yields exactly rhs.sign.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    // NOTE(review): the xor presumably undoes a sign xor the caller applied
    // before calling this -- confirm against the caller.
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    category = fcInfinity;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcZero, fcZero):
    category = fcZero;
    return opOK;

  // Zero times infinity is invalid.
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

/* Handle every division operand pairing in which at least one side is zero,
   infinity or NaN.  Two normal operands are left untouched and opOK is
   returned.  */
APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // NaN on the right only: adopt it.  The sign is cleared here so that the
  // xor below yields exactly rhs.sign.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The left operand's category is already that of the result.
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  // Finite non-zero divided by zero: infinity, or NaN for formats that have
  // no infinity.
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return opDivByZero;

  // inf / inf and 0 / 0 are invalid.
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ?
opInvalidOp : opOK; 2040 2041 case PackCategoriesIntoKey(fcZero, fcInfinity): 2042 case PackCategoriesIntoKey(fcZero, fcNormal): 2043 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2044 return opOK; 2045 2046 case PackCategoriesIntoKey(fcNormal, fcZero): 2047 case PackCategoriesIntoKey(fcInfinity, fcZero): 2048 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2049 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2050 case PackCategoriesIntoKey(fcZero, fcZero): 2051 makeNaN(); 2052 return opInvalidOp; 2053 2054 case PackCategoriesIntoKey(fcNormal, fcNormal): 2055 return opOK; 2056 } 2057 } 2058 2059 APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 2060 switch (PackCategoriesIntoKey(category, rhs.category)) { 2061 default: 2062 llvm_unreachable(nullptr); 2063 2064 case PackCategoriesIntoKey(fcZero, fcNaN): 2065 case PackCategoriesIntoKey(fcNormal, fcNaN): 2066 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2067 assign(rhs); 2068 [[fallthrough]]; 2069 case PackCategoriesIntoKey(fcNaN, fcZero): 2070 case PackCategoriesIntoKey(fcNaN, fcNormal): 2071 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2072 case PackCategoriesIntoKey(fcNaN, fcNaN): 2073 if (isSignaling()) { 2074 makeQuiet(); 2075 return opInvalidOp; 2076 } 2077 return rhs.isSignaling() ? opInvalidOp : opOK; 2078 2079 case PackCategoriesIntoKey(fcZero, fcInfinity): 2080 case PackCategoriesIntoKey(fcZero, fcNormal): 2081 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2082 return opOK; 2083 2084 case PackCategoriesIntoKey(fcNormal, fcZero): 2085 case PackCategoriesIntoKey(fcInfinity, fcZero): 2086 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2087 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2088 case PackCategoriesIntoKey(fcZero, fcZero): 2089 makeNaN(); 2090 return opInvalidOp; 2091 2092 case PackCategoriesIntoKey(fcNormal, fcNormal): 2093 return opDivByZero; // fake status, indicating this is not a special case 2094 } 2095 } 2096 2097 /* Change sign. 
*/ 2098 void IEEEFloat::changeSign() { 2099 // With NaN-as-negative-zero, neither NaN or negative zero can change 2100 // their signs. 2101 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 2102 (isZero() || isNaN())) 2103 return; 2104 /* Look mummy, this one's easy. */ 2105 sign = !sign; 2106 } 2107 2108 /* Normalized addition or subtraction. */ 2109 APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2110 roundingMode rounding_mode, 2111 bool subtract) { 2112 opStatus fs; 2113 2114 fs = addOrSubtractSpecials(rhs, subtract); 2115 2116 /* This return code means it was not a simple case. */ 2117 if (fs == opDivByZero) { 2118 lostFraction lost_fraction; 2119 2120 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2121 fs = normalize(rounding_mode, lost_fraction); 2122 2123 /* Can only be zero if we lost no fraction. */ 2124 assert(category != fcZero || lost_fraction == lfExactlyZero); 2125 } 2126 2127 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2128 positive zero unless rounding to minus infinity, except that 2129 adding two like-signed zeroes gives that zero. */ 2130 if (category == fcZero) { 2131 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2132 sign = (rounding_mode == rmTowardNegative); 2133 // NaN-in-negative-zero means zeros need to be normalized to +0. 2134 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2135 sign = false; 2136 } 2137 2138 return fs; 2139 } 2140 2141 /* Normalized addition. */ 2142 APFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2143 roundingMode rounding_mode) { 2144 return addOrSubtract(rhs, rounding_mode, false); 2145 } 2146 2147 /* Normalized subtraction. */ 2148 APFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2149 roundingMode rounding_mode) { 2150 return addOrSubtract(rhs, rounding_mode, true); 2151 } 2152 2153 /* Normalized multiply. 
*/
APFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
                                      roundingMode rounding_mode) {
  opStatus fs;

  // The sign of a product is the xor of the operand signs; multiplySpecials
  // undoes this again when the result turns out to be a NaN.
  sign ^= rhs.sign;
  fs = multiplySpecials(rhs);

  // Formats that encode NaN as negative zero only have a positive zero.
  if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
    sign = false;
  // Only a finite non-zero value at this point needs the significand product
  // and a rounding step.
  if (isFiniteNonZero()) {
    lostFraction lost_fraction = multiplySignificand(rhs);
    fs = normalize(rounding_mode, lost_fraction);
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);
  }

  return fs;
}

/* Normalized divide.  */
APFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
                                    roundingMode rounding_mode) {
  opStatus fs;

  // The sign of a quotient is the xor of the operand signs; divideSpecials
  // undoes this again when the result turns out to be a NaN.
  sign ^= rhs.sign;
  fs = divideSpecials(rhs);

  // Formats that encode NaN as negative zero only have a positive zero.
  if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
    sign = false;
  // Only a finite non-zero value at this point needs the significand division
  // and a rounding step.
  if (isFiniteNonZero()) {
    lostFraction lost_fraction = divideSignificand(rhs);
    fs = normalize(rounding_mode, lost_fraction);
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);
  }

  return fs;
}

/* Normalized remainder.  */
APFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
  opStatus fs;
  unsigned int origSign = sign;

  // First handle the special cases. remainderSpecials reports opDivByZero as
  // a sentinel when both operands are finite and non-zero.
  fs = remainderSpecials(rhs);
  if (fs != opDivByZero)
    return fs;

  fs = opOK;

  // Make sure the current value is less than twice the denom. If the addition
  // did not succeed (an overflow has happened), then the finite value we
  // currently possess must already be less than twice the denom (as we are
  // using the same semantics).
  IEEEFloat P2 = rhs;
  if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
    fs = mod(P2);
    assert(fs == opOK);
  }

  // Let's work with absolute numbers.
  IEEEFloat P = rhs;
  P.sign = false;
  sign = false;

  //
  // To calculate the remainder we use the following scheme.
  //
  // The remainder is defined as follows:
  //
  // remainder = numer - rquot * denom = x - r * p
  //
  // Where r is the result of: x/p, rounded toward the nearest integral value
  // (with halfway cases rounded toward the even number).
  //
  // Currently, (after x mod 2p):
  // r is the number of 2p's present inside x, which is inherently an even
  // number of p's.
  //
  // We may split the remaining calculation into 4 options:
  // - if x < 0.5p then we round to the nearest number, which is 0, and are
  //   done.
  // - if x == 0.5p then we round to the nearest even number, which is 0, and
  //   we are done as well.
  // - if 0.5p < x < p then we round to the nearest number, which is 1, and we
  //   have to subtract 1p at least once.
  // - if x >= p then we must subtract p at least once, as x must be a
  //   remainder.
  //
  // By now, we are either done, or we added 1 to r, which is in turn now an
  // odd number.
  //
  // We can now split the remaining calculation into the following 3 options:
  // - if x < 0.5p then we round to the nearest number, which is 0, and are
  //   done.
  // - if x == 0.5p then we round to the nearest even number. As r is odd, we
  //   must round up to the next even number, so we must subtract p once more.
  // - if x > 0.5p (and inherently x < p) then we must round r up to the next
  //   integral, and subtract p once more.
  //

  // Extend the semantics to prevent an overflow/underflow or inexact result.
  bool losesInfo;
  fltSemantics extendedSemantics = *semantics;
  extendedSemantics.maxExponent++;
  extendedSemantics.minExponent--;
  extendedSemantics.precision += 2;

  // VEx and PEx are copies of |x| and |p| in the widened format; they are
  // only used for the comparisons, the subtractions that produce the result
  // are performed on *this and P in the original format.
  IEEEFloat VEx = *this;
  fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  IEEEFloat PEx = P;
  fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);

  // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
  // any fraction.
  fs = VEx.add(VEx, rmNearestTiesToEven);
  assert(fs == opOK);

  // 2x > p, i.e. x > 0.5p: subtract p once.
  if (VEx.compare(PEx) == cmpGreaterThan) {
    fs = subtract(P, rmNearestTiesToEven);
    assert(fs == opOK);

    // Make VEx = this.add(this), but because we have different semantics, we do
    // not want to `convert` again, so we just subtract PEx twice (which equals
    // the desired value).
    fs = VEx.subtract(PEx, rmNearestTiesToEven);
    assert(fs == opOK);
    fs = VEx.subtract(PEx, rmNearestTiesToEven);
    assert(fs == opOK);

    // Still 2x >= p, i.e. x >= 0.5p with an odd r: subtract p once more.
    cmpResult result = VEx.compare(PEx);
    if (result == cmpGreaterThan || result == cmpEqual) {
      fs = subtract(P, rmNearestTiesToEven);
      assert(fs == opOK);
    }
  }

  if (isZero()) {
    sign = origSign;    // IEEE754 requires this
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      // But some 8-bit floats only have positive 0.
      sign = false;
  }

  else
    // The computation above ran on absolute values; fold the sign of the
    // original dividend back in.
    sign ^= origSign;
  return fs;
}

/* Normalized llvm frem (C fmod).
*/
APFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
  opStatus fs;
  fs = modSpecials(rhs);
  unsigned int origSign = sign;

  // Repeatedly subtract the largest power-of-two multiple of rhs that does not
  // exceed the current magnitude, until the magnitude drops below |rhs|.
  while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
         compareAbsoluteValue(rhs) != cmpLessThan) {
    int Exp = ilogb(*this) - ilogb(rhs);
    IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
    // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
    // check for it. If V is unusable or larger in magnitude than the current
    // value, fall back to the next smaller power-of-two multiple.
    if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
      V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
    // Give V our own sign so that the subtraction reduces the magnitude.
    V.sign = sign;

    fs = subtract(V, rmNearestTiesToEven);

    // When the semantics supports zero, this loop's
    // exit-condition is handled by the 'isFiniteNonZero'
    // category check above. However, when the semantics
    // does not have 'fcZero' and we have reached the
    // minimum possible value, (and any further subtract
    // will underflow to the same value) explicitly
    // provide an exit-path here.
    if (!semantics->hasZero && this->isSmallest())
      break;

    assert(fs==opOK);
  }
  if (isZero()) {
    sign = origSign; // fmod requires this
    // Formats that encode NaN as negative zero only have a positive zero.
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }
  return fs;
}

/* Normalized fused-multiply-add.  */
APFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
                                              const IEEEFloat &addend,
                                              roundingMode rounding_mode) {
  opStatus fs;

  /* Post-multiplication sign, before addition.  */
  sign ^= multiplicand.sign;

  /* If and only if all arguments are normal do we need to do an
     extended-precision calculation.  */
  if (isFiniteNonZero() &&
      multiplicand.isFiniteNonZero() &&
      addend.isFinite()) {
    lostFraction lost_fraction;

    // The product and the addition are computed together here, so only the
    // single normalize() call below rounds the result.
    lost_fraction = multiplySignificand(multiplicand, addend);
    fs = normalize(rounding_mode, lost_fraction);
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);

    /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
       positive zero unless rounding to minus infinity, except that
       adding two like-signed zeroes gives that zero.  */
    if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
      sign = (rounding_mode == rmTowardNegative);
      // Formats that encode NaN as negative zero only have a positive zero.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }
  } else {
    fs = multiplySpecials(multiplicand);

    /* FS can only be opOK or opInvalidOp.  There is no more work
       to do in the latter case.  The IEEE-754R standard says it is
       implementation-defined in this case whether, if ADDEND is a
       quiet NaN, we raise invalid op; this implementation does so.

       If we need to do the addition we can do so with normal
       precision.  */
    if (fs == opOK)
      fs = addOrSubtract(addend, rounding_mode, false);
  }

  return fs;
}

/* Rounding-mode correct round to integral value.  */
APFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
  opStatus fs;

  if (isInfinity())
    // [IEEE Std 754-2008 6.1]:
    // The behavior of infinity in floating-point arithmetic is derived from the
    // limiting cases of real arithmetic with operands of arbitrarily
    // large magnitude, when such a limit exists.
    // ...
    // Operations on infinite operands are usually exact and therefore signal no
    // exceptions ...
    return opOK;

  if (isNaN()) {
    if (isSignaling()) {
      // [IEEE Std 754-2008 6.2]:
      // Under default exception handling, any operation signaling an invalid
      // operation exception and for which a floating-point result is to be
      // delivered shall deliver a quiet NaN.
      makeQuiet();
      // [IEEE Std 754-2008 6.2]:
      // Signaling NaNs shall be reserved operands that, under default exception
      // handling, signal the invalid operation exception(see 7.2) for every
      // general-computational and signaling-computational operation except for
      // the conversions described in 5.12.
      return opInvalidOp;
    } else {
      // [IEEE Std 754-2008 6.2]:
      // For an operation with quiet NaN inputs, other than maximum and minimum
      // operations, if a floating-point result is to be delivered the result
      // shall be a quiet NaN which should be one of the input NaNs.
      // ...
      // Every general-computational and quiet-computational operation involving
      // one or more input NaNs, none of them signaling, shall signal no
      // exception, except fusedMultiplyAdd might signal the invalid operation
      // exception(see 7.2).
      return opOK;
    }
  }

  if (isZero()) {
    // [IEEE Std 754-2008 6.3]:
    // ... the sign of the result of conversions, the quantize operation, the
    // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
    // the sign of the first or only operand.
    return opOK;
  }

  // If the exponent is large enough, we know that this value is already
  // integral, and the arithmetic below would potentially cause it to saturate
  // to +/-Inf.  Bail out early instead.
  if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
    return opOK;

  // The algorithm here is quite simple: we add 2^(p-1), where p is the
  // precision of our format, and then subtract it back off again.  The choice
  // of rounding modes for the addition/subtraction determines the rounding mode
  // for our integral rounding as well.
  // NOTE: When the input value is negative, we do subtraction followed by
  // addition instead.
  APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
                        1);
  IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
  IEEEFloat MagicConstant(*semantics);
  fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
                                      rmNearestTiesToEven);
  assert(fs == opOK);
  // Giving the constant our own sign is what turns "add then subtract" into
  // "subtract then add" for negative inputs.
  MagicConstant.sign = sign;

  // Preserve the input sign so that we can handle the case of zero result
  // correctly.
  bool inputSign = isNegative();

  // Only the status of this first step is reported to the caller.
  fs = add(MagicConstant, rounding_mode);

  // Current value and 'MagicConstant' are both integers, so the result of the
  // subtraction is always exact according to Sterbenz' lemma.
  subtract(MagicConstant, rounding_mode);

  // Restore the input sign.
  if (inputSign != isNegative())
    changeSign();

  return fs;
}

/* Comparison requires normalized numbers.
*/
APFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
  cmpResult result;

  assert(semantics == rhs.semantics);

  // First settle every combination in which at least one operand is not a
  // finite non-zero number; only fcNormal vs. fcNormal falls through to the
  // magnitude comparison below.
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // Any comparison involving a NaN is unordered.
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    return cmpUnordered;

  // The LHS category dominates here, so the LHS sign alone decides.
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  // The RHS category dominates here, so the RHS sign alone decides.
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    if (rhs.sign)
      return cmpGreaterThan;
    else
      return cmpLessThan;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    if (sign == rhs.sign)
      return cmpEqual;
    else if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  // Zeroes compare equal regardless of their signs.
  case PackCategoriesIntoKey(fcZero, fcZero):
    return cmpEqual;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    break;
  }

  /* Two normal numbers.  Do they have the same sign?  */
  if (sign != rhs.sign) {
    if (sign)
      result = cmpLessThan;
    else
      result = cmpGreaterThan;
  } else {
    /* Compare absolute values; invert result if negative.  */
    result = compareAbsoluteValue(rhs);

    if (sign) {
      if (result == cmpLessThan)
        result = cmpGreaterThan;
      else if (result == cmpGreaterThan)
        result = cmpLessThan;
    }
  }

  return result;
}

/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
/// are edge cases where this is not so).
///
/// \param toSemantics   the format to convert this value to, in place.
/// \param rounding_mode the rounding applied when the value is normalized in
///                      the new format.
/// \param losesInfo     output flag as described above; it is written on every
///                      path and must not be null.

APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
                                     roundingMode rounding_mode,
                                     bool *losesInfo) {
  // Note that this local shares its name with the lostFraction type.
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;
  int shift;
  const fltSemantics &fromSemantics = *semantics;
  // Sample this before the significand is shifted or rebuilt below.
  bool is_signaling = isSignaling();

  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();
  // Positive when widening the significand, negative when narrowing it.
  shift = toSemantics.precision - fromSemantics.precision;

  bool X86SpecialNan = false;
  if (&fromSemantics == &semX87DoubleExtended &&
      &toSemantics != &semX87DoubleExtended && category == fcNaN &&
      (!(*significandParts() & 0x8000000000000000ULL) ||
       !(*significandParts() & 0x4000000000000000ULL))) {
    // x86 has some unusual NaNs which cannot be represented in any other
    // format; note them here.
    X86SpecialNan = true;
  }

  // If this is a truncation of a denormal number, and the target semantics
  // has larger exponent range than the source semantics (this can happen
  // when truncating from PowerPC double-double to double format), the
  // right shift could lose result mantissa bits.  Adjust exponent instead
  // of performing excessive shift.
  // Also do a similar trick in case shifting denormal would produce zero
  // significand as this case isn't handled correctly by normalize.
  if (shift < 0 && isFiniteNonZero()) {
    int omsb = significandMSB() + 1;
    int exponentChange = omsb - fromSemantics.precision;
    if (exponent + exponentChange < toSemantics.minExponent)
      exponentChange = toSemantics.minExponent - exponent;
    if (exponentChange < shift)
      exponentChange = shift;
    if (exponentChange < 0) {
      shift -= exponentChange;
      exponent += exponentChange;
    } else if (omsb <= -shift) {
      exponentChange = omsb + shift - 1; // leave at least one bit set
      shift -= exponentChange;
      exponent += exponentChange;
    }
  }

  // If this is a truncation, perform the shift before we narrow the storage.
  // NaNs of NanOnly source formats are skipped here.
  if (shift < 0 && (isFiniteNonZero() ||
                    (category == fcNaN && semantics->nonFiniteBehavior !=
                                              fltNonfiniteBehavior::NanOnly)))
    lostFraction = shiftRight(significandParts(), oldPartCount, -shift);

  // Fix the storage so it can hold the new value.
  if (newPartCount > oldPartCount) {
    // The new type requires more storage; make it available.
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    if (isFiniteNonZero() || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount == 1 && oldPartCount != 1) {
    // Switch to built-in storage for a single part.
    integerPart newPart = 0;
    if (isFiniteNonZero() || category==fcNaN)
      newPart = significandParts()[0];
    freeSignificand();
    significand.part = newPart;
  }

  // Now that we have the right storage, switch the semantics.
  semantics = &toSemantics;

  // If this is an extension, perform the shift now that the storage is
  // available.
  if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
    APInt::tcShiftLeft(significandParts(), newPartCount, shift);

  if (isFiniteNonZero()) {
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    // The target format is NanOnly: rebuild the NaN in the target format.
    // Information counts as lost unless the source format was NanOnly as well.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
      *losesInfo =
          fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
      makeNaN(false, sign);
      return is_signaling ? opInvalidOp : opOK;
    }

    // If NaN is negative zero, we need to create a new NaN to avoid converting
    // NaN to -Inf.
    if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
        semantics->nanEncoding != fltNanEncoding::NegativeZero)
      makeNaN(false, false);

    *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

    // For x87 extended precision, we want to make a NaN, not a special NaN if
    // the input wasn't special either.
    if (!X86SpecialNan && semantics == &semX87DoubleExtended)
      APInt::tcSetBit(significandParts(), semantics->precision - 1);

    // Convert of sNaN creates qNaN and raises an exception (invalid op).
    // This also guarantees that a sNaN does not become Inf on a truncation
    // that loses all payload bits.
    if (is_signaling) {
      makeQuiet();
      fs = opInvalidOp;
    } else {
      fs = opOK;
    }
  } else if (category == fcInfinity &&
             semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // The target format has no infinity, so the value becomes a NaN with the
    // same sign.
    makeNaN(false, sign);
    *losesInfo = true;
    fs = opInexact;
  } else if (category == fcZero &&
             semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Negative zero loses info, but positive zero doesn't.
    *losesInfo =
        fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
    fs = *losesInfo ? opInexact : opOK;
    // NaN is negative zero means -0 -> +0, which can lose information
    sign = false;
  } else {
    *losesInfo = false;
    fs = opOK;
  }

  // A target format without a zero gets its smallest normalized value instead.
  if (category == fcZero && !semantics->hasZero)
    makeSmallestNormalized(false);
  return fs;
}

/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.  */
APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
    MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
    roundingMode rounding_mode, bool *isExact) const {
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  // Pessimistic default; only the exact paths below set it to true.
  *isExact = false;

  /* Handle the three special cases first.  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);
  assert(dstPartsCount <= parts.size() && "Integer too big");

  if (category == fcZero) {
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0.  */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available.  */
      APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
                       0);
      APInt::tcShiftLeft(parts.data(), dstPartsCount,
                         bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      if (APInt::tcIncrement(parts.data(), dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  */
  unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case.  */
      if (omsb == width &&
          APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    APInt::tcNegate (parts.data(), dstPartsCount);
  } else {
    // Non-negative: a signed destination loses one bit to the sign.
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}

/* Same as convertToSignExtendedInteger, except we provide
   deterministic values in case of an invalid operation exception,
   namely zero for NaNs and the minimal or maximal value respectively
   for underflow or overflow.
   The *isExact output tells whether the result is exact, in the sense
   that converting it back to the original floating point type produces
   the original value.  This is almost equivalent to result==opOK,
   except for negative zeroes.
*/
APFloat::opStatus
IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
                            unsigned int width, bool isSigned,
                            roundingMode rounding_mode, bool *isExact) const {
  opStatus fs;

  fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
                                    isExact);

  if (fs == opInvalidOp) {
    unsigned int bits, dstPartsCount;

    dstPartsCount = partCountForBits(width);
    assert(dstPartsCount <= parts.size() && "Integer too big");

    // 'bits' is the number of low-order one bits to set: none for a NaN, all
    // value bits for a positive overflow, and for a negative value a single
    // bit (signed destination) or none (unsigned destination).
    if (category == fcNaN)
      bits = 0;
    else if (sign)
      bits = isSigned;
    else
      bits = width - isSigned;

    tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
    // Move the single one bit up to the sign position to form the minimal
    // signed value.
    if (sign && isSigned)
      APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
  }

  return fs;
}

/* Convert an unsigned integer SRC to a floating point number,
   rounding according to ROUNDING_MODE.  The sign of the floating
   point number is not modified.
*/ 2846 APFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2847 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2848 unsigned int omsb, precision, dstCount; 2849 integerPart *dst; 2850 lostFraction lost_fraction; 2851 2852 category = fcNormal; 2853 omsb = APInt::tcMSB(src, srcCount) + 1; 2854 dst = significandParts(); 2855 dstCount = partCount(); 2856 precision = semantics->precision; 2857 2858 /* We want the most significant PRECISION bits of SRC. There may not 2859 be that many; extract what we can. */ 2860 if (precision <= omsb) { 2861 exponent = omsb - 1; 2862 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2863 omsb - precision); 2864 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2865 } else { 2866 exponent = precision - 1; 2867 lost_fraction = lfExactlyZero; 2868 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2869 } 2870 2871 return normalize(rounding_mode, lost_fraction); 2872 } 2873 2874 APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2875 roundingMode rounding_mode) { 2876 unsigned int partCount = Val.getNumWords(); 2877 APInt api = Val; 2878 2879 sign = false; 2880 if (isSigned && api.isNegative()) { 2881 sign = true; 2882 api = -api; 2883 } 2884 2885 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2886 } 2887 2888 /* Convert a two's complement integer SRC to a floating point number, 2889 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2890 integer is signed, in which case it must be sign-extended. */ 2891 APFloat::opStatus 2892 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2893 unsigned int srcCount, bool isSigned, 2894 roundingMode rounding_mode) { 2895 opStatus status; 2896 2897 if (isSigned && 2898 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2899 integerPart *copy; 2900 2901 /* If we're signed and negative negate a copy. 
*/ 2902 sign = true; 2903 copy = new integerPart[srcCount]; 2904 APInt::tcAssign(copy, src, srcCount); 2905 APInt::tcNegate(copy, srcCount); 2906 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2907 delete [] copy; 2908 } else { 2909 sign = false; 2910 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2911 } 2912 2913 return status; 2914 } 2915 2916 /* FIXME: should this just take a const APInt reference? */ 2917 APFloat::opStatus 2918 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2919 unsigned int width, bool isSigned, 2920 roundingMode rounding_mode) { 2921 unsigned int partCount = partCountForBits(width); 2922 APInt api = APInt(width, ArrayRef(parts, partCount)); 2923 2924 sign = false; 2925 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2926 sign = true; 2927 api = -api; 2928 } 2929 2930 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2931 } 2932 2933 Expected<APFloat::opStatus> 2934 IEEEFloat::convertFromHexadecimalString(StringRef s, 2935 roundingMode rounding_mode) { 2936 lostFraction lost_fraction = lfExactlyZero; 2937 2938 category = fcNormal; 2939 zeroSignificand(); 2940 exponent = 0; 2941 2942 integerPart *significand = significandParts(); 2943 unsigned partsCount = partCount(); 2944 unsigned bitPos = partsCount * integerPartWidth; 2945 bool computedTrailingFraction = false; 2946 2947 // Skip leading zeroes and any (hexa)decimal point. 
2948 StringRef::iterator begin = s.begin(); 2949 StringRef::iterator end = s.end(); 2950 StringRef::iterator dot; 2951 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2952 if (!PtrOrErr) 2953 return PtrOrErr.takeError(); 2954 StringRef::iterator p = *PtrOrErr; 2955 StringRef::iterator firstSignificantDigit = p; 2956 2957 while (p != end) { 2958 integerPart hex_value; 2959 2960 if (*p == '.') { 2961 if (dot != end) 2962 return createError("String contains multiple dots"); 2963 dot = p++; 2964 continue; 2965 } 2966 2967 hex_value = hexDigitValue(*p); 2968 if (hex_value == UINT_MAX) 2969 break; 2970 2971 p++; 2972 2973 // Store the number while we have space. 2974 if (bitPos) { 2975 bitPos -= 4; 2976 hex_value <<= bitPos % integerPartWidth; 2977 significand[bitPos / integerPartWidth] |= hex_value; 2978 } else if (!computedTrailingFraction) { 2979 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2980 if (!FractOrErr) 2981 return FractOrErr.takeError(); 2982 lost_fraction = *FractOrErr; 2983 computedTrailingFraction = true; 2984 } 2985 } 2986 2987 /* Hex floats require an exponent but not a hexadecimal point. */ 2988 if (p == end) 2989 return createError("Hex strings require an exponent"); 2990 if (*p != 'p' && *p != 'P') 2991 return createError("Invalid character in significand"); 2992 if (p == begin) 2993 return createError("Significand has no digits"); 2994 if (dot != end && p - begin == 1) 2995 return createError("Significand has no digits"); 2996 2997 /* Ignore the exponent if we are zero. */ 2998 if (p != firstSignificantDigit) { 2999 int expAdjustment; 3000 3001 /* Implicit hexadecimal point? */ 3002 if (dot == end) 3003 dot = p; 3004 3005 /* Calculate the exponent adjustment implicit in the number of 3006 significant digits. 
*/ 3007 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 3008 if (expAdjustment < 0) 3009 expAdjustment++; 3010 expAdjustment = expAdjustment * 4 - 1; 3011 3012 /* Adjust for writing the significand starting at the most 3013 significant nibble. */ 3014 expAdjustment += semantics->precision; 3015 expAdjustment -= partsCount * integerPartWidth; 3016 3017 /* Adjust for the given exponent. */ 3018 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 3019 if (!ExpOrErr) 3020 return ExpOrErr.takeError(); 3021 exponent = *ExpOrErr; 3022 } 3023 3024 return normalize(rounding_mode, lost_fraction); 3025 } 3026 3027 APFloat::opStatus 3028 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 3029 unsigned sigPartCount, int exp, 3030 roundingMode rounding_mode) { 3031 unsigned int parts, pow5PartCount; 3032 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 3033 integerPart pow5Parts[maxPowerOfFiveParts]; 3034 bool isNearest; 3035 3036 isNearest = (rounding_mode == rmNearestTiesToEven || 3037 rounding_mode == rmNearestTiesToAway); 3038 3039 parts = partCountForBits(semantics->precision + 11); 3040 3041 /* Calculate pow(5, abs(exp)). */ 3042 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 3043 3044 for (;; parts *= 2) { 3045 opStatus sigStatus, powStatus; 3046 unsigned int excessPrecision, truncatedBits; 3047 3048 calcSemantics.precision = parts * integerPartWidth - 1; 3049 excessPrecision = calcSemantics.precision - semantics->precision; 3050 truncatedBits = excessPrecision; 3051 3052 IEEEFloat decSig(calcSemantics, uninitialized); 3053 decSig.makeZero(sign); 3054 IEEEFloat pow5(calcSemantics); 3055 3056 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 3057 rmNearestTiesToEven); 3058 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 3059 rmNearestTiesToEven); 3060 /* Add exp, as 10^n = 5^n * 2^n. 
*/ 3061 decSig.exponent += exp; 3062 3063 lostFraction calcLostFraction; 3064 integerPart HUerr, HUdistance; 3065 unsigned int powHUerr; 3066 3067 if (exp >= 0) { 3068 /* multiplySignificand leaves the precision-th bit set to 1. */ 3069 calcLostFraction = decSig.multiplySignificand(pow5); 3070 powHUerr = powStatus != opOK; 3071 } else { 3072 calcLostFraction = decSig.divideSignificand(pow5); 3073 /* Denormal numbers have less precision. */ 3074 if (decSig.exponent < semantics->minExponent) { 3075 excessPrecision += (semantics->minExponent - decSig.exponent); 3076 truncatedBits = excessPrecision; 3077 if (excessPrecision > calcSemantics.precision) 3078 excessPrecision = calcSemantics.precision; 3079 } 3080 /* Extra half-ulp lost in reciprocal of exponent. */ 3081 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 3082 } 3083 3084 /* Both multiplySignificand and divideSignificand return the 3085 result with the integer bit set. */ 3086 assert(APInt::tcExtractBit 3087 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 3088 3089 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 3090 powHUerr); 3091 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 3092 excessPrecision, isNearest); 3093 3094 /* Are we guaranteed to round correctly if we truncate? */ 3095 if (HUdistance >= HUerr) { 3096 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 3097 calcSemantics.precision - excessPrecision, 3098 excessPrecision); 3099 /* Take the exponent of decSig. If we tcExtract-ed less bits 3100 above we must adjust our exponent to compensate for the 3101 implicit right shift. 
*/ 3102 exponent = (decSig.exponent + semantics->precision 3103 - (calcSemantics.precision - excessPrecision)); 3104 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 3105 decSig.partCount(), 3106 truncatedBits); 3107 return normalize(rounding_mode, calcLostFraction); 3108 } 3109 } 3110 } 3111 3112 Expected<APFloat::opStatus> 3113 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 3114 decimalInfo D; 3115 opStatus fs; 3116 3117 /* Scan the text. */ 3118 StringRef::iterator p = str.begin(); 3119 if (Error Err = interpretDecimal(p, str.end(), &D)) 3120 return std::move(Err); 3121 3122 /* Handle the quick cases. First the case of no significant digits, 3123 i.e. zero, and then exponents that are obviously too large or too 3124 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 3125 definitely overflows if 3126 3127 (exp - 1) * L >= maxExponent 3128 3129 and definitely underflows to zero where 3130 3131 (exp + 1) * L <= minExponent - precision 3132 3133 With integer arithmetic the tightest bounds for L are 3134 3135 93/28 < L < 196/59 [ numerator <= 256 ] 3136 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 3137 */ 3138 3139 // Test if we have a zero number allowing for strings with no null terminators 3140 // and zero decimals with non-zero exponents. 3141 // 3142 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 3143 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 3144 // be at most one dot. On the other hand, if we have a zero with a non-zero 3145 // exponent, then we know that D.firstSigDigit will be non-numeric. 
3146 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 3147 category = fcZero; 3148 fs = opOK; 3149 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 3150 sign = false; 3151 if (!semantics->hasZero) 3152 makeSmallestNormalized(false); 3153 3154 /* Check whether the normalized exponent is high enough to overflow 3155 max during the log-rebasing in the max-exponent check below. */ 3156 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 3157 fs = handleOverflow(rounding_mode); 3158 3159 /* If it wasn't, then it also wasn't high enough to overflow max 3160 during the log-rebasing in the min-exponent check. Check that it 3161 won't overflow min in either check, then perform the min-exponent 3162 check. */ 3163 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 3164 (D.normalizedExponent + 1) * 28738 <= 3165 8651 * (semantics->minExponent - (int) semantics->precision)) { 3166 /* Underflow to zero and round. */ 3167 category = fcNormal; 3168 zeroSignificand(); 3169 fs = normalize(rounding_mode, lfLessThanHalf); 3170 3171 /* We can finally safely perform the max-exponent check. */ 3172 } else if ((D.normalizedExponent - 1) * 42039 3173 >= 12655 * semantics->maxExponent) { 3174 /* Overflow and round. */ 3175 fs = handleOverflow(rounding_mode); 3176 } else { 3177 integerPart *decSignificand; 3178 unsigned int partCount; 3179 3180 /* A tight upper bound on number of bits required to hold an 3181 N-digit decimal integer is N * 196 / 59. Allocate enough space 3182 to hold the full significand, and an extra part required by 3183 tcMultiplyPart. */ 3184 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 3185 partCount = partCountForBits(1 + 196 * partCount / 59); 3186 decSignificand = new integerPart[partCount + 1]; 3187 partCount = 0; 3188 3189 /* Convert to binary efficiently - we do almost all multiplication 3190 in an integerPart. 
When this would overflow do we do a single 3191 bignum multiplication, and then revert again to multiplication 3192 in an integerPart. */ 3193 do { 3194 integerPart decValue, val, multiplier; 3195 3196 val = 0; 3197 multiplier = 1; 3198 3199 do { 3200 if (*p == '.') { 3201 p++; 3202 if (p == str.end()) { 3203 break; 3204 } 3205 } 3206 decValue = decDigitValue(*p++); 3207 if (decValue >= 10U) { 3208 delete[] decSignificand; 3209 return createError("Invalid character in significand"); 3210 } 3211 multiplier *= 10; 3212 val = val * 10 + decValue; 3213 /* The maximum number that can be multiplied by ten with any 3214 digit added without overflowing an integerPart. */ 3215 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 3216 3217 /* Multiply out the current part. */ 3218 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 3219 partCount, partCount + 1, false); 3220 3221 /* If we used another part (likely but not guaranteed), increase 3222 the count. */ 3223 if (decSignificand[partCount]) 3224 partCount++; 3225 } while (p <= D.lastSigDigit); 3226 3227 category = fcNormal; 3228 fs = roundSignificandWithExponent(decSignificand, partCount, 3229 D.exponent, rounding_mode); 3230 3231 delete [] decSignificand; 3232 } 3233 3234 return fs; 3235 } 3236 3237 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 3238 const size_t MIN_NAME_SIZE = 3; 3239 3240 if (str.size() < MIN_NAME_SIZE) 3241 return false; 3242 3243 if (str == "inf" || str == "INFINITY" || str == "+Inf") { 3244 makeInf(false); 3245 return true; 3246 } 3247 3248 bool IsNegative = str.front() == '-'; 3249 if (IsNegative) { 3250 str = str.drop_front(); 3251 if (str.size() < MIN_NAME_SIZE) 3252 return false; 3253 3254 if (str == "inf" || str == "INFINITY" || str == "Inf") { 3255 makeInf(true); 3256 return true; 3257 } 3258 } 3259 3260 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 
3261 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 3262 if (IsSignaling) { 3263 str = str.drop_front(); 3264 if (str.size() < MIN_NAME_SIZE) 3265 return false; 3266 } 3267 3268 if (str.starts_with("nan") || str.starts_with("NaN")) { 3269 str = str.drop_front(3); 3270 3271 // A NaN without payload. 3272 if (str.empty()) { 3273 makeNaN(IsSignaling, IsNegative); 3274 return true; 3275 } 3276 3277 // Allow the payload to be inside parentheses. 3278 if (str.front() == '(') { 3279 // Parentheses should be balanced (and not empty). 3280 if (str.size() <= 2 || str.back() != ')') 3281 return false; 3282 3283 str = str.slice(1, str.size() - 1); 3284 } 3285 3286 // Determine the payload number's radix. 3287 unsigned Radix = 10; 3288 if (str[0] == '0') { 3289 if (str.size() > 1 && tolower(str[1]) == 'x') { 3290 str = str.drop_front(2); 3291 Radix = 16; 3292 } else 3293 Radix = 8; 3294 } 3295 3296 // Parse the payload and make the NaN. 3297 APInt Payload; 3298 if (!str.getAsInteger(Radix, Payload)) { 3299 makeNaN(IsSignaling, IsNegative, &Payload); 3300 return true; 3301 } 3302 } 3303 3304 return false; 3305 } 3306 3307 Expected<APFloat::opStatus> 3308 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3309 if (str.empty()) 3310 return createError("Invalid string length"); 3311 3312 // Handle special cases. 3313 if (convertFromStringSpecials(str)) 3314 return opOK; 3315 3316 /* Handle a leading minus sign. */ 3317 StringRef::iterator p = str.begin(); 3318 size_t slen = str.size(); 3319 sign = *p == '-' ? 
1 : 0; 3320 if (sign && !semantics->hasSignedRepr) 3321 llvm_unreachable( 3322 "This floating point format does not support signed values"); 3323 3324 if (*p == '-' || *p == '+') { 3325 p++; 3326 slen--; 3327 if (!slen) 3328 return createError("String has no digits"); 3329 } 3330 3331 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 3332 if (slen == 2) 3333 return createError("Invalid string"); 3334 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 3335 rounding_mode); 3336 } 3337 3338 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 3339 } 3340 3341 /* Write out a hexadecimal representation of the floating point value 3342 to DST, which must be of sufficient size, in the C99 form 3343 [-]0xh.hhhhp[+-]d. Return the number of characters written, 3344 excluding the terminating NUL. 3345 3346 If UPPERCASE, the output is in upper case, otherwise in lower case. 3347 3348 HEXDIGITS digits appear altogether, rounding the value if 3349 necessary. If HEXDIGITS is 0, the minimal precision to display the 3350 number precisely is used instead. If nothing would appear after 3351 the decimal point it is suppressed. 3352 3353 The decimal exponent is always printed and has at least one digit. 3354 Zero values display an exponent of zero. Infinities and NaNs 3355 appear as "infinity" or "nan" respectively. 3356 3357 The above rules are as specified by C99. There is ambiguity about 3358 what the leading hexadecimal digit should be. This implementation 3359 uses whatever is necessary so that the exponent is displayed as 3360 stored. This implies the exponent will fall within the IEEE format 3361 range, and the leading hexadecimal digit will be 0 (for denormals), 3362 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 3363 any other digits zero). 
3364 */ 3365 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 3366 bool upperCase, 3367 roundingMode rounding_mode) const { 3368 char *p; 3369 3370 p = dst; 3371 if (sign) 3372 *dst++ = '-'; 3373 3374 switch (category) { 3375 case fcInfinity: 3376 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 3377 dst += sizeof infinityL - 1; 3378 break; 3379 3380 case fcNaN: 3381 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 3382 dst += sizeof NaNU - 1; 3383 break; 3384 3385 case fcZero: 3386 *dst++ = '0'; 3387 *dst++ = upperCase ? 'X': 'x'; 3388 *dst++ = '0'; 3389 if (hexDigits > 1) { 3390 *dst++ = '.'; 3391 memset (dst, '0', hexDigits - 1); 3392 dst += hexDigits - 1; 3393 } 3394 *dst++ = upperCase ? 'P': 'p'; 3395 *dst++ = '0'; 3396 break; 3397 3398 case fcNormal: 3399 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 3400 break; 3401 } 3402 3403 *dst = 0; 3404 3405 return static_cast<unsigned int>(dst - p); 3406 } 3407 3408 /* Does the hard work of outputting the correctly rounded hexadecimal 3409 form of a normal floating point number with the specified number of 3410 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3411 digits necessary to print the value precisely is output. */ 3412 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3413 bool upperCase, 3414 roundingMode rounding_mode) const { 3415 unsigned int count, valueBits, shift, partsCount, outputDigits; 3416 const char *hexDigitChars; 3417 const integerPart *significand; 3418 char *p; 3419 bool roundUp; 3420 3421 *dst++ = '0'; 3422 *dst++ = upperCase ? 'X': 'x'; 3423 3424 roundUp = false; 3425 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3426 3427 significand = significandParts(); 3428 partsCount = partCount(); 3429 3430 /* +3 because the first digit only uses the single integer bit, so 3431 we have 3 virtual zero most-significant-bits. 
*/ 3432 valueBits = semantics->precision + 3; 3433 shift = integerPartWidth - valueBits % integerPartWidth; 3434 3435 /* The natural number of digits required ignoring trailing 3436 insignificant zeroes. */ 3437 outputDigits = (valueBits - significandLSB () + 3) / 4; 3438 3439 /* hexDigits of zero means use the required number for the 3440 precision. Otherwise, see if we are truncating. If we are, 3441 find out if we need to round away from zero. */ 3442 if (hexDigits) { 3443 if (hexDigits < outputDigits) { 3444 /* We are dropping non-zero bits, so need to check how to round. 3445 "bits" is the number of dropped bits. */ 3446 unsigned int bits; 3447 lostFraction fraction; 3448 3449 bits = valueBits - hexDigits * 4; 3450 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3451 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3452 } 3453 outputDigits = hexDigits; 3454 } 3455 3456 /* Write the digits consecutively, and start writing in the location 3457 of the hexadecimal point. We move the most significant digit 3458 left and add the hexadecimal point later. */ 3459 p = ++dst; 3460 3461 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3462 3463 while (outputDigits && count) { 3464 integerPart part; 3465 3466 /* Put the most significant integerPartWidth bits in "part". */ 3467 if (--count == partsCount) 3468 part = 0; /* An imaginary higher zero part. */ 3469 else 3470 part = significand[count] << shift; 3471 3472 if (count && shift) 3473 part |= significand[count - 1] >> (integerPartWidth - shift); 3474 3475 /* Convert as much of "part" to hexdigits as we can. */ 3476 unsigned int curDigits = integerPartWidth / 4; 3477 3478 if (curDigits > outputDigits) 3479 curDigits = outputDigits; 3480 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3481 outputDigits -= curDigits; 3482 } 3483 3484 if (roundUp) { 3485 char *q = dst; 3486 3487 /* Note that hexDigitChars has a trailing '0'. 
*/ 3488 do { 3489 q--; 3490 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3491 } while (*q == '0'); 3492 assert(q >= p); 3493 } else { 3494 /* Add trailing zeroes. */ 3495 memset (dst, '0', outputDigits); 3496 dst += outputDigits; 3497 } 3498 3499 /* Move the most significant digit to before the point, and if there 3500 is something after the decimal point add it. This must come 3501 after rounding above. */ 3502 p[-1] = p[0]; 3503 if (dst -1 == p) 3504 dst--; 3505 else 3506 p[0] = '.'; 3507 3508 /* Finally output the exponent. */ 3509 *dst++ = upperCase ? 'P': 'p'; 3510 3511 return writeSignedDecimal (dst, exponent); 3512 } 3513 3514 hash_code hash_value(const IEEEFloat &Arg) { 3515 if (!Arg.isFiniteNonZero()) 3516 return hash_combine((uint8_t)Arg.category, 3517 // NaN has no sign, fix it at zero. 3518 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3519 Arg.semantics->precision); 3520 3521 // Normal floats need their exponent and significand hashed. 3522 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3523 Arg.semantics->precision, Arg.exponent, 3524 hash_combine_range( 3525 Arg.significandParts(), 3526 Arg.significandParts() + Arg.partCount())); 3527 } 3528 3529 // Conversion from APFloat to/from host float/double. It may eventually be 3530 // possible to eliminate these and have everybody deal with APFloats, but that 3531 // will take a while. This approach will not easily extend to long double. 3532 // Current implementation requires integerPartWidth==64, which is correct at 3533 // the moment but could be made more general. 3534 3535 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3536 // the actual IEEE respresentations. We compensate for that here. 

// Pack an x87 double-extended value into an 80-bit APInt: word 0 holds the
// 64-bit significand, word 1 holds the 15-bit biased exponent with the sign
// in bit 15.
APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
  assert(partCount()==2);

  uint64_t myexponent, mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent+16383; //bias
    mysignificand = significandParts()[0];
    // A biased exponent of 1 with the top significand bit clear is stored
    // with an exponent field of 0.
    if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
      myexponent = 0;   // denormal
  } else if (category==fcZero) {
    myexponent = 0;
    mysignificand = 0;
  } else if (category==fcInfinity) {
    myexponent = 0x7fff;
    mysignificand = 0x8000000000000000ULL;
  } else {
    assert(category == fcNaN && "Unknown category");
    myexponent = 0x7fff;
    mysignificand = significandParts()[0];
  }

  uint64_t words[2];
  words[0] = mysignificand;
  words[1] =  ((uint64_t)(sign & 1) << 15) |
              (myexponent & 0x7fffLL);
  return APInt(80, words);
}

// Produce the 128-bit image of a legacy PPC double-double value as two IEEE
// double bit patterns: word 0 is this value rounded to double, word 1 is the
// remainder (this value minus word 0) converted to double, or zero when the
// first rounding lost nothing or did not give a finite non-zero result.
APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
  assert(partCount()==2);

  uint64_t words[2];
  opStatus fs;
  bool losesInfo;

  // Convert number to double.  To avoid spurious underflows, we re-
  // normalize against the "double" minExponent first, and only *then*
  // truncate the mantissa.  The result of that second conversion
  // may be inexact, but should never underflow.
  // Declare fltSemantics before APFloat that uses it (and
  // saves pointer to it) to ensure correct destruction order.
  fltSemantics extendedSemantics = *semantics;
  extendedSemantics.minExponent = semIEEEdouble.minExponent;
  IEEEFloat extended(*this);
  fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs;

  // First double: the value rounded to double precision.
  IEEEFloat u(extended);
  fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK || fs == opInexact);
  (void)fs;
  words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();

  // If conversion was exact or resulted in a special case, we're done;
  // just set the second double to zero.  Otherwise, re-convert back to
  // the extended format and compute the difference.  This now should
  // convert exactly to double.
  if (u.isFiniteNonZero() && losesInfo) {
    fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    // Second double: extended - u.
    IEEEFloat v(extended);
    v.subtract(u, rmNearestTiesToEven);
    fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;
    words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
  } else {
    words[1] = 0;
  }

  return APInt(128, words);
}

// Generic bit-image builder for the format S: lays out the trailing
// significand bits, then the biased exponent field, then the sign bit, and
// returns the result as an APInt of S.sizeInBits bits.
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
  assert(semantics == &S);
  // Float8E8M0FNU uses -minExponent as its bias; every other format uses
  // -(minExponent - 1).
  const int bias =
      (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  // Part index and mask of the integer bit within the significand.
  constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
  constexpr integerPart integer_bit =
      integerPart{1} << (trailing_significand_bits % integerPartWidth);
  constexpr uint64_t significand_mask = integer_bit - 1;
  // With no trailing significand bits the exponent field is given the whole
  // width; otherwise it is what remains after the sign bit and the trailing
  // significand.
  constexpr unsigned int exponent_bits =
      trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
                                : S.sizeInBits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;

  uint64_t myexponent;
  std::array<integerPart, partCountForBits(trailing_significand_bits)>
      mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
    // A biased exponent of 1 with the integer bit clear is stored with an
    // exponent field of 0.
    if (myexponent == 1 &&
        !(significandParts()[integer_bit_part] & integer_bit))
      myexponent = 0; // denormal
  } else if (category == fcZero) {
    if (!S.hasZero)
      llvm_unreachable("semantics does not support zero!");
    myexponent = ::exponentZero(S) + bias;
    mysignificand.fill(0);
  } else if (category == fcInfinity) {
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
        S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support inf!");
    myexponent = ::exponentInf(S) + bias;
    mysignificand.fill(0);
  } else {
    assert(category == fcNaN && "Unknown category!");
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support NaN!");
    myexponent = ::exponentNaN(S) + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
  }
  // Copy the significand parts into the output words and zero any words
  // beyond them.
  std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
  auto words_iter =
      std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
  if constexpr (significand_mask != 0) {
    // Clear the integer bit.
    words[mysignificand.size() - 1] &= significand_mask;
  }
  std::fill(words_iter, words.end(), uint64_t{0});
  // The sign and the exponent field are both OR-ed into the last word.
  constexpr size_t last_word = words.size() - 1;
  uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
                          << ((S.sizeInBits - 1) % 64);
  words[last_word] |= shifted_sign;
  uint64_t shifted_exponent = (myexponent & exponent_mask)
                              << (trailing_significand_bits % 64);
  words[last_word] |= shifted_exponent;
  // A single-word result is built from that word directly.
  if constexpr (last_word == 0) {
    return APInt(S.sizeInBits, words[0]);
  }
  return APInt(S.sizeInBits, words);
}

// The wrappers below each check the expected part count and forward to
// convertIEEEFloatToAPInt instantiated for one specific format.

// IEEE quad.
APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}

// IEEE double.
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}

// IEEE single.
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}

// BFloat.
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}

// IEEE half.
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}

// Float8E5M2.
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}

// Float8E5M2FNUZ.
APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}

// Float8E4M3.
APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3>();
}

// Float8E4M3FN.
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}

// Float8E4M3FNUZ.
APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}

// Float8E4M3B11FNUZ.
APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}

// Float8E3M4.
APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E3M4>();
}

// FloatTF32.
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}

// Float8E8M0FNU.
APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
}

// Float6E3M2FN.
APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
}

// Float6E2M3FN.
APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
}

// Float4E2M1FN.
APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
}

// This function creates an APInt that is just a bit map of the floating
// point constant as it would appear in memory.  It is not a conversion,
// and treating the result as a normal integer is unlikely to be useful.
3772 3773 APInt IEEEFloat::bitcastToAPInt() const { 3774 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3775 return convertHalfAPFloatToAPInt(); 3776 3777 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3778 return convertBFloatAPFloatToAPInt(); 3779 3780 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3781 return convertFloatAPFloatToAPInt(); 3782 3783 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3784 return convertDoubleAPFloatToAPInt(); 3785 3786 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3787 return convertQuadrupleAPFloatToAPInt(); 3788 3789 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3790 return convertPPCDoubleDoubleAPFloatToAPInt(); 3791 3792 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3793 return convertFloat8E5M2APFloatToAPInt(); 3794 3795 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3796 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3797 3798 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3) 3799 return convertFloat8E4M3APFloatToAPInt(); 3800 3801 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3802 return convertFloat8E4M3FNAPFloatToAPInt(); 3803 3804 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3805 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3806 3807 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3808 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3809 3810 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4) 3811 return convertFloat8E3M4APFloatToAPInt(); 3812 3813 if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3814 return convertFloatTF32APFloatToAPInt(); 3815 3816 if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU) 3817 return convertFloat8E8M0FNUAPFloatToAPInt(); 3818 3819 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) 3820 return convertFloat6E3M2FNAPFloatToAPInt(); 3821 3822 if (semantics == (const 
llvm::fltSemantics *)&semFloat6E2M3FN) 3823 return convertFloat6E2M3FNAPFloatToAPInt(); 3824 3825 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN) 3826 return convertFloat4E2M1FNAPFloatToAPInt(); 3827 3828 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3829 "unknown format!"); 3830 return convertF80LongDoubleAPFloatToAPInt(); 3831 } 3832 3833 float IEEEFloat::convertToFloat() const { 3834 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3835 "Float semantics are not IEEEsingle"); 3836 APInt api = bitcastToAPInt(); 3837 return api.bitsToFloat(); 3838 } 3839 3840 double IEEEFloat::convertToDouble() const { 3841 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3842 "Float semantics are not IEEEdouble"); 3843 APInt api = bitcastToAPInt(); 3844 return api.bitsToDouble(); 3845 } 3846 3847 #ifdef HAS_IEE754_FLOAT128 3848 float128 IEEEFloat::convertToQuad() const { 3849 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && 3850 "Float semantics are not IEEEquads"); 3851 APInt api = bitcastToAPInt(); 3852 return api.bitsToQuad(); 3853 } 3854 #endif 3855 3856 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3857 /// does not support these bit patterns: 3858 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3859 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3860 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3861 /// exponent = 0, integer bit 1 ("pseudodenormal") 3862 /// At the moment, the first three are treated as NaNs, the last one as Normal. 
3863 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3864 uint64_t i1 = api.getRawData()[0]; 3865 uint64_t i2 = api.getRawData()[1]; 3866 uint64_t myexponent = (i2 & 0x7fff); 3867 uint64_t mysignificand = i1; 3868 uint8_t myintegerbit = mysignificand >> 63; 3869 3870 initialize(&semX87DoubleExtended); 3871 assert(partCount()==2); 3872 3873 sign = static_cast<unsigned int>(i2>>15); 3874 if (myexponent == 0 && mysignificand == 0) { 3875 makeZero(sign); 3876 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3877 makeInf(sign); 3878 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3879 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3880 category = fcNaN; 3881 exponent = exponentNaN(); 3882 significandParts()[0] = mysignificand; 3883 significandParts()[1] = 0; 3884 } else { 3885 category = fcNormal; 3886 exponent = myexponent - 16383; 3887 significandParts()[0] = mysignificand; 3888 significandParts()[1] = 0; 3889 if (myexponent==0) // denormal 3890 exponent = -16382; 3891 } 3892 } 3893 3894 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3895 uint64_t i1 = api.getRawData()[0]; 3896 uint64_t i2 = api.getRawData()[1]; 3897 opStatus fs; 3898 bool losesInfo; 3899 3900 // Get the first double and convert to our format. 3901 initFromDoubleAPInt(APInt(64, i1)); 3902 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3903 assert(fs == opOK && !losesInfo); 3904 (void)fs; 3905 3906 // Unless we have a special case, add in second double. 3907 if (isFiniteNonZero()) { 3908 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3909 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3910 assert(fs == opOK && !losesInfo); 3911 (void)fs; 3912 3913 add(v, rmNearestTiesToEven); 3914 } 3915 } 3916 3917 // The E8M0 format has the following characteristics: 3918 // It is an 8-bit unsigned format with only exponents (no actual significand). 
// No encodings for {zero, infinities or denorms}.
// NaN is represented by all 1's.
// Bias is 127.
void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
  const uint64_t exponent_mask = 0xff;
  uint64_t val = api.getRawData()[0];
  uint64_t myexponent = (val & exponent_mask);

  initialize(&semFloat8E8M0FNU);
  assert(partCount() == 1);

  // This format has unsigned representation only
  sign = 0;

  // Set the significand
  // This format does not have any significand but the 'Pth' precision bit is
  // always set to 1 for consistency in APFloat's internal representation.
  uint64_t mysignificand = 1;
  significandParts()[0] = mysignificand;

  // This format can either have a NaN or fcNormal
  // All 1's i.e. 255 is a NaN
  if (val == exponent_mask) {
    category = fcNaN;
    exponent = exponentNaN();
    return;
  }
  // Handle fcNormal...
  category = fcNormal;
  exponent = myexponent - 127; // 127 is bias
  return;
}

/// Initialize this value from \p api, interpreting it as the encoding
/// described by the semantics \p S: sign in the top bit, then the exponent
/// field, then the trailing significand.  The field widths, masks and bias are
/// all derived at compile time from S.
///
/// Classification order: infinity (only when S has IEEE754 non-finite
/// behavior), then NaN according to S.nanEncoding, then zero, and finally a
/// normal or denormal number.
template <const fltSemantics &S>
void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
  assert(api.getBitWidth() == S.sizeInBits);
  // Position of the integer bit within the top significand part; everything
  // below it in that part belongs to the trailing significand.
  constexpr integerPart integer_bit = integerPart{1}
                                      << ((S.precision - 1) % integerPartWidth);
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  constexpr unsigned int stored_significand_parts =
      partCountForBits(trailing_significand_bits);
  // Whatever is left after the sign bit and the trailing significand.
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
  constexpr int bias = -(S.minExponent - 1);

  // Copy the bits of the significand. We need to clear out the exponent and
  // sign bit in the last word.
  std::array<integerPart, stored_significand_parts> mysignificand;
  std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
  if constexpr (significand_mask != 0) {
    mysignificand[mysignificand.size() - 1] &= significand_mask;
  }

  // We assume the last word holds the sign bit, the exponent, and potentially
  // some of the trailing significand field.
  uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
  uint64_t myexponent =
      (last_word >> (trailing_significand_bits % 64)) & exponent_mask;

  initialize(&S);
  assert(partCount() == mysignificand.size());

  sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));

  bool all_zero_significand =
      llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });

  bool is_zero = myexponent == 0 && all_zero_significand;

  // Infinity exists only for formats with IEEE754 non-finite behavior.
  if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
    if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
      makeInf(sign);
      return;
    }
  }

  bool is_nan = false;

  // Each NaN encoding has its own detection rule.
  if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
    is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
    // Every lower part must be all ones and the top part must equal
    // significand_mask (skipped when the mask is zero).
    bool all_ones_significand =
        std::all_of(mysignificand.begin(), mysignificand.end() - 1,
                    [](integerPart bits) { return bits == ~integerPart{0}; }) &&
        (!significand_mask ||
         mysignificand[mysignificand.size() - 1] == significand_mask);
    is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
    // The negative-zero bit pattern is the NaN.
    is_nan = is_zero && sign;
  }

  if (is_nan) {
    category = fcNaN;
    exponent = ::exponentNaN(S);
    std::copy_n(mysignificand.begin(), mysignificand.size(),
                significandParts());
    return;
  }

  if (is_zero) {
    makeZero(sign);
    return;
  }

  category = fcNormal;
  exponent = myexponent - bias;
  std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
  if (myexponent == 0) // denormal
    exponent = S.minExponent;
  else
    significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
}

/// Initialize from \p api using semIEEEquad.
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEquad>(api);
}

/// Initialize from \p api using semIEEEdouble.
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEdouble>(api);
}

/// Initialize from \p api using semIEEEsingle.
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEsingle>(api);
}

/// Initialize from \p api using semBFloat.
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semBFloat>(api);
}

/// Initialize from \p api using semIEEEhalf.
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEhalf>(api);
}

/// Initialize from \p api using semFloat8E5M2.
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2>(api);
}

/// Initialize from \p api using semFloat8E5M2FNUZ.
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}

/// Initialize from \p api using semFloat8E4M3.
void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3>(api);
}

/// Initialize from \p api using semFloat8E4M3FN.
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FN>(api);
}

/// Initialize from \p api using semFloat8E4M3FNUZ.
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
}

/// Initialize from \p api using semFloat8E4M3B11FNUZ.
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}

/// Initialize from \p api using semFloat8E3M4.
void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E3M4>(api);
}

/// Initialize from \p api using semFloatTF32.
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
  initFromIEEEAPInt<semFloatTF32>(api);
}

/// Initialize from \p api using semFloat6E3M2FN.
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E3M2FN>(api);
}

/// Initialize from \p api using semFloat6E2M3FN.
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E2M3FN>(api);
}

/// Initialize from \p api using semFloat4E2M1FN.
void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat4E2M1FN>(api);
}

/// Treat api as containing the bits of a floating point number.
///
/// Dispatches on the identity of \p Sem to the matching initFrom*APInt
/// routine; the bit width of \p api must equal Sem->sizeInBits.  Reaching the
/// end without a match is treated as unreachable.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
  assert(api.getBitWidth() == Sem->sizeInBits);
  if (Sem == &semIEEEhalf)
    return initFromHalfAPInt(api);
  if (Sem == &semBFloat)
    return initFromBFloatAPInt(api);
  if (Sem == &semIEEEsingle)
    return initFromFloatAPInt(api);
  if (Sem == &semIEEEdouble)
    return initFromDoubleAPInt(api);
  if (Sem == &semX87DoubleExtended)
    return initFromF80LongDoubleAPInt(api);
  if (Sem == &semIEEEquad)
    return initFromQuadrupleAPInt(api);
  if (Sem == &semPPCDoubleDoubleLegacy)
    return initFromPPCDoubleDoubleAPInt(api);
  if (Sem == &semFloat8E5M2)
    return initFromFloat8E5M2APInt(api);
  if (Sem == &semFloat8E5M2FNUZ)
    return initFromFloat8E5M2FNUZAPInt(api);
  if (Sem == &semFloat8E4M3)
    return initFromFloat8E4M3APInt(api);
  if (Sem == &semFloat8E4M3FN)
    return initFromFloat8E4M3FNAPInt(api);
  if (Sem == &semFloat8E4M3FNUZ)
    return initFromFloat8E4M3FNUZAPInt(api);
  if (Sem == &semFloat8E4M3B11FNUZ)
    return initFromFloat8E4M3B11FNUZAPInt(api);
  if (Sem == &semFloat8E3M4)
    return initFromFloat8E3M4APInt(api);
  if (Sem == &semFloatTF32)
    return initFromFloatTF32APInt(api);
  if (Sem == &semFloat8E8M0FNU)
    return initFromFloat8E8M0FNUAPInt(api);
  if (Sem == &semFloat6E3M2FN)
    return initFromFloat6E3M2FNAPInt(api);
  if (Sem == &semFloat6E2M3FN)
    return initFromFloat6E2M3FNAPInt(api);
  if (Sem == &semFloat4E2M1FN)
    return initFromFloat4E2M1FNAPInt(api);

  llvm_unreachable(nullptr);
}

/// Make this number the largest magnitude normal number in the given
/// semantics.
///
/// \param Negative sign of the result; requesting a negative value for a
/// format without a signed representation is treated as unreachable.
void IEEEFloat::makeLargest(bool Negative) {
  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 1..10
  //   significand = 1..1
  category = fcNormal;
  sign = Negative;
  exponent = semantics->maxExponent;

  // Use memset to set all but the highest integerPart to all ones.
  integerPart *significand = significandParts();
  unsigned PartCount = partCount();
  memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));

  // Set the high integerPart especially setting all unused top bits for
  // internal consistency.
  const unsigned NumUnusedHighBits =
      PartCount*integerPartWidth - semantics->precision;
  // The guard avoids shifting by the full part width, in which case the top
  // part is simply zero.
  significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
                                   ? (~integerPart(0) >> NumUnusedHighBits)
                                   : 0;
  // For NanOnly formats with the AllOnes NaN encoding, clear the lowest
  // significand bit -- presumably because the all-ones significand at this
  // exponent is the NaN pattern (see the fltNanEncoding documentation).
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      (semantics->precision > 1))
    significand[0] &= ~integerPart(1);
}

/// Make this number the smallest magnitude denormal number in the given
/// semantics.
void IEEEFloat::makeSmallest(bool Negative) {
  // A negative result is only meaningful for formats with a sign bit.
  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 0..01
  category = fcNormal;
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSet(significandParts(), 1, partCount());
}

/// Make this number the smallest magnitude value that has only the integer
/// bit (bit precision - 1) set in its significand, at the minimum exponent.
///
/// \param Negative sign of the result; requesting a negative value for a
/// format without a signed representation is treated as unreachable.
void IEEEFloat::makeSmallestNormalized(bool Negative) {
  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 10..0

  category = fcNormal;
  zeroSignificand();
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSetBit(significandParts(), semantics->precision - 1);
}

/// Construct from raw bits \p API interpreted under semantics \p Sem.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}

/// Construct an IEEE single-precision value from the bits of \p f.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}

/// Construct an IEEE double-precision value from the bits of \p d.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}

namespace {
  /// Appends the characters of \p Str to \p Buffer.
  void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
    Buffer.append(Str.begin(), Str.end());
  }

  /// Removes data from the given significand until it is no more
  /// precise than is required for the desired precision.
  ///
  /// \param significand in/out value; divided by a power of ten and then
  ///        truncated to its active bit count.
  /// \param exp in/out decimal exponent; increased by the number of powers of
  ///        ten that were divided out.
  /// \param FormatPrecision number of decimal digits to retain.
  void AdjustToPrecision(APInt &significand,
                         int &exp, unsigned FormatPrecision) {
    unsigned bits = significand.getActiveBits();

    // 196/59 is a very slight overestimate of lg_2(10).
    unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;

    if (bits <= bitsRequired) return;

    unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
    if (!tensRemovable) return;

    exp += tensRemovable;

    // Compute 10^tensRemovable by binary exponentiation.
    APInt divisor(significand.getBitWidth(), 1);
    APInt powten(significand.getBitWidth(), 10);
    while (true) {
      if (tensRemovable & 1)
        divisor *= powten;
      tensRemovable >>= 1;
      if (!tensRemovable) break;
      powten *= powten;
    }

    significand = significand.udiv(divisor);

    // Truncate the significand down to its active bit count.
    significand = significand.trunc(significand.getActiveBits());
  }


  /// Rounds the decimal digit string in \p buffer down to at most
  /// \p FormatPrecision digits, adjusting \p exp for every digit removed.
  /// Digits are stored least-significant first.
  void AdjustToPrecision(SmallVectorImpl<char> &buffer,
                         int &exp, unsigned FormatPrecision) {
    unsigned N = buffer.size();
    if (N <= FormatPrecision) return;

    // The most significant figures are the last ones in the buffer.
    unsigned FirstSignificant = N - FormatPrecision;

    // Round.
    // FIXME: this probably shouldn't use 'round half up'.

    // Rounding down is just a truncation, except we also want to drop
    // trailing zeros from the new result.
    if (buffer[FirstSignificant - 1] < '5') {
      while (FirstSignificant < N && buffer[FirstSignificant] == '0')
        FirstSignificant++;

      exp += FirstSignificant;
      buffer.erase(&buffer[0], &buffer[FirstSignificant]);
      return;
    }

    // Rounding up requires a decimal add-with-carry.  If we continue
    // the carry, the newly-introduced zeros will just be truncated.
    for (unsigned I = FirstSignificant; I != N; ++I) {
      if (buffer[I] == '9') {
        FirstSignificant++;
      } else {
        buffer[I]++;
        break;
      }
    }

    // If we carried through, we have exactly one digit of precision.
    if (FirstSignificant == N) {
      exp += FirstSignificant;
      buffer.clear();
      buffer.push_back('1');
      return;
    }

    exp += FirstSignificant;
    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
  }

  /// Appends a decimal rendering of (-1)^isNeg * significand * 2^exp to
  /// \p Str.
  ///
  /// \param FormatPrecision maximum number of significant decimal digits;
  ///        zero selects a value derived from the significand's bit width.
  /// \param FormatMaxPadding largest number of padding zeros tolerated before
  ///        switching to scientific notation; zero forces scientific notation.
  /// \param TruncateZero when true, scientific output uses 'E' and is not
  ///        zero-padded; when false it uses 'e', pads the fraction with zeros
  ///        up to FormatPrecision and prints at least two exponent digits.
  void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
                    APInt significand, unsigned FormatPrecision,
                    unsigned FormatMaxPadding, bool TruncateZero) {
    const int semanticsPrecision = significand.getBitWidth();

    if (isNeg)
      Str.push_back('-');

    // Set FormatPrecision if zero.  We want to do this before we
    // truncate trailing zeros, as those are part of the precision.
    if (!FormatPrecision) {
      // We use enough digits so the number can be round-tripped back to an
      // APFloat. The formula comes from "How to Print Floating-Point Numbers
      // Accurately" by Steele and White.
      // FIXME: Using a formula based purely on the precision is conservative;
      // we can print fewer digits depending on the actual value being printed.

      // FormatPrecision = 2 + floor(significandBits / lg_2(10))
      FormatPrecision = 2 + semanticsPrecision * 59 / 196;
    }

    // Ignore trailing binary zeros.
    int trailingZeros = significand.countr_zero();
    exp += trailingZeros;
    significand.lshrInPlace(trailingZeros);

    // Change the exponent from 2^e to 10^e.
    if (exp == 0) {
      // Nothing to do.
    } else if (exp > 0) {
      // Just shift left.
      significand = significand.zext(semanticsPrecision + exp);
      significand <<= exp;
      exp = 0;
    } else { /* exp < 0 */
      int texp = -exp;

      // We transform this using the identity:
      //   (N)(2^-e) == (N)(5^e)(10^-e)
      // This means we have to multiply N (the significand) by 5^e.
      // To avoid overflow, we have to operate on numbers large
      // enough to store N * 5^e:
      //   log2(N * 5^e) == log2(N) + e * log2(5)
      //                 <= semantics->precision + e * 137 / 59
      //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

      unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;

      // Multiply significand by 5^e.
      //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
      significand = significand.zext(precision);
      APInt five_to_the_i(precision, 5);
      while (true) {
        if (texp & 1)
          significand *= five_to_the_i;

        texp >>= 1;
        if (!texp)
          break;
        five_to_the_i *= five_to_the_i;
      }
    }

    AdjustToPrecision(significand, exp, FormatPrecision);

    SmallVector<char, 256> buffer;

    // Fill the buffer.
    unsigned precision = significand.getBitWidth();
    if (precision < 4) {
      // We need enough precision to store the value 10.
      precision = 4;
      significand = significand.zext(precision);
    }
    APInt ten(precision, 10);
    APInt digit(precision, 0);

    // Digits are produced least-significant first; inTrail stays true while
    // only zeros have been seen so far.
    bool inTrail = true;
    while (significand != 0) {
      // digit <- significand % 10
      // significand <- significand / 10
      APInt::udivrem(significand, ten, significand, digit);

      unsigned d = digit.getZExtValue();

      // Drop trailing zeros.
      if (inTrail && !d)
        exp++;
      else {
        buffer.push_back((char) ('0' + d));
        inTrail = false;
      }
    }

    assert(!buffer.empty() && "no characters in buffer!");

    // Drop down to FormatPrecision.
    // TODO: don't do more precise calculations above than are required.
    AdjustToPrecision(buffer, exp, FormatPrecision);

    unsigned NDigits = buffer.size();

    // Check whether we should use scientific notation.
    bool FormatScientific;
    if (!FormatMaxPadding)
      FormatScientific = true;
    else {
      if (exp >= 0) {
        // 765e3 --> 765000
        //              ^^^
        // But we shouldn't make the number look more precise than it is.
        FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                            NDigits + (unsigned) exp > FormatPrecision);
      } else {
        // Power of the most significant digit.
        int MSD = exp + (int) (NDigits - 1);
        if (MSD >= 0) {
          // 765e-2 == 7.65
          FormatScientific = false;
        } else {
          // 765e-5 == 0.00765
          //           ^ ^^
          FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
        }
      }
    }

    // Scientific formatting is pretty straightforward.
    if (FormatScientific) {
      exp += (NDigits - 1);

      // The buffer is least-significant first, so emit it back to front.
      Str.push_back(buffer[NDigits-1]);
      Str.push_back('.');
      if (NDigits == 1 && TruncateZero)
        Str.push_back('0');
      else
        for (unsigned I = 1; I != NDigits; ++I)
          Str.push_back(buffer[NDigits-1-I]);
      // Fill with zeros up to FormatPrecision.
      if (!TruncateZero && FormatPrecision > NDigits - 1)
        Str.append(FormatPrecision - NDigits + 1, '0');
      // For !TruncateZero we use lower 'e'.
      Str.push_back(TruncateZero ? 'E' : 'e');

      Str.push_back(exp >= 0 ? '+' : '-');
      if (exp < 0)
        exp = -exp;
      // Exponent digits are also generated least-significant first.
      SmallVector<char, 6> expbuf;
      do {
        expbuf.push_back((char) ('0' + (exp % 10)));
        exp /= 10;
      } while (exp);
      // Exponent always at least two digits if we do not truncate zeros.
      if (!TruncateZero && expbuf.size() < 2)
        expbuf.push_back('0');
      for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
        Str.push_back(expbuf[E-1-I]);
      return;
    }

    // Non-scientific, positive exponents.
    if (exp >= 0) {
      for (unsigned I = 0; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
      for (unsigned I = 0; I != (unsigned) exp; ++I)
        Str.push_back('0');
      return;
    }

    // Non-scientific, negative exponents.

    // The number of digits to the left of the decimal point.
    int NWholeDigits = exp + (int) NDigits;

    unsigned I = 0;
    if (NWholeDigits > 0) {
      for (; I != (unsigned) NWholeDigits; ++I)
        Str.push_back(buffer[NDigits-I-1]);
      Str.push_back('.');
    } else {
      unsigned NZeros = 1 + (unsigned) -NWholeDigits;

      Str.push_back('0');
      Str.push_back('.');
      for (unsigned Z = 1; Z != NZeros; ++Z)
        Str.push_back('0');
    }

    for (; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);

  }
} // namespace

/// Appends a textual rendering of this value to \p Str.
///
/// Infinity is written as "+Inf"/"-Inf" and NaN as "NaN".  Zero is written
/// with an optional leading '-', as a bare "0" when FormatMaxPadding is
/// non-zero and otherwise in scientific form.  Finite non-zero values are
/// split into an integer significand and a binary exponent and passed to
/// toStringImpl with the same formatting arguments.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
               FormatMaxPadding, TruncateZero);

}

/// Returns true when 1/x is exactly representable as a non-denormal value in
/// the same semantics, i.e. when this value is a finite non-zero power of two
/// whose reciprocal divides without any status flag.  If \p inv is non-null
/// the reciprocal is stored through it.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  if (significandLSB() != semantics->precision - 1)
    return false;

  // Get the inverse.
  IEEEFloat reciprocal(*semantics, 1ULL);
  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (reciprocal.isDenormal())
    return false;

  assert(reciprocal.isFiniteNonZero() &&
         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(reciprocal, *semantics);

  return true;
}

/// Returns log2(|x|) when the magnitude of this value is an exact power of
/// two, and INT_MIN otherwise (non-finite, zero, or more than one significand
/// bit set).
int IEEEFloat::getExactLog2Abs() const {
  if (!isFinite() || isZero())
    return INT_MIN;

  const integerPart *Parts = significandParts();
  const int PartCount = partCountForBits(semantics->precision);

  // A power of two has exactly one significand bit set.
  int PopCount = 0;
  for (int i = 0; i < PartCount; ++i) {
    PopCount += llvm::popcount(Parts[i]);
    if (PopCount > 1)
      return INT_MIN;
  }

  // Above the minimum exponent the stored exponent is the answer.
  if (exponent != semantics->minExponent)
    return exponent;

  // At the minimum exponent the set bit may sit below the integer bit
  // (denormal), so account for its position within the significand.
  int CountrParts = 0;
  for (int i = 0; i < PartCount;
       ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
    if (Parts[i] != 0) {
      return exponent - semantics->precision + CountrParts +
             llvm::countr_zero(Parts[i]) + 1;
    }
  }

  llvm_unreachable("didn't find the set bit");
}

/// Returns true if this value is a signaling NaN.  Formats whose non-finite
/// behavior is NanOnly or FiniteOnly never report a signaling NaN.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
      semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
///
/// \param nextDown when true, compute nextDown instead of nextUp.
/// \returns opInvalidOp when the input is a signaling NaN, opOK otherwise.
APFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats that encode NaN as negative zero only have a positive zero.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      // Formats without a zero get the smallest normalized value instead.
      if (!semantics->hasZero)
        makeSmallestNormalized(false);
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else if (semantics->nonFiniteBehavior ==
                 fltNonfiniteBehavior::FiniteOnly) {
        // nextUp(getLargest()) == getLargest()
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      // If there are only exponents, any increment always crosses the
      // BinadeBoundary.
      bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
                                     (!isDenormal() && isSignificandAllOnes());

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}

/// Returns the exponent value used for NaN in this value's semantics, as
/// computed by the file-level ::exponentNaN helper.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}

/// Returns the exponent value used for infinity in this value's semantics, as
/// computed by the file-level ::exponentInf helper.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}

/// Returns the exponent value used for zero in this value's semantics, as
/// computed by the file-level ::exponentZero helper.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}

/// Make this value an infinity with the given sign.  FiniteOnly formats treat
/// the request as unreachable; NanOnly formats produce a NaN instead.
void IEEEFloat::makeInf(bool Negative) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support Inf");

  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // There is no Inf, so make NaN instead.
    makeNaN(false, Negative);
    return;
  }
  category = fcInfinity;
  sign = Negative;
  exponent = exponentInf();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Make this value a zero with the given sign.  Formats without a zero treat
/// the request as unreachable; formats that encode NaN as negative zero always
/// produce a positive zero.
void IEEEFloat::makeZero(bool Negative) {
  if (!semantics->hasZero)
    llvm_unreachable("This floating point format does not support Zero");

  category = fcZero;
  sign = Negative;
  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Merge negative zero to positive because 0b10000...000 is used for NaN
    sign = false;
  }
  exponent = exponentZero();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Set significand bit (precision - 2) on a NaN -- the bit isSignaling()
/// tests.  This is a no-op for NanOnly formats.  The value must already be a
/// NaN.
void IEEEFloat::makeQuiet() {
  assert(isNaN());
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    APInt::tcSetBit(significandParts(), semantics->precision - 2);
}

/// Returns the exponent of \p Arg, or one of the APFloat::IEK_* sentinels for
/// NaN, zero and infinity.  For denormals the value is normalized first so the
/// result is the exponent of the leading set bit.
int ilogb(const IEEEFloat &Arg) {
  if (Arg.isNaN())
    return APFloat::IEK_NaN;
  if (Arg.isZero())
    return APFloat::IEK_Zero;
  if (Arg.isInfinity())
    return APFloat::IEK_Inf;
  if (!Arg.isDenormal())
    return Arg.exponent;

  IEEEFloat Normalized(Arg);
  int SignificandBits = Arg.getSemantics().precision - 1;

  // Lift the exponent so normalize() has room to shift the leading bit into
  // the integer-bit position, then undo the offset.
  Normalized.exponent += SignificandBits;
  Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
  return Normalized.exponent - SignificandBits;
}

IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode RoundingMode) {
  auto MaxExp = X.getSemantics().maxExponent;
  auto MinExp = X.getSemantics().minExponent;

  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
  // overflow; clamp it to a safe range before adding, but ensure that the range
  // is large enough that the clamp does not change the result. The range we
  // need to support is the difference between the largest possible exponent and
  // the normalized exponent of half the smallest denormal.
4820 4821 int SignificandBits = X.getSemantics().precision - 1; 4822 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4823 4824 // Clamp to one past the range ends to let normalize handle overlflow. 4825 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); 4826 X.normalize(RoundingMode, lfExactlyZero); 4827 if (X.isNaN()) 4828 X.makeQuiet(); 4829 return X; 4830 } 4831 4832 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) { 4833 Exp = ilogb(Val); 4834 4835 // Quiet signalling nans. 4836 if (Exp == APFloat::IEK_NaN) { 4837 IEEEFloat Quiet(Val); 4838 Quiet.makeQuiet(); 4839 return Quiet; 4840 } 4841 4842 if (Exp == APFloat::IEK_Inf) 4843 return Val; 4844 4845 // 1 is added because frexp is defined to return a normalized fraction in 4846 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4847 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1; 4848 return scalbn(Val, -Exp, RM); 4849 } 4850 4851 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4852 : Semantics(&S), 4853 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4854 assert(Semantics == &semPPCDoubleDouble); 4855 } 4856 4857 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4858 : Semantics(&S), 4859 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4860 APFloat(semIEEEdouble, uninitialized)}) { 4861 assert(Semantics == &semPPCDoubleDouble); 4862 } 4863 4864 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4865 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4866 APFloat(semIEEEdouble)}) { 4867 assert(Semantics == &semPPCDoubleDouble); 4868 } 4869 4870 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4871 : Semantics(&S), 4872 Floats(new APFloat[2]{ 4873 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4874 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4875 assert(Semantics == &semPPCDoubleDouble); 4876 } 4877 4878 DoubleAPFloat::DoubleAPFloat(const 
fltSemantics &S, APFloat &&First, 4879 APFloat &&Second) 4880 : Semantics(&S), 4881 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4882 assert(Semantics == &semPPCDoubleDouble); 4883 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4884 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4885 } 4886 4887 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4888 : Semantics(RHS.Semantics), 4889 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4890 APFloat(RHS.Floats[1])} 4891 : nullptr) { 4892 assert(Semantics == &semPPCDoubleDouble); 4893 } 4894 4895 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4896 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4897 RHS.Semantics = &semBogus; 4898 assert(Semantics == &semPPCDoubleDouble); 4899 } 4900 4901 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4902 if (Semantics == RHS.Semantics && RHS.Floats) { 4903 Floats[0] = RHS.Floats[0]; 4904 Floats[1] = RHS.Floats[1]; 4905 } else if (this != &RHS) { 4906 this->~DoubleAPFloat(); 4907 new (this) DoubleAPFloat(RHS); 4908 } 4909 return *this; 4910 } 4911 4912 // Implement addition, subtraction, multiplication and division based on: 4913 // "Software for Doubled-Precision Floating-Point Computations", 4914 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 
4915 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4916 const APFloat &c, const APFloat &cc, 4917 roundingMode RM) { 4918 int Status = opOK; 4919 APFloat z = a; 4920 Status |= z.add(c, RM); 4921 if (!z.isFinite()) { 4922 if (!z.isInfinity()) { 4923 Floats[0] = std::move(z); 4924 Floats[1].makeZero(/* Neg = */ false); 4925 return (opStatus)Status; 4926 } 4927 Status = opOK; 4928 auto AComparedToC = a.compareAbsoluteValue(c); 4929 z = cc; 4930 Status |= z.add(aa, RM); 4931 if (AComparedToC == APFloat::cmpGreaterThan) { 4932 // z = cc + aa + c + a; 4933 Status |= z.add(c, RM); 4934 Status |= z.add(a, RM); 4935 } else { 4936 // z = cc + aa + a + c; 4937 Status |= z.add(a, RM); 4938 Status |= z.add(c, RM); 4939 } 4940 if (!z.isFinite()) { 4941 Floats[0] = std::move(z); 4942 Floats[1].makeZero(/* Neg = */ false); 4943 return (opStatus)Status; 4944 } 4945 Floats[0] = z; 4946 APFloat zz = aa; 4947 Status |= zz.add(cc, RM); 4948 if (AComparedToC == APFloat::cmpGreaterThan) { 4949 // Floats[1] = a - z + c + zz; 4950 Floats[1] = a; 4951 Status |= Floats[1].subtract(z, RM); 4952 Status |= Floats[1].add(c, RM); 4953 Status |= Floats[1].add(zz, RM); 4954 } else { 4955 // Floats[1] = c - z + a + zz; 4956 Floats[1] = c; 4957 Status |= Floats[1].subtract(z, RM); 4958 Status |= Floats[1].add(a, RM); 4959 Status |= Floats[1].add(zz, RM); 4960 } 4961 } else { 4962 // q = a - z; 4963 APFloat q = a; 4964 Status |= q.subtract(z, RM); 4965 4966 // zz = q + c + (a - (q + z)) + aa + cc; 4967 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 
4968 auto zz = q; 4969 Status |= zz.add(c, RM); 4970 Status |= q.add(z, RM); 4971 Status |= q.subtract(a, RM); 4972 q.changeSign(); 4973 Status |= zz.add(q, RM); 4974 Status |= zz.add(aa, RM); 4975 Status |= zz.add(cc, RM); 4976 if (zz.isZero() && !zz.isNegative()) { 4977 Floats[0] = std::move(z); 4978 Floats[1].makeZero(/* Neg = */ false); 4979 return opOK; 4980 } 4981 Floats[0] = z; 4982 Status |= Floats[0].add(zz, RM); 4983 if (!Floats[0].isFinite()) { 4984 Floats[1].makeZero(/* Neg = */ false); 4985 return (opStatus)Status; 4986 } 4987 Floats[1] = std::move(z); 4988 Status |= Floats[1].subtract(Floats[0], RM); 4989 Status |= Floats[1].add(zz, RM); 4990 } 4991 return (opStatus)Status; 4992 } 4993 4994 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4995 const DoubleAPFloat &RHS, 4996 DoubleAPFloat &Out, 4997 roundingMode RM) { 4998 if (LHS.getCategory() == fcNaN) { 4999 Out = LHS; 5000 return opOK; 5001 } 5002 if (RHS.getCategory() == fcNaN) { 5003 Out = RHS; 5004 return opOK; 5005 } 5006 if (LHS.getCategory() == fcZero) { 5007 Out = RHS; 5008 return opOK; 5009 } 5010 if (RHS.getCategory() == fcZero) { 5011 Out = LHS; 5012 return opOK; 5013 } 5014 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 5015 LHS.isNegative() != RHS.isNegative()) { 5016 Out.makeNaN(false, Out.isNegative(), nullptr); 5017 return opInvalidOp; 5018 } 5019 if (LHS.getCategory() == fcInfinity) { 5020 Out = LHS; 5021 return opOK; 5022 } 5023 if (RHS.getCategory() == fcInfinity) { 5024 Out = RHS; 5025 return opOK; 5026 } 5027 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 5028 5029 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 5030 CC(RHS.Floats[1]); 5031 assert(&A.getSemantics() == &semIEEEdouble); 5032 assert(&AA.getSemantics() == &semIEEEdouble); 5033 assert(&C.getSemantics() == &semIEEEdouble); 5034 assert(&CC.getSemantics() == &semIEEEdouble); 5035 assert(&Out.Floats[0].getSemantics() == 
&semIEEEdouble); 5036 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 5037 return Out.addImpl(A, AA, C, CC, RM); 5038 } 5039 5040 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 5041 roundingMode RM) { 5042 return addWithSpecial(*this, RHS, *this, RM); 5043 } 5044 5045 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 5046 roundingMode RM) { 5047 changeSign(); 5048 auto Ret = add(RHS, RM); 5049 changeSign(); 5050 return Ret; 5051 } 5052 5053 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 5054 APFloat::roundingMode RM) { 5055 const auto &LHS = *this; 5056 auto &Out = *this; 5057 /* Interesting observation: For special categories, finding the lowest 5058 common ancestor of the following layered graph gives the correct 5059 return category: 5060 5061 NaN 5062 / \ 5063 Zero Inf 5064 \ / 5065 Normal 5066 5067 e.g. NaN * NaN = NaN 5068 Zero * Inf = NaN 5069 Normal * Zero = Zero 5070 Normal * Inf = Inf 5071 */ 5072 if (LHS.getCategory() == fcNaN) { 5073 Out = LHS; 5074 return opOK; 5075 } 5076 if (RHS.getCategory() == fcNaN) { 5077 Out = RHS; 5078 return opOK; 5079 } 5080 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 5081 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 5082 Out.makeNaN(false, false, nullptr); 5083 return opOK; 5084 } 5085 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 5086 Out = LHS; 5087 return opOK; 5088 } 5089 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 5090 Out = RHS; 5091 return opOK; 5092 } 5093 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 5094 "Special cases not handled exhaustively"); 5095 5096 int Status = opOK; 5097 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 5098 // t = a * c 5099 APFloat T = A; 5100 Status |= T.multiply(C, RM); 5101 if (!T.isFiniteNonZero()) { 5102 Floats[0] = T; 5103 Floats[1].makeZero(/* Neg = */ false); 5104 
return (opStatus)Status; 5105 } 5106 5107 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 5108 APFloat Tau = A; 5109 T.changeSign(); 5110 Status |= Tau.fusedMultiplyAdd(C, T, RM); 5111 T.changeSign(); 5112 { 5113 // v = a * d 5114 APFloat V = A; 5115 Status |= V.multiply(D, RM); 5116 // w = b * c 5117 APFloat W = B; 5118 Status |= W.multiply(C, RM); 5119 Status |= V.add(W, RM); 5120 // tau += v + w 5121 Status |= Tau.add(V, RM); 5122 } 5123 // u = t + tau 5124 APFloat U = T; 5125 Status |= U.add(Tau, RM); 5126 5127 Floats[0] = U; 5128 if (!U.isFinite()) { 5129 Floats[1].makeZero(/* Neg = */ false); 5130 } else { 5131 // Floats[1] = (t - u) + tau 5132 Status |= T.subtract(U, RM); 5133 Status |= T.add(Tau, RM); 5134 Floats[1] = T; 5135 } 5136 return (opStatus)Status; 5137 } 5138 5139 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 5140 APFloat::roundingMode RM) { 5141 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5142 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5143 auto Ret = 5144 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 5145 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5146 return Ret; 5147 } 5148 5149 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 5150 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5151 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5152 auto Ret = 5153 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5154 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5155 return Ret; 5156 } 5157 5158 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 5159 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5160 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5161 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5162 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5163 return Ret; 
5164 } 5165 5166 APFloat::opStatus 5167 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 5168 const DoubleAPFloat &Addend, 5169 APFloat::roundingMode RM) { 5170 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5171 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5172 auto Ret = Tmp.fusedMultiplyAdd( 5173 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 5174 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 5175 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5176 return Ret; 5177 } 5178 5179 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 5180 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5181 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5182 auto Ret = Tmp.roundToIntegral(RM); 5183 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5184 return Ret; 5185 } 5186 5187 void DoubleAPFloat::changeSign() { 5188 Floats[0].changeSign(); 5189 Floats[1].changeSign(); 5190 } 5191 5192 APFloat::cmpResult 5193 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 5194 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 5195 if (Result != cmpEqual) 5196 return Result; 5197 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 5198 if (Result == cmpLessThan || Result == cmpGreaterThan) { 5199 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 5200 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 5201 if (Against && !RHSAgainst) 5202 return cmpLessThan; 5203 if (!Against && RHSAgainst) 5204 return cmpGreaterThan; 5205 if (!Against && !RHSAgainst) 5206 return Result; 5207 if (Against && RHSAgainst) 5208 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 5209 } 5210 return Result; 5211 } 5212 5213 APFloat::fltCategory DoubleAPFloat::getCategory() const { 5214 return Floats[0].getCategory(); 5215 } 5216 5217 bool DoubleAPFloat::isNegative() 
const { return Floats[0].isNegative(); } 5218 5219 void DoubleAPFloat::makeInf(bool Neg) { 5220 Floats[0].makeInf(Neg); 5221 Floats[1].makeZero(/* Neg = */ false); 5222 } 5223 5224 void DoubleAPFloat::makeZero(bool Neg) { 5225 Floats[0].makeZero(Neg); 5226 Floats[1].makeZero(/* Neg = */ false); 5227 } 5228 5229 void DoubleAPFloat::makeLargest(bool Neg) { 5230 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5231 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 5232 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 5233 if (Neg) 5234 changeSign(); 5235 } 5236 5237 void DoubleAPFloat::makeSmallest(bool Neg) { 5238 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5239 Floats[0].makeSmallest(Neg); 5240 Floats[1].makeZero(/* Neg = */ false); 5241 } 5242 5243 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 5244 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5245 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 5246 if (Neg) 5247 Floats[0].changeSign(); 5248 Floats[1].makeZero(/* Neg = */ false); 5249 } 5250 5251 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 5252 Floats[0].makeNaN(SNaN, Neg, fill); 5253 Floats[1].makeZero(/* Neg = */ false); 5254 } 5255 5256 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 5257 auto Result = Floats[0].compare(RHS.Floats[0]); 5258 // |Float[0]| > |Float[1]| 5259 if (Result == APFloat::cmpEqual) 5260 return Floats[1].compare(RHS.Floats[1]); 5261 return Result; 5262 } 5263 5264 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 5265 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 5266 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 5267 } 5268 5269 hash_code hash_value(const DoubleAPFloat &Arg) { 5270 if (Arg.Floats) 5271 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 5272 return hash_combine(Arg.Semantics); 5273 } 5274 5275 
APInt DoubleAPFloat::bitcastToAPInt() const { 5276 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5277 uint64_t Data[] = { 5278 Floats[0].bitcastToAPInt().getRawData()[0], 5279 Floats[1].bitcastToAPInt().getRawData()[0], 5280 }; 5281 return APInt(128, 2, Data); 5282 } 5283 5284 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 5285 roundingMode RM) { 5286 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5287 APFloat Tmp(semPPCDoubleDoubleLegacy); 5288 auto Ret = Tmp.convertFromString(S, RM); 5289 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5290 return Ret; 5291 } 5292 5293 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5294 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5295 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5296 auto Ret = Tmp.next(nextDown); 5297 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5298 return Ret; 5299 } 5300 5301 APFloat::opStatus 5302 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5303 unsigned int Width, bool IsSigned, 5304 roundingMode RM, bool *IsExact) const { 5305 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5306 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5307 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5308 } 5309 5310 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5311 bool IsSigned, 5312 roundingMode RM) { 5313 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5314 APFloat Tmp(semPPCDoubleDoubleLegacy); 5315 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5316 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5317 return Ret; 5318 } 5319 5320 APFloat::opStatus 5321 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5322 unsigned int InputSize, 5323 bool IsSigned, roundingMode RM) { 5324 assert(Semantics == &semPPCDoubleDouble && 
"Unexpected Semantics"); 5325 APFloat Tmp(semPPCDoubleDoubleLegacy); 5326 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5327 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5328 return Ret; 5329 } 5330 5331 APFloat::opStatus 5332 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5333 unsigned int InputSize, 5334 bool IsSigned, roundingMode RM) { 5335 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5336 APFloat Tmp(semPPCDoubleDoubleLegacy); 5337 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5338 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5339 return Ret; 5340 } 5341 5342 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5343 unsigned int HexDigits, 5344 bool UpperCase, 5345 roundingMode RM) const { 5346 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5347 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5348 .convertToHexString(DST, HexDigits, UpperCase, RM); 5349 } 5350 5351 bool DoubleAPFloat::isDenormal() const { 5352 return getCategory() == fcNormal && 5353 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5354 // (double)(Hi + Lo) == Hi defines a normal number. 
5355 Floats[0] != Floats[0] + Floats[1]); 5356 } 5357 5358 bool DoubleAPFloat::isSmallest() const { 5359 if (getCategory() != fcNormal) 5360 return false; 5361 DoubleAPFloat Tmp(*this); 5362 Tmp.makeSmallest(this->isNegative()); 5363 return Tmp.compare(*this) == cmpEqual; 5364 } 5365 5366 bool DoubleAPFloat::isSmallestNormalized() const { 5367 if (getCategory() != fcNormal) 5368 return false; 5369 5370 DoubleAPFloat Tmp(*this); 5371 Tmp.makeSmallestNormalized(this->isNegative()); 5372 return Tmp.compare(*this) == cmpEqual; 5373 } 5374 5375 bool DoubleAPFloat::isLargest() const { 5376 if (getCategory() != fcNormal) 5377 return false; 5378 DoubleAPFloat Tmp(*this); 5379 Tmp.makeLargest(this->isNegative()); 5380 return Tmp.compare(*this) == cmpEqual; 5381 } 5382 5383 bool DoubleAPFloat::isInteger() const { 5384 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5385 return Floats[0].isInteger() && Floats[1].isInteger(); 5386 } 5387 5388 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5389 unsigned FormatPrecision, 5390 unsigned FormatMaxPadding, 5391 bool TruncateZero) const { 5392 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5393 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5394 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5395 } 5396 5397 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5398 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5399 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5400 if (!inv) 5401 return Tmp.getExactInverse(nullptr); 5402 APFloat Inv(semPPCDoubleDoubleLegacy); 5403 auto Ret = Tmp.getExactInverse(&Inv); 5404 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5405 return Ret; 5406 } 5407 5408 int DoubleAPFloat::getExactLog2() const { 5409 // TODO: Implement me 5410 return INT_MIN; 5411 } 5412 5413 int DoubleAPFloat::getExactLog2Abs() const { 5414 // TODO: Implement me 5415 return INT_MIN; 5416 } 5417 5418 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5419 APFloat::roundingMode RM) { 5420 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5421 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5422 scalbn(Arg.Floats[1], Exp, RM)); 5423 } 5424 5425 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5426 APFloat::roundingMode RM) { 5427 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5428 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5429 APFloat Second = Arg.Floats[1]; 5430 if (Arg.getCategory() == APFloat::fcNormal) 5431 Second = scalbn(Second, -Exp, RM); 5432 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5433 } 5434 5435 } // namespace detail 5436 5437 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5438 if (usesLayout<IEEEFloat>(Semantics)) { 5439 new (&IEEE) IEEEFloat(std::move(F)); 5440 return; 5441 } 5442 if (usesLayout<DoubleAPFloat>(Semantics)) { 5443 const fltSemantics& S = F.getSemantics(); 5444 new (&Double) 5445 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5446 APFloat(semIEEEdouble)); 5447 return; 5448 } 5449 llvm_unreachable("Unexpected semantics"); 5450 } 5451 5452 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5453 roundingMode RM) { 5454 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5455 } 5456 5457 hash_code hash_value(const APFloat &Arg) { 5458 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5459 return hash_value(Arg.U.IEEE); 5460 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5461 return hash_value(Arg.U.Double); 5462 llvm_unreachable("Unexpected semantics"); 5463 } 5464 5465 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5466 : APFloat(Semantics) { 5467 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5468 assert(StatusOrErr && "Invalid floating point representation"); 5469 consumeError(StatusOrErr.takeError()); 5470 } 
5471 5472 FPClassTest APFloat::classify() const { 5473 if (isZero()) 5474 return isNegative() ? fcNegZero : fcPosZero; 5475 if (isNormal()) 5476 return isNegative() ? fcNegNormal : fcPosNormal; 5477 if (isDenormal()) 5478 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5479 if (isInfinity()) 5480 return isNegative() ? fcNegInf : fcPosInf; 5481 assert(isNaN() && "Other class of FP constant"); 5482 return isSignaling() ? fcSNan : fcQNan; 5483 } 5484 5485 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5486 roundingMode RM, bool *losesInfo) { 5487 if (&getSemantics() == &ToSemantics) { 5488 *losesInfo = false; 5489 return opOK; 5490 } 5491 if (usesLayout<IEEEFloat>(getSemantics()) && 5492 usesLayout<IEEEFloat>(ToSemantics)) 5493 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5494 if (usesLayout<IEEEFloat>(getSemantics()) && 5495 usesLayout<DoubleAPFloat>(ToSemantics)) { 5496 assert(&ToSemantics == &semPPCDoubleDouble); 5497 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5498 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5499 return Ret; 5500 } 5501 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5502 usesLayout<IEEEFloat>(ToSemantics)) { 5503 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5504 *this = APFloat(std::move(getIEEE()), ToSemantics); 5505 return Ret; 5506 } 5507 llvm_unreachable("Unexpected semantics"); 5508 } 5509 5510 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5511 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5512 } 5513 5514 void APFloat::print(raw_ostream &OS) const { 5515 SmallVector<char, 16> Buffer; 5516 toString(Buffer); 5517 OS << Buffer; 5518 } 5519 5520 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5521 LLVM_DUMP_METHOD void APFloat::dump() const { 5522 print(dbgs()); 5523 dbgs() << '\n'; 5524 } 5525 #endif 5526 5527 void APFloat::Profile(FoldingSetNodeID &NID) const { 5528 NID.Add(bitcastToAPInt()); 5529 } 5530 5531 /* Same 
as convertToInteger(integerPart*, ...), except the result is returned in 5532 an APSInt, whose initial bit-width and signed-ness are used to determine the 5533 precision of the conversion. 5534 */ 5535 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5536 roundingMode rounding_mode, 5537 bool *isExact) const { 5538 unsigned bitWidth = result.getBitWidth(); 5539 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5540 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5541 rounding_mode, isExact); 5542 // Keeps the original signed-ness. 5543 result = APInt(bitWidth, parts); 5544 return status; 5545 } 5546 5547 double APFloat::convertToDouble() const { 5548 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5549 return getIEEE().convertToDouble(); 5550 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5551 "Float semantics is not representable by IEEEdouble"); 5552 APFloat Temp = *this; 5553 bool LosesInfo; 5554 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5555 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5556 (void)St; 5557 return Temp.getIEEE().convertToDouble(); 5558 } 5559 5560 #ifdef HAS_IEE754_FLOAT128 5561 float128 APFloat::convertToQuad() const { 5562 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) 5563 return getIEEE().convertToQuad(); 5564 assert(getSemantics().isRepresentableBy(semIEEEquad) && 5565 "Float semantics is not representable by IEEEquad"); 5566 APFloat Temp = *this; 5567 bool LosesInfo; 5568 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); 5569 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5570 (void)St; 5571 return Temp.getIEEE().convertToQuad(); 5572 } 5573 #endif 5574 5575 float APFloat::convertToFloat() const { 5576 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5577 return getIEEE().convertToFloat(); 5578 
assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5579 "Float semantics is not representable by IEEEsingle"); 5580 APFloat Temp = *this; 5581 bool LosesInfo; 5582 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5583 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5584 (void)St; 5585 return Temp.getIEEE().convertToFloat(); 5586 } 5587 5588 } // namespace llvm 5589 5590 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5591