//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class to represent arbitrary precision floating
// point values and provide a variety of arithmetic operations on them.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <limits.h>

/// Forward METHOD_CALL to the representation selected by getSemantics():
/// U.IEEE when usesLayout<IEEEFloat> holds, U.Double when
/// usesLayout<DoubleAPFloat> holds. The expansion contains `return`
/// statements, so it ends the enclosing function with the forwarded result;
/// any other layout reaches llvm_unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)

using namespace llvm;

/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");

namespace llvm {

// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};

// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};

/* Represents floating point arithmetic semantics.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;

  /* Whether this semantics has an encoding for Zero */
  bool hasZero = true;

  /* Whether this semantics can represent signed values */
  bool hasSignedRepr = true;

  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Does not take into account
  // the value of fltNonfiniteBehavior.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};

// The initializers below follow the member order of fltSemantics:
// maxExponent, minExponent, precision, sizeInBits, then optionally
// nonFiniteBehavior, nanEncoding, hasZero and hasSignedRepr (omitted members
// take the defaults declared in the struct).
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semFloat8E8M0FNU = {
    127,   -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes,
    false, false};

static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat4E2M1FN = {
    2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static constexpr fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics.
Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.  */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.
*/ 203 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 204 53 + 53, 128}; 205 206 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 207 switch (S) { 208 case S_IEEEhalf: 209 return IEEEhalf(); 210 case S_BFloat: 211 return BFloat(); 212 case S_IEEEsingle: 213 return IEEEsingle(); 214 case S_IEEEdouble: 215 return IEEEdouble(); 216 case S_IEEEquad: 217 return IEEEquad(); 218 case S_PPCDoubleDouble: 219 return PPCDoubleDouble(); 220 case S_Float8E5M2: 221 return Float8E5M2(); 222 case S_Float8E5M2FNUZ: 223 return Float8E5M2FNUZ(); 224 case S_Float8E4M3: 225 return Float8E4M3(); 226 case S_Float8E4M3FN: 227 return Float8E4M3FN(); 228 case S_Float8E4M3FNUZ: 229 return Float8E4M3FNUZ(); 230 case S_Float8E4M3B11FNUZ: 231 return Float8E4M3B11FNUZ(); 232 case S_Float8E3M4: 233 return Float8E3M4(); 234 case S_FloatTF32: 235 return FloatTF32(); 236 case S_Float8E8M0FNU: 237 return Float8E8M0FNU(); 238 case S_Float6E3M2FN: 239 return Float6E3M2FN(); 240 case S_Float6E2M3FN: 241 return Float6E2M3FN(); 242 case S_Float4E2M1FN: 243 return Float4E2M1FN(); 244 case S_x87DoubleExtended: 245 return x87DoubleExtended(); 246 } 247 llvm_unreachable("Unrecognised floating semantics"); 248 } 249 250 APFloatBase::Semantics 251 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 252 if (&Sem == &llvm::APFloat::IEEEhalf()) 253 return S_IEEEhalf; 254 else if (&Sem == &llvm::APFloat::BFloat()) 255 return S_BFloat; 256 else if (&Sem == &llvm::APFloat::IEEEsingle()) 257 return S_IEEEsingle; 258 else if (&Sem == &llvm::APFloat::IEEEdouble()) 259 return S_IEEEdouble; 260 else if (&Sem == &llvm::APFloat::IEEEquad()) 261 return S_IEEEquad; 262 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 263 return S_PPCDoubleDouble; 264 else if (&Sem == &llvm::APFloat::Float8E5M2()) 265 return S_Float8E5M2; 266 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 267 return S_Float8E5M2FNUZ; 268 else if (&Sem == &llvm::APFloat::Float8E4M3()) 269 return 
S_Float8E4M3; 270 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 271 return S_Float8E4M3FN; 272 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 273 return S_Float8E4M3FNUZ; 274 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 275 return S_Float8E4M3B11FNUZ; 276 else if (&Sem == &llvm::APFloat::Float8E3M4()) 277 return S_Float8E3M4; 278 else if (&Sem == &llvm::APFloat::FloatTF32()) 279 return S_FloatTF32; 280 else if (&Sem == &llvm::APFloat::Float8E8M0FNU()) 281 return S_Float8E8M0FNU; 282 else if (&Sem == &llvm::APFloat::Float6E3M2FN()) 283 return S_Float6E3M2FN; 284 else if (&Sem == &llvm::APFloat::Float6E2M3FN()) 285 return S_Float6E2M3FN; 286 else if (&Sem == &llvm::APFloat::Float4E2M1FN()) 287 return S_Float4E2M1FN; 288 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 289 return S_x87DoubleExtended; 290 else 291 llvm_unreachable("Unknown floating semantics"); 292 } 293 294 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 295 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 296 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 297 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 298 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 299 const fltSemantics &APFloatBase::PPCDoubleDouble() { 300 return semPPCDoubleDouble; 301 } 302 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 303 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 304 const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; } 305 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 306 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 307 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 308 return semFloat8E4M3B11FNUZ; 309 } 310 const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; } 311 const fltSemantics &APFloatBase::FloatTF32() { return 
semFloatTF32; } 312 const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; } 313 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } 314 const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } 315 const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; } 316 const fltSemantics &APFloatBase::x87DoubleExtended() { 317 return semX87DoubleExtended; 318 } 319 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 320 321 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 322 constexpr RoundingMode APFloatBase::rmTowardPositive; 323 constexpr RoundingMode APFloatBase::rmTowardNegative; 324 constexpr RoundingMode APFloatBase::rmTowardZero; 325 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 326 327 /* A tight upper bound on number of parts required to hold the value 328 pow(5, power) is 329 330 power * 815 / (351 * integerPartWidth) + 1 331 332 However, whilst the result may require only this many parts, 333 because we are multiplying two values to get it, the 334 multiplication may require an extra part with the excess part 335 being zero (consider the trivial case of 1 * 1, tcFullMultiply 336 requires two parts to hold the single-part result). So we add an 337 extra one to guarantee enough space whilst multiplying. 
*/ 338 const unsigned int maxExponent = 16383; 339 const unsigned int maxPrecision = 113; 340 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 341 const unsigned int maxPowerOfFiveParts = 342 2 + 343 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 344 345 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 346 return semantics.precision; 347 } 348 APFloatBase::ExponentType 349 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 350 return semantics.maxExponent; 351 } 352 APFloatBase::ExponentType 353 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 354 return semantics.minExponent; 355 } 356 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 357 return semantics.sizeInBits; 358 } 359 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 360 bool isSigned) { 361 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 362 // at least one more bit than the MaxExponent to hold the max FP value. 363 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 364 // Extra sign bit needed. 365 if (isSigned) 366 ++MinBitWidth; 367 return MinBitWidth; 368 } 369 370 bool APFloatBase::semanticsHasZero(const fltSemantics &semantics) { 371 return semantics.hasZero; 372 } 373 374 bool APFloatBase::semanticsHasSignedRepr(const fltSemantics &semantics) { 375 return semantics.hasSignedRepr; 376 } 377 378 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 379 const fltSemantics &Dst) { 380 // Exponent range must be larger. 381 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 382 return false; 383 384 // If the mantissa is long enough, the result value could still be denormal 385 // with a larger exponent range. 386 // 387 // FIXME: This condition is probably not accurate but also shouldn't be a 388 // practical concern with existing types. 
389 return Dst.precision >= Src.precision; 390 } 391 392 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 393 return Sem.sizeInBits; 394 } 395 396 static constexpr APFloatBase::ExponentType 397 exponentZero(const fltSemantics &semantics) { 398 return semantics.minExponent - 1; 399 } 400 401 static constexpr APFloatBase::ExponentType 402 exponentInf(const fltSemantics &semantics) { 403 return semantics.maxExponent + 1; 404 } 405 406 static constexpr APFloatBase::ExponentType 407 exponentNaN(const fltSemantics &semantics) { 408 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 409 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 410 return exponentZero(semantics); 411 if (semantics.hasSignedRepr) 412 return semantics.maxExponent; 413 } 414 return semantics.maxExponent + 1; 415 } 416 417 /* A bunch of private, handy routines. */ 418 419 static inline Error createError(const Twine &Err) { 420 return make_error<StringError>(Err, inconvertibleErrorCode()); 421 } 422 423 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 424 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) / 425 APFloatBase::integerPartWidth); 426 } 427 428 /* Returns 0U-9U. Return values >= 10U are not digits. */ 429 static inline unsigned int 430 decDigitValue(unsigned int c) 431 { 432 return c - '0'; 433 } 434 435 /* Return the value of a decimal exponent of the form 436 [+-]ddddddd. 437 438 If the exponent overflows, returns a large exponent with the 439 appropriate sign. */ 440 static Expected<int> readExponent(StringRef::iterator begin, 441 StringRef::iterator end) { 442 bool isNegative; 443 unsigned int absExponent; 444 const unsigned int overlargeExponent = 24000; /* FIXME. 
*/ 445 StringRef::iterator p = begin; 446 447 // Treat no exponent as 0 to match binutils 448 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 449 return 0; 450 } 451 452 isNegative = (*p == '-'); 453 if (*p == '-' || *p == '+') { 454 p++; 455 if (p == end) 456 return createError("Exponent has no digits"); 457 } 458 459 absExponent = decDigitValue(*p++); 460 if (absExponent >= 10U) 461 return createError("Invalid character in exponent"); 462 463 for (; p != end; ++p) { 464 unsigned int value; 465 466 value = decDigitValue(*p); 467 if (value >= 10U) 468 return createError("Invalid character in exponent"); 469 470 absExponent = absExponent * 10U + value; 471 if (absExponent >= overlargeExponent) { 472 absExponent = overlargeExponent; 473 break; 474 } 475 } 476 477 if (isNegative) 478 return -(int) absExponent; 479 else 480 return (int) absExponent; 481 } 482 483 /* This is ugly and needs cleaning up, but I don't immediately see 484 how whilst remaining safe. */ 485 static Expected<int> totalExponent(StringRef::iterator p, 486 StringRef::iterator end, 487 int exponentAdjustment) { 488 int unsignedExponent; 489 bool negative, overflow; 490 int exponent = 0; 491 492 if (p == end) 493 return createError("Exponent has no digits"); 494 495 negative = *p == '-'; 496 if (*p == '-' || *p == '+') { 497 p++; 498 if (p == end) 499 return createError("Exponent has no digits"); 500 } 501 502 unsignedExponent = 0; 503 overflow = false; 504 for (; p != end; ++p) { 505 unsigned int value; 506 507 value = decDigitValue(*p); 508 if (value >= 10U) 509 return createError("Invalid character in exponent"); 510 511 unsignedExponent = unsignedExponent * 10 + value; 512 if (unsignedExponent > 32767) { 513 overflow = true; 514 break; 515 } 516 } 517 518 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 519 overflow = true; 520 521 if (!overflow) { 522 exponent = unsignedExponent; 523 if (negative) 524 exponent = -exponent; 525 exponent += exponentAdjustment; 526 if 
(exponent > 32767 || exponent < -32768) 527 overflow = true; 528 } 529 530 if (overflow) 531 exponent = negative ? -32768: 32767; 532 533 return exponent; 534 } 535 536 static Expected<StringRef::iterator> 537 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 538 StringRef::iterator *dot) { 539 StringRef::iterator p = begin; 540 *dot = end; 541 while (p != end && *p == '0') 542 p++; 543 544 if (p != end && *p == '.') { 545 *dot = p++; 546 547 if (end - begin == 1) 548 return createError("Significand has no digits"); 549 550 while (p != end && *p == '0') 551 p++; 552 } 553 554 return p; 555 } 556 557 /* Given a normal decimal floating point number of the form 558 559 dddd.dddd[eE][+-]ddd 560 561 where the decimal point and exponent are optional, fill out the 562 structure D. Exponent is appropriate if the significand is 563 treated as an integer, and normalizedExponent if the significand 564 is taken to have the decimal point after a single leading 565 non-zero digit. 566 567 If the value is zero, V->firstSigDigit points to a non-digit, and 568 the return exponent is zero. 
*/
struct decimalInfo {
  // Position reached after leading zeroes and any leading dot are skipped.
  const char *firstSigDigit;
  // Position reached after trailing zeroes (and a trailing dot) are dropped.
  const char *lastSigDigit;
  // Decimal exponent when the significant digits are read as an integer.
  int exponent;
  // Decimal exponent when the point follows the first significant digit.
  int normalizedExponent;
};

/* Parse the decimal number in [BEGIN, END) and fill out *D.  Returns an
   error for a second dot, for a significand with no digits, for a character
   that is neither a digit, a dot nor an exponent marker, and for any error
   reported by readExponent.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Advance over digits, remembering the position of the (single) dot.  */
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  if (p != end) {
    /* The only thing allowed after the digits is an exponent marker.  */
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      /* The inner loop backs up over '0' characters; the outer loop repeats
         it when the character it stopped on is the dot.  */
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}

/* Return the trailing fraction of a hexadecimal number.
644 DIGITVALUE is the first hex digit of the fraction, P points to 645 the next digit. */ 646 static Expected<lostFraction> 647 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 648 unsigned int digitValue) { 649 unsigned int hexDigit; 650 651 /* If the first trailing digit isn't 0 or 8 we can work out the 652 fraction immediately. */ 653 if (digitValue > 8) 654 return lfMoreThanHalf; 655 else if (digitValue < 8 && digitValue > 0) 656 return lfLessThanHalf; 657 658 // Otherwise we need to find the first non-zero digit. 659 while (p != end && (*p == '0' || *p == '.')) 660 p++; 661 662 if (p == end) 663 return createError("Invalid trailing hexadecimal fraction!"); 664 665 hexDigit = hexDigitValue(*p); 666 667 /* If we ran off the end it is exactly zero or one-half, otherwise 668 a little more. */ 669 if (hexDigit == UINT_MAX) 670 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 671 else 672 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 673 } 674 675 /* Return the fraction lost were a bignum truncated losing the least 676 significant BITS bits. */ 677 static lostFraction 678 lostFractionThroughTruncation(const APFloatBase::integerPart *parts, 679 unsigned int partCount, 680 unsigned int bits) 681 { 682 unsigned int lsb; 683 684 lsb = APInt::tcLSB(parts, partCount); 685 686 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ 687 if (bits <= lsb) 688 return lfExactlyZero; 689 if (bits == lsb + 1) 690 return lfExactlyHalf; 691 if (bits <= partCount * APFloatBase::integerPartWidth && 692 APInt::tcExtractBit(parts, bits - 1)) 693 return lfMoreThanHalf; 694 695 return lfLessThanHalf; 696 } 697 698 /* Shift DST right BITS bits noting lost fraction. 
*/ 699 static lostFraction 700 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits) 701 { 702 lostFraction lost_fraction; 703 704 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 705 706 APInt::tcShiftRight(dst, parts, bits); 707 708 return lost_fraction; 709 } 710 711 /* Combine the effect of two lost fractions. */ 712 static lostFraction 713 combineLostFractions(lostFraction moreSignificant, 714 lostFraction lessSignificant) 715 { 716 if (lessSignificant != lfExactlyZero) { 717 if (moreSignificant == lfExactlyZero) 718 moreSignificant = lfLessThanHalf; 719 else if (moreSignificant == lfExactlyHalf) 720 moreSignificant = lfMoreThanHalf; 721 } 722 723 return moreSignificant; 724 } 725 726 /* The error from the true value, in half-ulps, on multiplying two 727 floating point numbers, which differ from the value they 728 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 729 than the returned value. 730 731 See "How to Read Floating Point Numbers Accurately" by William D 732 Clinger. */ 733 static unsigned int 734 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 735 { 736 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 737 738 if (HUerr1 + HUerr2 == 0) 739 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 740 else 741 return inexactMultiply + 2 * (HUerr1 + HUerr2); 742 } 743 744 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 745 when the least significant BITS are truncated. BITS cannot be 746 zero. 
*/
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* Locate the part holding the highest truncated bit (COUNT) and how many
     of that part's low bits are truncated (PARTBITS, at least 1).  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* Keep only the truncated bits of that part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  /* Single-part case: return the smaller of the two unsigned (wrapping)
     differences between PART and BOUNDARY.  */
  if (count == 0) {
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  /* Multi-part case: the distance is only small when the top part sits on
     the boundary (lower parts all zero above part 0) or just below it
     (lower parts all ones above part 0).  */
  if (part == boundary) {
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}

/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Successive squarings of 5^8 are stored back to back in this buffer;
     pow5 points at the current one and partsCount is its length.  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  p1 = dst;
  p2 = scratch;

  /* The low three bits of POWER come straight from the table; the remaining
     bits are handled by the squaring loop below.  */
  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      /* Square the previous entry, which sits immediately before pow5.  */
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product was written to p2 and is RESULT parts long.  Swap the
         pointers so that p1 holds it and p2 becomes the scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    pow5 += partsCount;
  }

  /* The pointer swaps may have left the answer in the scratch buffer.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}

/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";

/* Write out an integerPart in hexadecimal, starting with the most
   significant nibble.  Write out exactly COUNT hexdigits, return
   COUNT.  */
static unsigned int
partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
           const char *hexDigitChars)
{
  unsigned int result = count;

  assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);

  /* Drop the low nibbles that are not wanted, then fill DST from its last
     position backwards.  */
  part >>= (APFloatBase::integerPartWidth - 4 * count);
  while (count--) {
    dst[count] = hexDigitChars[part & 0xf];
    part >>= 4;
  }

  return result;
}

/* Write out an unsigned decimal integer.
*/ 876 static char * 877 writeUnsignedDecimal (char *dst, unsigned int n) 878 { 879 char buff[40], *p; 880 881 p = buff; 882 do 883 *p++ = '0' + n % 10; 884 while (n /= 10); 885 886 do 887 *dst++ = *--p; 888 while (p != buff); 889 890 return dst; 891 } 892 893 /* Write out a signed decimal integer. */ 894 static char * 895 writeSignedDecimal (char *dst, int value) 896 { 897 if (value < 0) { 898 *dst++ = '-'; 899 dst = writeUnsignedDecimal(dst, -(unsigned) value); 900 } else 901 dst = writeUnsignedDecimal(dst, value); 902 903 return dst; 904 } 905 906 namespace detail { 907 /* Constructors. */ 908 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 909 unsigned int count; 910 911 semantics = ourSemantics; 912 count = partCount(); 913 if (count > 1) 914 significand.parts = new integerPart[count]; 915 } 916 917 void IEEEFloat::freeSignificand() { 918 if (needsCleanup()) 919 delete [] significand.parts; 920 } 921 922 void IEEEFloat::assign(const IEEEFloat &rhs) { 923 assert(semantics == rhs.semantics); 924 925 sign = rhs.sign; 926 category = rhs.category; 927 exponent = rhs.exponent; 928 if (isFiniteNonZero() || category == fcNaN) 929 copySignificand(rhs); 930 } 931 932 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 933 assert(isFiniteNonZero() || category == fcNaN); 934 assert(rhs.partCount() >= partCount()); 935 936 APInt::tcAssign(significandParts(), rhs.significandParts(), 937 partCount()); 938 } 939 940 /* Make this number a NaN, with an arbitrary but deterministic value 941 for the significand. If double or longer, this is a signalling NaN, 942 which may not be ideal. If float, this is QNaN(0). 
*/ 943 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 944 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 945 llvm_unreachable("This floating point format does not support NaN"); 946 947 if (Negative && !semantics->hasSignedRepr) 948 llvm_unreachable( 949 "This floating point format does not support signed values"); 950 951 category = fcNaN; 952 sign = Negative; 953 exponent = exponentNaN(); 954 955 integerPart *significand = significandParts(); 956 unsigned numParts = partCount(); 957 958 APInt fill_storage; 959 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 960 // Finite-only types do not distinguish signalling and quiet NaN, so 961 // make them all signalling. 962 SNaN = false; 963 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 964 sign = true; 965 fill_storage = APInt::getZero(semantics->precision - 1); 966 } else { 967 fill_storage = APInt::getAllOnes(semantics->precision - 1); 968 } 969 fill = &fill_storage; 970 } 971 972 // Set the significand bits to the fill. 973 if (!fill || fill->getNumWords() < numParts) 974 APInt::tcSet(significand, 0, numParts); 975 if (fill) { 976 APInt::tcAssign(significand, fill->getRawData(), 977 std::min(fill->getNumWords(), numParts)); 978 979 // Zero out the excess bits of the significand. 980 unsigned bitsToPreserve = semantics->precision - 1; 981 unsigned part = bitsToPreserve / 64; 982 bitsToPreserve %= 64; 983 significand[part] &= ((1ULL << bitsToPreserve) - 1); 984 for (part++; part != numParts; ++part) 985 significand[part] = 0; 986 } 987 988 unsigned QNaNBit = 989 (semantics->precision >= 2) ? (semantics->precision - 2) : 0; 990 991 if (SNaN) { 992 // We always have to clear the QNaN bit to make it an SNaN. 
993 APInt::tcClearBit(significand, QNaNBit); 994 995 // If there are no bits set in the payload, we have to set 996 // *something* to make it a NaN instead of an infinity; 997 // conventionally, this is the next bit down from the QNaN bit. 998 if (APInt::tcIsZero(significand, numParts)) 999 APInt::tcSetBit(significand, QNaNBit - 1); 1000 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 1001 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 1002 // Do nothing. 1003 } else { 1004 // We always have to set the QNaN bit to make it a QNaN. 1005 APInt::tcSetBit(significand, QNaNBit); 1006 } 1007 1008 // For x87 extended precision, we want to make a NaN, not a 1009 // pseudo-NaN. Maybe we should expose the ability to make 1010 // pseudo-NaNs? 1011 if (semantics == &semX87DoubleExtended) 1012 APInt::tcSetBit(significand, QNaNBit + 1); 1013 } 1014 1015 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 1016 if (this != &rhs) { 1017 if (semantics != rhs.semantics) { 1018 freeSignificand(); 1019 initialize(rhs.semantics); 1020 } 1021 assign(rhs); 1022 } 1023 1024 return *this; 1025 } 1026 1027 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 1028 freeSignificand(); 1029 1030 semantics = rhs.semantics; 1031 significand = rhs.significand; 1032 exponent = rhs.exponent; 1033 category = rhs.category; 1034 sign = rhs.sign; 1035 1036 rhs.semantics = &semBogus; 1037 return *this; 1038 } 1039 1040 bool IEEEFloat::isDenormal() const { 1041 return isFiniteNonZero() && (exponent == semantics->minExponent) && 1042 (APInt::tcExtractBit(significandParts(), 1043 semantics->precision - 1) == 0); 1044 } 1045 1046 bool IEEEFloat::isSmallest() const { 1047 // The smallest number by magnitude in our format will be the smallest 1048 // denormal, i.e. the floating point number with exponent being minimum 1049 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 
1050 return isFiniteNonZero() && exponent == semantics->minExponent && 1051 significandMSB() == 0; 1052 } 1053 1054 bool IEEEFloat::isSmallestNormalized() const { 1055 return getCategory() == fcNormal && exponent == semantics->minExponent && 1056 isSignificandAllZerosExceptMSB(); 1057 } 1058 1059 unsigned int IEEEFloat::getNumHighBits() const { 1060 const unsigned int PartCount = partCountForBits(semantics->precision); 1061 const unsigned int Bits = PartCount * integerPartWidth; 1062 1063 // Compute how many bits are used in the final word. 1064 // When precision is just 1, it represents the 'Pth' 1065 // Precision bit and not the actual significand bit. 1066 const unsigned int NumHighBits = (semantics->precision > 1) 1067 ? (Bits - semantics->precision + 1) 1068 : (Bits - semantics->precision); 1069 return NumHighBits; 1070 } 1071 1072 bool IEEEFloat::isSignificandAllOnes() const { 1073 // Test if the significand excluding the integral bit is all ones. This allows 1074 // us to test for binade boundaries. 1075 const integerPart *Parts = significandParts(); 1076 const unsigned PartCount = partCountForBits(semantics->precision); 1077 for (unsigned i = 0; i < PartCount - 1; i++) 1078 if (~Parts[i]) 1079 return false; 1080 1081 // Set the unused high bits to all ones when we compare. 1082 const unsigned NumHighBits = getNumHighBits(); 1083 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1084 "Can not have more high bits to fill than integerPartWidth"); 1085 const integerPart HighBitFill = 1086 ~integerPart(0) << (integerPartWidth - NumHighBits); 1087 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill))) 1088 return false; 1089 1090 return true; 1091 } 1092 1093 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1094 // Test if the significand excluding the integral bit is all ones except for 1095 // the least significant bit. 
1096 const integerPart *Parts = significandParts(); 1097 1098 if (Parts[0] & 1) 1099 return false; 1100 1101 const unsigned PartCount = partCountForBits(semantics->precision); 1102 for (unsigned i = 0; i < PartCount - 1; i++) { 1103 if (~Parts[i] & ~unsigned{!i}) 1104 return false; 1105 } 1106 1107 // Set the unused high bits to all ones when we compare. 1108 const unsigned NumHighBits = getNumHighBits(); 1109 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1110 "Can not have more high bits to fill than integerPartWidth"); 1111 const integerPart HighBitFill = ~integerPart(0) 1112 << (integerPartWidth - NumHighBits); 1113 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1114 return false; 1115 1116 return true; 1117 } 1118 1119 bool IEEEFloat::isSignificandAllZeros() const { 1120 // Test if the significand excluding the integral bit is all zeros. This 1121 // allows us to test for binade boundaries. 1122 const integerPart *Parts = significandParts(); 1123 const unsigned PartCount = partCountForBits(semantics->precision); 1124 1125 for (unsigned i = 0; i < PartCount - 1; i++) 1126 if (Parts[i]) 1127 return false; 1128 1129 // Compute how many bits are used in the final word. 
1130 const unsigned NumHighBits = getNumHighBits(); 1131 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1132 "clear than integerPartWidth"); 1133 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1134 1135 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask)) 1136 return false; 1137 1138 return true; 1139 } 1140 1141 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1142 const integerPart *Parts = significandParts(); 1143 const unsigned PartCount = partCountForBits(semantics->precision); 1144 1145 for (unsigned i = 0; i < PartCount - 1; i++) { 1146 if (Parts[i]) 1147 return false; 1148 } 1149 1150 const unsigned NumHighBits = getNumHighBits(); 1151 const integerPart MSBMask = integerPart(1) 1152 << (integerPartWidth - NumHighBits); 1153 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask)); 1154 } 1155 1156 bool IEEEFloat::isLargest() const { 1157 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent; 1158 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1159 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1160 // The largest number by magnitude in our format will be the floating point 1161 // number with maximum exponent and with significand that is all ones except 1162 // the LSB. 1163 return (IsMaxExp && APFloat::hasSignificand(*semantics)) 1164 ? isSignificandAllOnesExceptLSB() 1165 : IsMaxExp; 1166 } else { 1167 // The largest number by magnitude in our format will be the floating point 1168 // number with maximum exponent and with significand that is all ones. 1169 return IsMaxExp && isSignificandAllOnes(); 1170 } 1171 } 1172 1173 bool IEEEFloat::isInteger() const { 1174 // This could be made more efficient; I'm going for obviously correct. 
1175 if (!isFinite()) return false; 1176 IEEEFloat truncated = *this; 1177 truncated.roundToIntegral(rmTowardZero); 1178 return compare(truncated) == cmpEqual; 1179 } 1180 1181 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1182 if (this == &rhs) 1183 return true; 1184 if (semantics != rhs.semantics || 1185 category != rhs.category || 1186 sign != rhs.sign) 1187 return false; 1188 if (category==fcZero || category==fcInfinity) 1189 return true; 1190 1191 if (isFiniteNonZero() && exponent != rhs.exponent) 1192 return false; 1193 1194 return std::equal(significandParts(), significandParts() + partCount(), 1195 rhs.significandParts()); 1196 } 1197 1198 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1199 initialize(&ourSemantics); 1200 sign = 0; 1201 category = fcNormal; 1202 zeroSignificand(); 1203 exponent = ourSemantics.precision - 1; 1204 significandParts()[0] = value; 1205 normalize(rmNearestTiesToEven, lfExactlyZero); 1206 } 1207 1208 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1209 initialize(&ourSemantics); 1210 // The Float8E8MOFNU format does not have a representation 1211 // for zero. So, use the closest representation instead. 1212 // Moreover, the all-zero encoding represents a valid 1213 // normal value (which is the smallestNormalized here). 1214 // Hence, we call makeSmallestNormalized (where category is 1215 // 'fcNormal') instead of makeZero (where category is 'fcZero'). 1216 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false); 1217 } 1218 1219 // Delegate to the previous constructor, because later copy constructor may 1220 // actually inspects category, which can't be garbage. 
1221 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1222 : IEEEFloat(ourSemantics) {} 1223 1224 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1225 initialize(rhs.semantics); 1226 assign(rhs); 1227 } 1228 1229 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1230 *this = std::move(rhs); 1231 } 1232 1233 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1234 1235 unsigned int IEEEFloat::partCount() const { 1236 return partCountForBits(semantics->precision + 1); 1237 } 1238 1239 const APFloat::integerPart *IEEEFloat::significandParts() const { 1240 return const_cast<IEEEFloat *>(this)->significandParts(); 1241 } 1242 1243 APFloat::integerPart *IEEEFloat::significandParts() { 1244 if (partCount() > 1) 1245 return significand.parts; 1246 else 1247 return &significand.part; 1248 } 1249 1250 void IEEEFloat::zeroSignificand() { 1251 APInt::tcSet(significandParts(), 0, partCount()); 1252 } 1253 1254 /* Increment an fcNormal floating point number's significand. */ 1255 void IEEEFloat::incrementSignificand() { 1256 integerPart carry; 1257 1258 carry = APInt::tcIncrement(significandParts(), partCount()); 1259 1260 /* Our callers should never cause us to overflow. */ 1261 assert(carry == 0); 1262 (void)carry; 1263 } 1264 1265 /* Add the significand of the RHS. Returns the carry flag. */ 1266 APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1267 integerPart *parts; 1268 1269 parts = significandParts(); 1270 1271 assert(semantics == rhs.semantics); 1272 assert(exponent == rhs.exponent); 1273 1274 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1275 } 1276 1277 /* Subtract the significand of the RHS with a borrow flag. Returns 1278 the borrow flag. 
*/
APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
                                                    integerPart borrow) {
  integerPart *parts;

  parts = significandParts();

  /* Both operands must already be aligned to the same exponent.  */
  assert(semantics == rhs.semantics);
  assert(exponent == rhs.exponent);

  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
                           partCount());
}

/* Multiply our significand by the significand of the RHS.  Unless
   IGNOREADDEND is set, a non-zero ADDEND is added on to the
   full-precision result of the multiplication (fused multiply-add).
   Returns the lost fraction.  The result is not necessarily
   normalized; see the note near the end of the function.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend,
                                            bool ignoreAddend) {
  unsigned int omsb; // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Use the on-stack scratch buffer when it is big enough; otherwise fall
  // back to the heap (released at the end of the function).
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (!ignoreAddend && addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    // For the duration of this branch *this is temporarily re-pointed at the
    // wide product (fullSignificand) with widened semantics, so the regular
    // add/subtract helper can be reused; the saved state is restored below.
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
                                    &ignored);
    assert(status == APFloat::opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  In the single-part case the sum lives in the
       inline part, so copy it back into the product buffer first.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the arithmetic above yields "precision" but the code
  // subtracts precision + 1 -- presumably to account for the extra overflow
  // bit reserved earlier; confirm and update this comment.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Keep only the low partsCount parts as the new significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}

/* Plain multiplication: forwards to the three-argument overload with a
   default-constructed addend.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  // When the given semantics has zero, the addend here is a zero.
  // i.e . it belongs to the 'fcZero' category.
  // But when the semantics does not support zero, we need to
  // explicitly convey that this addend should be ignored
  // for multiplication.
  return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
}

/* Divide our significand by the significand of the RHS, storing the
   quotient in our significand and adjusting the exponent.  Returns the
   lost fraction.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  /* One buffer holds both working copies: the dividend in the first
     partsCount parts and the divisor in the following partsCount parts.  */
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.
     Our own significand is cleared so the quotient bits can be set into
     it.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division: one quotient bit per iteration, from the integer bit
     downwards.  */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction by comparing the (already doubled)
     remainder against the divisor.  */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}

/* Result of APInt::tcMSB over the whole significand.  */
unsigned int IEEEFloat::significandMSB() const {
  return APInt::tcMSB(significandParts(), partCount());
}

/* Result of APInt::tcLSB over the whole significand.  */
unsigned int IEEEFloat::significandLSB() const {
  return APInt::tcLSB(significandParts(), partCount());
}

/* Shift the significand right BITS bits, add BITS to the exponent, and
   return the fraction shifted out.
   Note that a zero result is NOT normalized to fcZero.  */
lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
  /* Our exponent should not overflow.  */
  assert((ExponentType) (exponent + bits) >= exponent);

  exponent += bits;

  return shiftRight(significandParts(), partCount(), bits);
}

/* Shift the significand left BITS bits, subtract BITS from its exponent.  */
void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
  assert(bits < semantics->precision ||
         (semantics->precision == 1 && bits <= 1));

  if (bits) {
    unsigned int partsCount = partCount();

    APInt::tcShiftLeft(significandParts(), partsCount, bits);
    exponent -= bits;

    /* The shift must not have pushed every set bit out of the top.  */
    assert(!APInt::tcIsZero(significandParts(), partsCount));
  }
}

/* Compare the magnitudes of two finite non-zero values with the same
   semantics, ignoring sign: exponents first, then significands.  */
APFloat::cmpResult IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
  int compare;

  assert(semantics == rhs.semantics);
  assert(isFiniteNonZero());
  assert(rhs.isFiniteNonZero());

  compare = exponent - rhs.exponent;

  /* If exponents are equal, do an unsigned bignum comparison of the
     significands.  */
  if (compare == 0)
    compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
                               partCount());

  if (compare > 0)
    return cmpGreaterThan;
  else if (compare < 0)
    return cmpLessThan;
  else
    return cmpEqual;
}

/* Set the least significant BITS bits of a bignum, clear the
   rest.  DST has PARTS words.  Assumes BITS fits within PARTS words --
   the fill loop does not bounds-check; verify at call sites.  */
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
                                      unsigned bits) {
  unsigned i = 0;
  /* Whole words that are entirely ones.  */
  while (bits > APInt::APINT_BITS_PER_WORD) {
    dst[i++] = ~(APInt::WordType)0;
    bits -= APInt::APINT_BITS_PER_WORD;
  }

  /* Final (possibly full) word: keep only the low BITS bits set.  */
  if (bits)
    dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);

  while (i < parts)
    dst[i++] = 0;
}

/* Handle overflow.  Sign is preserved.  We either become infinity or
   the largest finite number.
*/ 1596 APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { 1597 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) { 1598 /* Infinity? */ 1599 if (rounding_mode == rmNearestTiesToEven || 1600 rounding_mode == rmNearestTiesToAway || 1601 (rounding_mode == rmTowardPositive && !sign) || 1602 (rounding_mode == rmTowardNegative && sign)) { 1603 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 1604 makeNaN(false, sign); 1605 else 1606 category = fcInfinity; 1607 return static_cast<opStatus>(opOverflow | opInexact); 1608 } 1609 } 1610 1611 /* Otherwise we become the largest finite number. */ 1612 category = fcNormal; 1613 exponent = semantics->maxExponent; 1614 tcSetLeastSignificantBits(significandParts(), partCount(), 1615 semantics->precision); 1616 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1617 semantics->nanEncoding == fltNanEncoding::AllOnes) 1618 APInt::tcClearBit(significandParts(), 0); 1619 1620 return opInexact; 1621 } 1622 1623 /* Returns TRUE if, when truncating the current number, with BIT the 1624 new LSB, with the given lost fraction and rounding mode, the result 1625 would need to be rounded away from zero (i.e., by increasing the 1626 signficand). This routine must work for fcZero of both signs, and 1627 fcNormal numbers. */ 1628 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode, 1629 lostFraction lost_fraction, 1630 unsigned int bit) const { 1631 /* NaNs and infinities should not have lost fractions. */ 1632 assert(isFiniteNonZero() || category == fcZero); 1633 1634 /* Current callers never pass this so we don't handle it. 
*/ 1635 assert(lost_fraction != lfExactlyZero); 1636 1637 switch (rounding_mode) { 1638 case rmNearestTiesToAway: 1639 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1640 1641 case rmNearestTiesToEven: 1642 if (lost_fraction == lfMoreThanHalf) 1643 return true; 1644 1645 /* Our zeroes don't have a significand to test. */ 1646 if (lost_fraction == lfExactlyHalf && category != fcZero) 1647 return APInt::tcExtractBit(significandParts(), bit); 1648 1649 return false; 1650 1651 case rmTowardZero: 1652 return false; 1653 1654 case rmTowardPositive: 1655 return !sign; 1656 1657 case rmTowardNegative: 1658 return sign; 1659 1660 default: 1661 break; 1662 } 1663 llvm_unreachable("Invalid rounding mode found"); 1664 } 1665 1666 APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, 1667 lostFraction lost_fraction) { 1668 unsigned int omsb; /* One, not zero, based MSB. */ 1669 int exponentChange; 1670 1671 if (!isFiniteNonZero()) 1672 return opOK; 1673 1674 /* Before rounding normalize the exponent of fcNormal numbers. */ 1675 omsb = significandMSB() + 1; 1676 1677 if (omsb) { 1678 /* OMSB is numbered from 1. We want to place it in the integer 1679 bit numbered PRECISION if possible, with a compensating change in 1680 the exponent. */ 1681 exponentChange = omsb - semantics->precision; 1682 1683 /* If the resulting exponent is too high, overflow according to 1684 the rounding mode. */ 1685 if (exponent + exponentChange > semantics->maxExponent) 1686 return handleOverflow(rounding_mode); 1687 1688 /* Subnormal numbers have exponent minExponent, and their MSB 1689 is forced based on that. */ 1690 if (exponent + exponentChange < semantics->minExponent) 1691 exponentChange = semantics->minExponent - exponent; 1692 1693 /* Shifting left is easy as we don't lose precision. 
*/ 1694 if (exponentChange < 0) { 1695 assert(lost_fraction == lfExactlyZero); 1696 1697 shiftSignificandLeft(-exponentChange); 1698 1699 return opOK; 1700 } 1701 1702 if (exponentChange > 0) { 1703 lostFraction lf; 1704 1705 /* Shift right and capture any new lost fraction. */ 1706 lf = shiftSignificandRight(exponentChange); 1707 1708 lost_fraction = combineLostFractions(lf, lost_fraction); 1709 1710 /* Keep OMSB up-to-date. */ 1711 if (omsb > (unsigned) exponentChange) 1712 omsb -= exponentChange; 1713 else 1714 omsb = 0; 1715 } 1716 } 1717 1718 // The all-ones values is an overflow if NaN is all ones. If NaN is 1719 // represented by negative zero, then it is a valid finite value. 1720 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1721 semantics->nanEncoding == fltNanEncoding::AllOnes && 1722 exponent == semantics->maxExponent && isSignificandAllOnes()) 1723 return handleOverflow(rounding_mode); 1724 1725 /* Now round the number according to rounding_mode given the lost 1726 fraction. */ 1727 1728 /* As specified in IEEE 754, since we do not trap we do not report 1729 underflow for exact results. */ 1730 if (lost_fraction == lfExactlyZero) { 1731 /* Canonicalize zeroes. */ 1732 if (omsb == 0) { 1733 category = fcZero; 1734 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 1735 sign = false; 1736 if (!semantics->hasZero) 1737 makeSmallestNormalized(false); 1738 } 1739 1740 return opOK; 1741 } 1742 1743 /* Increment the significand if we're rounding away from zero. */ 1744 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1745 if (omsb == 0) 1746 exponent = semantics->minExponent; 1747 1748 incrementSignificand(); 1749 omsb = significandMSB() + 1; 1750 1751 /* Did the significand increment overflow? */ 1752 if (omsb == (unsigned) semantics->precision + 1) { 1753 /* Renormalize by incrementing the exponent and shifting our 1754 significand right one. 
However if we already have the 1755 maximum exponent we overflow to infinity. */ 1756 if (exponent == semantics->maxExponent) 1757 // Invoke overflow handling with a rounding mode that will guarantee 1758 // that the result gets turned into the correct infinity representation. 1759 // This is needed instead of just setting the category to infinity to 1760 // account for 8-bit floating point types that have no inf, only NaN. 1761 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive); 1762 1763 shiftSignificandRight(1); 1764 1765 return opInexact; 1766 } 1767 1768 // The all-ones values is an overflow if NaN is all ones. If NaN is 1769 // represented by negative zero, then it is a valid finite value. 1770 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1771 semantics->nanEncoding == fltNanEncoding::AllOnes && 1772 exponent == semantics->maxExponent && isSignificandAllOnes()) 1773 return handleOverflow(rounding_mode); 1774 } 1775 1776 /* The normal case - we were and are not denormal, and any 1777 significand increment above didn't overflow. */ 1778 if (omsb == semantics->precision) 1779 return opInexact; 1780 1781 /* We have a non-zero denormal. */ 1782 assert(omsb < semantics->precision); 1783 1784 /* Canonicalize zeroes. */ 1785 if (omsb == 0) { 1786 category = fcZero; 1787 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 1788 sign = false; 1789 // This condition handles the case where the semantics 1790 // does not have zero but uses the all-zero encoding 1791 // to represent the smallest normal value. 1792 if (!semantics->hasZero) 1793 makeSmallestNormalized(false); 1794 } 1795 1796 /* The fcZero case is a denormal that underflowed to zero. 
*/ 1797 return (opStatus) (opUnderflow | opInexact); 1798 } 1799 1800 APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs, 1801 bool subtract) { 1802 switch (PackCategoriesIntoKey(category, rhs.category)) { 1803 default: 1804 llvm_unreachable(nullptr); 1805 1806 case PackCategoriesIntoKey(fcZero, fcNaN): 1807 case PackCategoriesIntoKey(fcNormal, fcNaN): 1808 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1809 assign(rhs); 1810 [[fallthrough]]; 1811 case PackCategoriesIntoKey(fcNaN, fcZero): 1812 case PackCategoriesIntoKey(fcNaN, fcNormal): 1813 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1814 case PackCategoriesIntoKey(fcNaN, fcNaN): 1815 if (isSignaling()) { 1816 makeQuiet(); 1817 return opInvalidOp; 1818 } 1819 return rhs.isSignaling() ? opInvalidOp : opOK; 1820 1821 case PackCategoriesIntoKey(fcNormal, fcZero): 1822 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1823 case PackCategoriesIntoKey(fcInfinity, fcZero): 1824 return opOK; 1825 1826 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1827 case PackCategoriesIntoKey(fcZero, fcInfinity): 1828 category = fcInfinity; 1829 sign = rhs.sign ^ subtract; 1830 return opOK; 1831 1832 case PackCategoriesIntoKey(fcZero, fcNormal): 1833 assign(rhs); 1834 sign = rhs.sign ^ subtract; 1835 return opOK; 1836 1837 case PackCategoriesIntoKey(fcZero, fcZero): 1838 /* Sign depends on rounding mode; handled by caller. */ 1839 return opOK; 1840 1841 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1842 /* Differently signed infinities can only be validly 1843 subtracted. */ 1844 if (((sign ^ rhs.sign)!=0) != subtract) { 1845 makeNaN(); 1846 return opInvalidOp; 1847 } 1848 1849 return opOK; 1850 1851 case PackCategoriesIntoKey(fcNormal, fcNormal): 1852 return opDivByZero; 1853 } 1854 } 1855 1856 /* Add or subtract two normal numbers. 
*/
/* Operates on the significands only: aligns the two operands to a common
   exponent, then adds or subtracts magnitudes depending on SUBTRACT and the
   operand signs.  May flip our sign when the subtraction is reversed.
   Returns the lost fraction.  */
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
                                                 bool subtract) {
  integerPart carry;
  lostFraction lost_fraction;
  int bits;

  /* Determine if the operation on the absolute values is effectively
     an addition or subtraction.  */
  subtract ^= static_cast<bool>(sign ^ rhs.sign);

  /* Are we bigger exponent-wise than the RHS?  */
  bits = exponent - rhs.exponent;

  /* Subtraction is more subtle than one might naively expect.  */
  if (subtract) {
    if ((bits < 0) && !semantics->hasSignedRepr)
      llvm_unreachable(
          "This floating point format does not support signed values");

    IEEEFloat temp_rhs(rhs);

    /* Align the exponents.  The operand with the smaller exponent is
       shifted right by one bit less than the difference and the other is
       shifted left by one, which keeps one extra bit of the smaller
       operand.  */
    if (bits == 0)
      lost_fraction = lfExactlyZero;
    else if (bits > 0) {
      lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
      shiftSignificandLeft(1);
    } else {
      lost_fraction = shiftSignificandRight(-bits - 1);
      temp_rhs.shiftSignificandLeft(1);
    }

    // Should we reverse the subtraction.
    // A non-zero lost fraction is passed as the borrow-in in both branches.
    if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
      carry = temp_rhs.subtractSignificand
        (*this, lost_fraction != lfExactlyZero);
      copySignificand(temp_rhs);
      sign = !sign;
    } else {
      carry = subtractSignificand
        (temp_rhs, lost_fraction != lfExactlyZero);
    }

    /* Invert the lost fraction - it was on the RHS and
       subtracted.  */
    if (lost_fraction == lfLessThanHalf)
      lost_fraction = lfMoreThanHalf;
    else if (lost_fraction == lfMoreThanHalf)
      lost_fraction = lfLessThanHalf;

    /* The code above is intended to ensure that no borrow is
       necessary.  */
    assert(!carry);
    (void)carry;
  } else {
    /* Addition: shift whichever operand has the smaller exponent right
       until the exponents match, then add.  */
    if (bits > 0) {
      IEEEFloat temp_rhs(rhs);

      lost_fraction = temp_rhs.shiftSignificandRight(bits);
      carry = addSignificand(temp_rhs);
    } else {
      lost_fraction = shiftSignificandRight(-bits);
      carry = addSignificand(rhs);
    }

    /* We have a guard bit; generating a carry cannot happen.  */
    assert(!carry);
    (void)carry;
  }

  return lost_fraction;
}

/* Resolve the operand-category combinations of a multiplication that need
   no significand arithmetic.  Returns opOK without touching anything when
   both operands are fcNormal.  */
APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    /* Adopt the RHS NaN.  Clearing the sign here makes the XOR below yield
       exactly rhs.sign.  */
    assign(rhs);
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    /* NOTE(review): presumably the caller has already XORed rhs.sign into
       our sign, so XORing again undoes that -- confirm against the caller. */
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    category = fcInfinity;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcZero, fcZero):
    category = fcZero;
    return opOK;

  /* Zero times infinity is an invalid operation and produces NaN.  */
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

/* Resolve the operand-category combinations of a division that need no
   significand arithmetic.  Returns opOK without touching anything when both
   operands are fcNormal.  */
APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    /* Adopt the RHS NaN.  Clearing the sign here makes the XOR below yield
       exactly rhs.sign.  */
    assign(rhs);
    sign = false;
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    /* NOTE(review): presumably the caller has already XORed rhs.sign into
       our sign, so XORing again undoes that -- confirm against the caller. */
    sign ^= rhs.sign; // restore the original sign
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  /* The left operand's category (infinity or zero) is already the result. */
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  /* Finite non-zero divided by zero: infinity, or NaN for formats that
     have no infinity.  */
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
      makeNaN(false, sign);
    else
      category = fcInfinity;
    return opDivByZero;

  /* inf/inf and 0/0 are invalid operations and produce NaN.  */
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}

APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ?
opInvalidOp : opOK; 2041 2042 case PackCategoriesIntoKey(fcZero, fcInfinity): 2043 case PackCategoriesIntoKey(fcZero, fcNormal): 2044 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2045 return opOK; 2046 2047 case PackCategoriesIntoKey(fcNormal, fcZero): 2048 case PackCategoriesIntoKey(fcInfinity, fcZero): 2049 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2050 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2051 case PackCategoriesIntoKey(fcZero, fcZero): 2052 makeNaN(); 2053 return opInvalidOp; 2054 2055 case PackCategoriesIntoKey(fcNormal, fcNormal): 2056 return opOK; 2057 } 2058 } 2059 2060 APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 2061 switch (PackCategoriesIntoKey(category, rhs.category)) { 2062 default: 2063 llvm_unreachable(nullptr); 2064 2065 case PackCategoriesIntoKey(fcZero, fcNaN): 2066 case PackCategoriesIntoKey(fcNormal, fcNaN): 2067 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2068 assign(rhs); 2069 [[fallthrough]]; 2070 case PackCategoriesIntoKey(fcNaN, fcZero): 2071 case PackCategoriesIntoKey(fcNaN, fcNormal): 2072 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2073 case PackCategoriesIntoKey(fcNaN, fcNaN): 2074 if (isSignaling()) { 2075 makeQuiet(); 2076 return opInvalidOp; 2077 } 2078 return rhs.isSignaling() ? opInvalidOp : opOK; 2079 2080 case PackCategoriesIntoKey(fcZero, fcInfinity): 2081 case PackCategoriesIntoKey(fcZero, fcNormal): 2082 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2083 return opOK; 2084 2085 case PackCategoriesIntoKey(fcNormal, fcZero): 2086 case PackCategoriesIntoKey(fcInfinity, fcZero): 2087 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2088 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2089 case PackCategoriesIntoKey(fcZero, fcZero): 2090 makeNaN(); 2091 return opInvalidOp; 2092 2093 case PackCategoriesIntoKey(fcNormal, fcNormal): 2094 return opDivByZero; // fake status, indicating this is not a special case 2095 } 2096 } 2097 2098 /* Change sign. 
*/ 2099 void IEEEFloat::changeSign() { 2100 // With NaN-as-negative-zero, neither NaN or negative zero can change 2101 // their signs. 2102 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 2103 (isZero() || isNaN())) 2104 return; 2105 /* Look mummy, this one's easy. */ 2106 sign = !sign; 2107 } 2108 2109 /* Normalized addition or subtraction. */ 2110 APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2111 roundingMode rounding_mode, 2112 bool subtract) { 2113 opStatus fs; 2114 2115 fs = addOrSubtractSpecials(rhs, subtract); 2116 2117 /* This return code means it was not a simple case. */ 2118 if (fs == opDivByZero) { 2119 lostFraction lost_fraction; 2120 2121 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2122 fs = normalize(rounding_mode, lost_fraction); 2123 2124 /* Can only be zero if we lost no fraction. */ 2125 assert(category != fcZero || lost_fraction == lfExactlyZero); 2126 } 2127 2128 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2129 positive zero unless rounding to minus infinity, except that 2130 adding two like-signed zeroes gives that zero. */ 2131 if (category == fcZero) { 2132 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2133 sign = (rounding_mode == rmTowardNegative); 2134 // NaN-in-negative-zero means zeros need to be normalized to +0. 2135 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2136 sign = false; 2137 } 2138 2139 return fs; 2140 } 2141 2142 /* Normalized addition. */ 2143 APFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2144 roundingMode rounding_mode) { 2145 return addOrSubtract(rhs, rounding_mode, false); 2146 } 2147 2148 /* Normalized subtraction. */ 2149 APFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2150 roundingMode rounding_mode) { 2151 return addOrSubtract(rhs, rounding_mode, true); 2152 } 2153 2154 /* Normalized multiply. 
*/
// Multiplies this value by RHS in place.  The sign becomes the XOR of both
// signs, multiplySpecials() settles the category-level cases, and only when
// the value is still finite and non-zero afterwards are the significands
// multiplied and the result normalized under ROUNDING_MODE.
APFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
                                      roundingMode rounding_mode) {
  opStatus fs;

  sign ^= rhs.sign;
  fs = multiplySpecials(rhs);

  // Formats that encode NaN as negative zero only have a positive zero.
  if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
    sign = false;
  if (isFiniteNonZero()) {
    lostFraction lost_fraction = multiplySignificand(rhs);
    fs = normalize(rounding_mode, lost_fraction);
    // Any fraction lost by the significand multiply makes the result inexact.
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);
  }

  return fs;
}

/* Normalized divide.  */
// Divides this value by RHS in place; structured exactly like multiply(), but
// using divideSpecials() and divideSignificand().
APFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
                                    roundingMode rounding_mode) {
  opStatus fs;

  sign ^= rhs.sign;
  fs = divideSpecials(rhs);

  // Formats that encode NaN as negative zero only have a positive zero.
  if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
    sign = false;
  if (isFiniteNonZero()) {
    lostFraction lost_fraction = divideSignificand(rhs);
    fs = normalize(rounding_mode, lost_fraction);
    // Any fraction lost by the significand divide makes the result inexact.
    if (lost_fraction != lfExactlyZero)
      fs = (opStatus) (fs | opInexact);
  }

  return fs;
}

/* Normalized remainder.  */
// Replaces this value with the remainder of dividing it by RHS, where the
// quotient is rounded to the nearest integer (ties to even), as laid out in
// the scheme below.  Special operand categories are answered directly by
// remainderSpecials().
APFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
  opStatus fs;
  unsigned int origSign = sign;

  // First handle the special cases.  opDivByZero is the marker returned for
  // two normal operands; every other status is final.
  fs = remainderSpecials(rhs);
  if (fs != opDivByZero)
    return fs;

  fs = opOK;

  // Make sure the current value is less than twice the denom. If the addition
  // did not succeed (an overflow has happened), then the finite value we
  // currently possess must already be less than twice the denom (as we are
  // using the same semantics).
  IEEEFloat P2 = rhs;
  if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
    fs = mod(P2);
    assert(fs == opOK);
  }

  // Let's work with absolute numbers.
  IEEEFloat P = rhs;
  P.sign = false;
  sign = false;

  //
  // To calculate the remainder we use the following scheme.
  //
  // The remainder is defined as follows:
  //
  // remainder = numer - rquot * denom = x - r * p
  //
  // Where r is the result of: x/p, rounded toward the nearest integral value
  // (with halfway cases rounded toward the even number).
  //
  // Currently, (after x mod 2p):
  // r is the number of 2p's present inside x, which is inherently, an even
  // number of p's.
  //
  // We may split the remaining calculation into 4 options:
  // - if x < 0.5p then we round to the nearest number which is 0, and are done.
  // - if x == 0.5p then we round to the nearest even number which is 0, and we
  //   are done as well.
  // - if 0.5p < x < p then we round to nearest number which is 1, and we have
  //   to subtract 1p at least once.
  // - if x >= p then we must subtract p at least once, as x must be a
  //   remainder.
  //
  // By now, either we are done, or we have added 1 to r, which in turn is now
  // an odd number.
  //
  // We can now split the remaining calculation to the following 3 options:
  // - if x < 0.5p then we round to the nearest number which is 0, and are done.
  // - if x == 0.5p then we round to the nearest even number. As r is odd, we
  //   must round up to the next even number. so we must subtract p once more.
  // - if x > 0.5p (and inherently x < p) then we must round r up to the next
  //   integral, and subtract p once more.
  //

  // Extend the semantics to prevent an overflow/underflow or inexact result.
  bool losesInfo;
  fltSemantics extendedSemantics = *semantics;
  extendedSemantics.maxExponent++;
  extendedSemantics.minExponent--;
  extendedSemantics.precision += 2;

  // VEx and PEx are copies of |x| and |p| in the extended semantics.
  IEEEFloat VEx = *this;
  fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  IEEEFloat PEx = P;
  fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);

  // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
  // any fraction.
  fs = VEx.add(VEx, rmNearestTiesToEven);
  assert(fs == opOK);

  // 2x > p, i.e. x > 0.5p: subtract p once.
  if (VEx.compare(PEx) == cmpGreaterThan) {
    fs = subtract(P, rmNearestTiesToEven);
    assert(fs == opOK);

    // Make VEx = this.add(this), but because we have different semantics, we do
    // not want to `convert` again, so we just subtract PEx twice (which equals
    // to the desired value).
    fs = VEx.subtract(PEx, rmNearestTiesToEven);
    assert(fs == opOK);
    fs = VEx.subtract(PEx, rmNearestTiesToEven);
    assert(fs == opOK);

    // Still 2x >= p, i.e. x >= 0.5p with an odd r: subtract p once more.
    cmpResult result = VEx.compare(PEx);
    if (result == cmpGreaterThan || result == cmpEqual) {
      fs = subtract(P, rmNearestTiesToEven);
      assert(fs == opOK);
    }
  }

  if (isZero()) {
    sign = origSign;    // IEEE754 requires this
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      // But some 8-bit floats only have positive 0.
      sign = false;
  }

  else
    // The work above was done on absolute values; fold the original sign
    // back in.
    sign ^= origSign;
  return fs;
}

/* Normalized llvm frem (C fmod).
*/ 2305 APFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 2306 opStatus fs; 2307 fs = modSpecials(rhs); 2308 unsigned int origSign = sign; 2309 2310 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 2311 compareAbsoluteValue(rhs) != cmpLessThan) { 2312 int Exp = ilogb(*this) - ilogb(rhs); 2313 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven); 2314 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly 2315 // check for it. 2316 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan) 2317 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); 2318 V.sign = sign; 2319 2320 fs = subtract(V, rmNearestTiesToEven); 2321 2322 // When the semantics supports zero, this loop's 2323 // exit-condition is handled by the 'isFiniteNonZero' 2324 // category check above. However, when the semantics 2325 // does not have 'fcZero' and we have reached the 2326 // minimum possible value, (and any further subtract 2327 // will underflow to the same value) explicitly 2328 // provide an exit-path here. 2329 if (!semantics->hasZero && this->isSmallest()) 2330 break; 2331 2332 assert(fs==opOK); 2333 } 2334 if (isZero()) { 2335 sign = origSign; // fmod requires this 2336 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2337 sign = false; 2338 } 2339 return fs; 2340 } 2341 2342 /* Normalized fused-multiply-add. */ 2343 APFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 2344 const IEEEFloat &addend, 2345 roundingMode rounding_mode) { 2346 opStatus fs; 2347 2348 /* Post-multiplication sign, before addition. */ 2349 sign ^= multiplicand.sign; 2350 2351 /* If and only if all arguments are normal do we need to do an 2352 extended-precision calculation. 
*/ 2353 if (isFiniteNonZero() && 2354 multiplicand.isFiniteNonZero() && 2355 addend.isFinite()) { 2356 lostFraction lost_fraction; 2357 2358 lost_fraction = multiplySignificand(multiplicand, addend); 2359 fs = normalize(rounding_mode, lost_fraction); 2360 if (lost_fraction != lfExactlyZero) 2361 fs = (opStatus) (fs | opInexact); 2362 2363 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2364 positive zero unless rounding to minus infinity, except that 2365 adding two like-signed zeroes gives that zero. */ 2366 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { 2367 sign = (rounding_mode == rmTowardNegative); 2368 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2369 sign = false; 2370 } 2371 } else { 2372 fs = multiplySpecials(multiplicand); 2373 2374 /* FS can only be opOK or opInvalidOp. There is no more work 2375 to do in the latter case. The IEEE-754R standard says it is 2376 implementation-defined in this case whether, if ADDEND is a 2377 quiet NaN, we raise invalid op; this implementation does so. 2378 2379 If we need to do the addition we can do so with normal 2380 precision. */ 2381 if (fs == opOK) 2382 fs = addOrSubtract(addend, rounding_mode, false); 2383 } 2384 2385 return fs; 2386 } 2387 2388 /* Rounding-mode correct round to integral value. */ 2389 APFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2390 opStatus fs; 2391 2392 if (isInfinity()) 2393 // [IEEE Std 754-2008 6.1]: 2394 // The behavior of infinity in floating-point arithmetic is derived from the 2395 // limiting cases of real arithmetic with operands of arbitrarily 2396 // large magnitude, when such a limit exists. 2397 // ... 2398 // Operations on infinite operands are usually exact and therefore signal no 2399 // exceptions ... 
2400 return opOK; 2401 2402 if (isNaN()) { 2403 if (isSignaling()) { 2404 // [IEEE Std 754-2008 6.2]: 2405 // Under default exception handling, any operation signaling an invalid 2406 // operation exception and for which a floating-point result is to be 2407 // delivered shall deliver a quiet NaN. 2408 makeQuiet(); 2409 // [IEEE Std 754-2008 6.2]: 2410 // Signaling NaNs shall be reserved operands that, under default exception 2411 // handling, signal the invalid operation exception(see 7.2) for every 2412 // general-computational and signaling-computational operation except for 2413 // the conversions described in 5.12. 2414 return opInvalidOp; 2415 } else { 2416 // [IEEE Std 754-2008 6.2]: 2417 // For an operation with quiet NaN inputs, other than maximum and minimum 2418 // operations, if a floating-point result is to be delivered the result 2419 // shall be a quiet NaN which should be one of the input NaNs. 2420 // ... 2421 // Every general-computational and quiet-computational operation involving 2422 // one or more input NaNs, none of them signaling, shall signal no 2423 // exception, except fusedMultiplyAdd might signal the invalid operation 2424 // exception(see 7.2). 2425 return opOK; 2426 } 2427 } 2428 2429 if (isZero()) { 2430 // [IEEE Std 754-2008 6.3]: 2431 // ... the sign of the result of conversions, the quantize operation, the 2432 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2433 // the sign of the first or only operand. 2434 return opOK; 2435 } 2436 2437 // If the exponent is large enough, we know that this value is already 2438 // integral, and the arithmetic below would potentially cause it to saturate 2439 // to +/-Inf. Bail out early instead. 2440 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics)) 2441 return opOK; 2442 2443 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2444 // precision of our format, and then subtract it back off again. 
The choice 2445 // of rounding modes for the addition/subtraction determines the rounding mode 2446 // for our integral rounding as well. 2447 // NOTE: When the input value is negative, we do subtraction followed by 2448 // addition instead. 2449 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)), 2450 1); 2451 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1; 2452 IEEEFloat MagicConstant(*semantics); 2453 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2454 rmNearestTiesToEven); 2455 assert(fs == opOK); 2456 MagicConstant.sign = sign; 2457 2458 // Preserve the input sign so that we can handle the case of zero result 2459 // correctly. 2460 bool inputSign = isNegative(); 2461 2462 fs = add(MagicConstant, rounding_mode); 2463 2464 // Current value and 'MagicConstant' are both integers, so the result of the 2465 // subtraction is always exact according to Sterbenz' lemma. 2466 subtract(MagicConstant, rounding_mode); 2467 2468 // Restore the input sign. 2469 if (inputSign != isNegative()) 2470 changeSign(); 2471 2472 return fs; 2473 } 2474 2475 /* Comparison requires normalized numbers. 
*/
// Three-way (plus unordered) comparison of this value against RHS.  Both
// operands must share the same semantics (asserted).  Returns cmpUnordered
// when either operand is a NaN, cmpEqual for two zeros regardless of sign,
// and otherwise orders the values by category, sign and magnitude.
APFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
  cmpResult result;

  assert(semantics == rhs.semantics);

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // Any NaN operand makes the comparison unordered.
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    return cmpUnordered;

  // Our category dominates RHS's: our sign alone decides the ordering.
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcNormal, fcZero):
    if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  // RHS's category dominates ours: RHS's sign alone decides the ordering.
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
    if (rhs.sign)
      return cmpGreaterThan;
    else
      return cmpLessThan;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    if (sign == rhs.sign)
      return cmpEqual;
    else if (sign)
      return cmpLessThan;
    else
      return cmpGreaterThan;

  // Zeros compare equal whatever their signs are.
  case PackCategoriesIntoKey(fcZero, fcZero):
    return cmpEqual;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    break;
  }

  /* Two normal numbers.  Do they have the same sign?  */
  if (sign != rhs.sign) {
    if (sign)
      result = cmpLessThan;
    else
      result = cmpGreaterThan;
  } else {
    /* Compare absolute values; invert result if negative.  */
    result = compareAbsoluteValue(rhs);

    if (sign) {
      if (result == cmpLessThan)
        result = cmpGreaterThan;
      else if (result == cmpGreaterThan)
        result = cmpLessThan;
    }
  }

  return result;
}

/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
/// are edge cases where this is not so).
///
/// The conversion happens in place: the significand is shifted to the new
/// precision, the significand storage is re-sized when the number of parts
/// changes, `semantics` is switched to \p toSemantics, and finally the value
/// is normalized or its special category is fixed up for the target format.
/// \p losesInfo is written on every path and must therefore not be null.

APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
                                     roundingMode rounding_mode,
                                     bool *losesInfo) {
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;
  int shift;
  const fltSemantics &fromSemantics = *semantics;
  // Sampled up front, before the significand is shifted or replaced below.
  bool is_signaling = isSignaling();

  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();
  // Positive when the precision grows (extension), negative when it shrinks
  // (truncation).
  shift = toSemantics.precision - fromSemantics.precision;

  bool X86SpecialNan = false;
  if (&fromSemantics == &semX87DoubleExtended &&
      &toSemantics != &semX87DoubleExtended && category == fcNaN &&
      (!(*significandParts() & 0x8000000000000000ULL) ||
       !(*significandParts() & 0x4000000000000000ULL))) {
    // x86 has some unusual NaNs which cannot be represented in any other
    // format; note them here.
    X86SpecialNan = true;
  }

  // If this is a truncation of a denormal number, and the target semantics
  // has larger exponent range than the source semantics (this can happen
  // when truncating from PowerPC double-double to double format), the
  // right shift could lose result mantissa bits.  Adjust exponent instead
  // of performing excessive shift.
  // Also do a similar trick in case shifting denormal would produce zero
  // significand as this case isn't handled correctly by normalize.
  if (shift < 0 && isFiniteNonZero()) {
    int omsb = significandMSB() + 1;
    int exponentChange = omsb - fromSemantics.precision;
    if (exponent + exponentChange < toSemantics.minExponent)
      exponentChange = toSemantics.minExponent - exponent;
    if (exponentChange < shift)
      exponentChange = shift;
    if (exponentChange < 0) {
      shift -= exponentChange;
      exponent += exponentChange;
    } else if (omsb <= -shift) {
      exponentChange = omsb + shift - 1; // leave at least one bit set
      shift -= exponentChange;
      exponent += exponentChange;
    }
  }

  // If this is a truncation, perform the shift before we narrow the storage.
  // NaNs of NanOnly source formats are left unshifted.
  if (shift < 0 && (isFiniteNonZero() ||
                    (category == fcNaN && semantics->nonFiniteBehavior !=
                                              fltNonfiniteBehavior::NanOnly)))
    lostFraction = shiftRight(significandParts(), oldPartCount, -shift);

  // Fix the storage so it can hold the new value.
  if (newPartCount > oldPartCount) {
    // The new type requires more storage; make it available.
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    // Only finite non-zero values and NaNs have significand bits to keep.
    if (isFiniteNonZero() || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount == 1 && oldPartCount != 1) {
    // Switch to built-in storage for a single part.
    integerPart newPart = 0;
    if (isFiniteNonZero() || category==fcNaN)
      newPart = significandParts()[0];
    freeSignificand();
    significand.part = newPart;
  }

  // Now that we have the right storage, switch the semantics.
  semantics = &toSemantics;

  // If this is an extension, perform the shift now that the storage is
  // available.
  if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
    APInt::tcShiftLeft(significandParts(), newPartCount, shift);

  if (isFiniteNonZero()) {
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    // The target is a NanOnly format: build a fresh NaN with the same sign;
    // information is reported lost unless the source was NanOnly as well.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
      *losesInfo =
          fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
      makeNaN(false, sign);
      return is_signaling ? opInvalidOp : opOK;
    }

    // If NaN is negative zero, we need to create a new NaN to avoid converting
    // NaN to -Inf.
    if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
        semantics->nanEncoding != fltNanEncoding::NegativeZero)
      makeNaN(false, false);

    *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

    // For x87 extended precision, we want to make a NaN, not a special NaN if
    // the input wasn't special either.
    if (!X86SpecialNan && semantics == &semX87DoubleExtended)
      APInt::tcSetBit(significandParts(), semantics->precision - 1);

    // Convert of sNaN creates qNaN and raises an exception (invalid op).
    // This also guarantees that a sNaN does not become Inf on a truncation
    // that loses all payload bits.
    if (is_signaling) {
      makeQuiet();
      fs = opInvalidOp;
    } else {
      fs = opOK;
    }
  } else if (category == fcInfinity &&
             semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // The target format cannot hold an infinity: it becomes a NaN with the
    // same sign, reported as inexact and lossy.
    makeNaN(false, sign);
    *losesInfo = true;
    fs = opInexact;
  } else if (category == fcZero &&
             semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Negative zero loses info, but positive zero doesn't.
    *losesInfo =
        fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
    fs = *losesInfo ? opInexact : opOK;
    // NaN is negative zero means -0 -> +0, which can lose information
    sign = false;
  } else {
    *losesInfo = false;
    fs = opOK;
  }

  // Formats without a zero replace it by the smallest normalized value.
  if (category == fcZero && !semantics->hasZero)
    makeSmallestNormalized(false);
  return fs;
}

/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.

   PARTS receives the result and must provide at least
   partCountForBits(WIDTH) parts (asserted).  *ISEXACT is set to true
   only when opOK is returned for a value that is not a negative
   zero; on every other path it is false.  */
APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
    MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
    roundingMode rounding_mode, bool *isExact) const {
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  *isExact = false;

  /* Handle the three special cases first.  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);
  assert(dstPartsCount <= parts.size() && "Integer too big");

  if (category == fcZero) {
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0.  */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available.  */
      APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
                       0);
      APInt::tcShiftLeft(parts.data(), dstPartsCount,
                         bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      if (APInt::tcIncrement(parts.data(), dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  */
  unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case.  */
      if (omsb == width &&
          APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    // Turn the magnitude into its negative.
    APInt::tcNegate (parts.data(), dstPartsCount);
  } else {
    // A non-negative value may use WIDTH bits when unsigned, but only
    // WIDTH - 1 bits when signed.
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}

/* Same as convertToSignExtendedInteger, except we provide
   deterministic values in case of an invalid operation exception,
   namely zero for NaNs and the minimal or maximal value respectively
   for underflow or overflow.
   The *isExact output tells whether the result is exact, in the sense
   that converting it back to the original floating point type produces
   the original value.  This is almost equivalent to result==opOK,
   except for negative zeroes.
*/
APFloat::opStatus
IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
                            unsigned int width, bool isSigned,
                            roundingMode rounding_mode, bool *isExact) const {
  opStatus fs;

  fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
                                    isExact);

  if (fs == opInvalidOp) {
    unsigned int bits, dstPartsCount;

    dstPartsCount = partCountForBits(width);
    assert(dstPartsCount <= parts.size() && "Integer too big");

    // Number of low-order bits to set: none for a NaN, WIDTH - isSigned for
    // a positive value, and for a negative value a single bit (signed) or
    // none (unsigned).
    if (category == fcNaN)
      bits = 0;
    else if (sign)
      bits = isSigned;
    else
      bits = width - isSigned;

    tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
    // For a negative value converted to a signed type, move that single bit
    // up into the sign position.
    if (sign && isSigned)
      APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
  }

  return fs;
}

/* Convert an unsigned integer SRC to a floating point number,
   rounding according to ROUNDING_MODE.  The sign of the floating
   point number is not modified.
*/ 2847 APFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2848 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2849 unsigned int omsb, precision, dstCount; 2850 integerPart *dst; 2851 lostFraction lost_fraction; 2852 2853 category = fcNormal; 2854 omsb = APInt::tcMSB(src, srcCount) + 1; 2855 dst = significandParts(); 2856 dstCount = partCount(); 2857 precision = semantics->precision; 2858 2859 /* We want the most significant PRECISION bits of SRC. There may not 2860 be that many; extract what we can. */ 2861 if (precision <= omsb) { 2862 exponent = omsb - 1; 2863 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2864 omsb - precision); 2865 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2866 } else { 2867 exponent = precision - 1; 2868 lost_fraction = lfExactlyZero; 2869 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2870 } 2871 2872 return normalize(rounding_mode, lost_fraction); 2873 } 2874 2875 APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2876 roundingMode rounding_mode) { 2877 unsigned int partCount = Val.getNumWords(); 2878 APInt api = Val; 2879 2880 sign = false; 2881 if (isSigned && api.isNegative()) { 2882 sign = true; 2883 api = -api; 2884 } 2885 2886 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2887 } 2888 2889 /* Convert a two's complement integer SRC to a floating point number, 2890 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2891 integer is signed, in which case it must be sign-extended. */ 2892 APFloat::opStatus 2893 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2894 unsigned int srcCount, bool isSigned, 2895 roundingMode rounding_mode) { 2896 opStatus status; 2897 2898 if (isSigned && 2899 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2900 integerPart *copy; 2901 2902 /* If we're signed and negative negate a copy. 
*/ 2903 sign = true; 2904 copy = new integerPart[srcCount]; 2905 APInt::tcAssign(copy, src, srcCount); 2906 APInt::tcNegate(copy, srcCount); 2907 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2908 delete [] copy; 2909 } else { 2910 sign = false; 2911 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2912 } 2913 2914 return status; 2915 } 2916 2917 /* FIXME: should this just take a const APInt reference? */ 2918 APFloat::opStatus 2919 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2920 unsigned int width, bool isSigned, 2921 roundingMode rounding_mode) { 2922 unsigned int partCount = partCountForBits(width); 2923 APInt api = APInt(width, ArrayRef(parts, partCount)); 2924 2925 sign = false; 2926 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2927 sign = true; 2928 api = -api; 2929 } 2930 2931 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2932 } 2933 2934 Expected<APFloat::opStatus> 2935 IEEEFloat::convertFromHexadecimalString(StringRef s, 2936 roundingMode rounding_mode) { 2937 lostFraction lost_fraction = lfExactlyZero; 2938 2939 category = fcNormal; 2940 zeroSignificand(); 2941 exponent = 0; 2942 2943 integerPart *significand = significandParts(); 2944 unsigned partsCount = partCount(); 2945 unsigned bitPos = partsCount * integerPartWidth; 2946 bool computedTrailingFraction = false; 2947 2948 // Skip leading zeroes and any (hexa)decimal point. 
2949 StringRef::iterator begin = s.begin(); 2950 StringRef::iterator end = s.end(); 2951 StringRef::iterator dot; 2952 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2953 if (!PtrOrErr) 2954 return PtrOrErr.takeError(); 2955 StringRef::iterator p = *PtrOrErr; 2956 StringRef::iterator firstSignificantDigit = p; 2957 2958 while (p != end) { 2959 integerPart hex_value; 2960 2961 if (*p == '.') { 2962 if (dot != end) 2963 return createError("String contains multiple dots"); 2964 dot = p++; 2965 continue; 2966 } 2967 2968 hex_value = hexDigitValue(*p); 2969 if (hex_value == UINT_MAX) 2970 break; 2971 2972 p++; 2973 2974 // Store the number while we have space. 2975 if (bitPos) { 2976 bitPos -= 4; 2977 hex_value <<= bitPos % integerPartWidth; 2978 significand[bitPos / integerPartWidth] |= hex_value; 2979 } else if (!computedTrailingFraction) { 2980 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2981 if (!FractOrErr) 2982 return FractOrErr.takeError(); 2983 lost_fraction = *FractOrErr; 2984 computedTrailingFraction = true; 2985 } 2986 } 2987 2988 /* Hex floats require an exponent but not a hexadecimal point. */ 2989 if (p == end) 2990 return createError("Hex strings require an exponent"); 2991 if (*p != 'p' && *p != 'P') 2992 return createError("Invalid character in significand"); 2993 if (p == begin) 2994 return createError("Significand has no digits"); 2995 if (dot != end && p - begin == 1) 2996 return createError("Significand has no digits"); 2997 2998 /* Ignore the exponent if we are zero. */ 2999 if (p != firstSignificantDigit) { 3000 int expAdjustment; 3001 3002 /* Implicit hexadecimal point? */ 3003 if (dot == end) 3004 dot = p; 3005 3006 /* Calculate the exponent adjustment implicit in the number of 3007 significant digits. 
*/ 3008 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 3009 if (expAdjustment < 0) 3010 expAdjustment++; 3011 expAdjustment = expAdjustment * 4 - 1; 3012 3013 /* Adjust for writing the significand starting at the most 3014 significant nibble. */ 3015 expAdjustment += semantics->precision; 3016 expAdjustment -= partsCount * integerPartWidth; 3017 3018 /* Adjust for the given exponent. */ 3019 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 3020 if (!ExpOrErr) 3021 return ExpOrErr.takeError(); 3022 exponent = *ExpOrErr; 3023 } 3024 3025 return normalize(rounding_mode, lost_fraction); 3026 } 3027 3028 APFloat::opStatus 3029 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 3030 unsigned sigPartCount, int exp, 3031 roundingMode rounding_mode) { 3032 unsigned int parts, pow5PartCount; 3033 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 3034 integerPart pow5Parts[maxPowerOfFiveParts]; 3035 bool isNearest; 3036 3037 isNearest = (rounding_mode == rmNearestTiesToEven || 3038 rounding_mode == rmNearestTiesToAway); 3039 3040 parts = partCountForBits(semantics->precision + 11); 3041 3042 /* Calculate pow(5, abs(exp)). */ 3043 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 3044 3045 for (;; parts *= 2) { 3046 opStatus sigStatus, powStatus; 3047 unsigned int excessPrecision, truncatedBits; 3048 3049 calcSemantics.precision = parts * integerPartWidth - 1; 3050 excessPrecision = calcSemantics.precision - semantics->precision; 3051 truncatedBits = excessPrecision; 3052 3053 IEEEFloat decSig(calcSemantics, uninitialized); 3054 decSig.makeZero(sign); 3055 IEEEFloat pow5(calcSemantics); 3056 3057 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 3058 rmNearestTiesToEven); 3059 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 3060 rmNearestTiesToEven); 3061 /* Add exp, as 10^n = 5^n * 2^n. 
*/ 3062 decSig.exponent += exp; 3063 3064 lostFraction calcLostFraction; 3065 integerPart HUerr, HUdistance; 3066 unsigned int powHUerr; 3067 3068 if (exp >= 0) { 3069 /* multiplySignificand leaves the precision-th bit set to 1. */ 3070 calcLostFraction = decSig.multiplySignificand(pow5); 3071 powHUerr = powStatus != opOK; 3072 } else { 3073 calcLostFraction = decSig.divideSignificand(pow5); 3074 /* Denormal numbers have less precision. */ 3075 if (decSig.exponent < semantics->minExponent) { 3076 excessPrecision += (semantics->minExponent - decSig.exponent); 3077 truncatedBits = excessPrecision; 3078 if (excessPrecision > calcSemantics.precision) 3079 excessPrecision = calcSemantics.precision; 3080 } 3081 /* Extra half-ulp lost in reciprocal of exponent. */ 3082 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 3083 } 3084 3085 /* Both multiplySignificand and divideSignificand return the 3086 result with the integer bit set. */ 3087 assert(APInt::tcExtractBit 3088 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 3089 3090 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 3091 powHUerr); 3092 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 3093 excessPrecision, isNearest); 3094 3095 /* Are we guaranteed to round correctly if we truncate? */ 3096 if (HUdistance >= HUerr) { 3097 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 3098 calcSemantics.precision - excessPrecision, 3099 excessPrecision); 3100 /* Take the exponent of decSig. If we tcExtract-ed less bits 3101 above we must adjust our exponent to compensate for the 3102 implicit right shift. 
*/ 3103 exponent = (decSig.exponent + semantics->precision 3104 - (calcSemantics.precision - excessPrecision)); 3105 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 3106 decSig.partCount(), 3107 truncatedBits); 3108 return normalize(rounding_mode, calcLostFraction); 3109 } 3110 } 3111 } 3112 3113 Expected<APFloat::opStatus> 3114 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 3115 decimalInfo D; 3116 opStatus fs; 3117 3118 /* Scan the text. */ 3119 StringRef::iterator p = str.begin(); 3120 if (Error Err = interpretDecimal(p, str.end(), &D)) 3121 return std::move(Err); 3122 3123 /* Handle the quick cases. First the case of no significant digits, 3124 i.e. zero, and then exponents that are obviously too large or too 3125 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 3126 definitely overflows if 3127 3128 (exp - 1) * L >= maxExponent 3129 3130 and definitely underflows to zero where 3131 3132 (exp + 1) * L <= minExponent - precision 3133 3134 With integer arithmetic the tightest bounds for L are 3135 3136 93/28 < L < 196/59 [ numerator <= 256 ] 3137 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 3138 */ 3139 3140 // Test if we have a zero number allowing for strings with no null terminators 3141 // and zero decimals with non-zero exponents. 3142 // 3143 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 3144 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 3145 // be at most one dot. On the other hand, if we have a zero with a non-zero 3146 // exponent, then we know that D.firstSigDigit will be non-numeric. 
3147 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 3148 category = fcZero; 3149 fs = opOK; 3150 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 3151 sign = false; 3152 if (!semantics->hasZero) 3153 makeSmallestNormalized(false); 3154 3155 /* Check whether the normalized exponent is high enough to overflow 3156 max during the log-rebasing in the max-exponent check below. */ 3157 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 3158 fs = handleOverflow(rounding_mode); 3159 3160 /* If it wasn't, then it also wasn't high enough to overflow max 3161 during the log-rebasing in the min-exponent check. Check that it 3162 won't overflow min in either check, then perform the min-exponent 3163 check. */ 3164 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 3165 (D.normalizedExponent + 1) * 28738 <= 3166 8651 * (semantics->minExponent - (int) semantics->precision)) { 3167 /* Underflow to zero and round. */ 3168 category = fcNormal; 3169 zeroSignificand(); 3170 fs = normalize(rounding_mode, lfLessThanHalf); 3171 3172 /* We can finally safely perform the max-exponent check. */ 3173 } else if ((D.normalizedExponent - 1) * 42039 3174 >= 12655 * semantics->maxExponent) { 3175 /* Overflow and round. */ 3176 fs = handleOverflow(rounding_mode); 3177 } else { 3178 integerPart *decSignificand; 3179 unsigned int partCount; 3180 3181 /* A tight upper bound on number of bits required to hold an 3182 N-digit decimal integer is N * 196 / 59. Allocate enough space 3183 to hold the full significand, and an extra part required by 3184 tcMultiplyPart. */ 3185 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 3186 partCount = partCountForBits(1 + 196 * partCount / 59); 3187 decSignificand = new integerPart[partCount + 1]; 3188 partCount = 0; 3189 3190 /* Convert to binary efficiently - we do almost all multiplication 3191 in an integerPart. 
When this would overflow do we do a single 3192 bignum multiplication, and then revert again to multiplication 3193 in an integerPart. */ 3194 do { 3195 integerPart decValue, val, multiplier; 3196 3197 val = 0; 3198 multiplier = 1; 3199 3200 do { 3201 if (*p == '.') { 3202 p++; 3203 if (p == str.end()) { 3204 break; 3205 } 3206 } 3207 decValue = decDigitValue(*p++); 3208 if (decValue >= 10U) { 3209 delete[] decSignificand; 3210 return createError("Invalid character in significand"); 3211 } 3212 multiplier *= 10; 3213 val = val * 10 + decValue; 3214 /* The maximum number that can be multiplied by ten with any 3215 digit added without overflowing an integerPart. */ 3216 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 3217 3218 /* Multiply out the current part. */ 3219 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 3220 partCount, partCount + 1, false); 3221 3222 /* If we used another part (likely but not guaranteed), increase 3223 the count. */ 3224 if (decSignificand[partCount]) 3225 partCount++; 3226 } while (p <= D.lastSigDigit); 3227 3228 category = fcNormal; 3229 fs = roundSignificandWithExponent(decSignificand, partCount, 3230 D.exponent, rounding_mode); 3231 3232 delete [] decSignificand; 3233 } 3234 3235 return fs; 3236 } 3237 3238 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 3239 const size_t MIN_NAME_SIZE = 3; 3240 3241 if (str.size() < MIN_NAME_SIZE) 3242 return false; 3243 3244 if (str == "inf" || str == "INFINITY" || str == "+Inf") { 3245 makeInf(false); 3246 return true; 3247 } 3248 3249 bool IsNegative = str.front() == '-'; 3250 if (IsNegative) { 3251 str = str.drop_front(); 3252 if (str.size() < MIN_NAME_SIZE) 3253 return false; 3254 3255 if (str == "inf" || str == "INFINITY" || str == "Inf") { 3256 makeInf(true); 3257 return true; 3258 } 3259 } 3260 3261 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 
3262 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 3263 if (IsSignaling) { 3264 str = str.drop_front(); 3265 if (str.size() < MIN_NAME_SIZE) 3266 return false; 3267 } 3268 3269 if (str.starts_with("nan") || str.starts_with("NaN")) { 3270 str = str.drop_front(3); 3271 3272 // A NaN without payload. 3273 if (str.empty()) { 3274 makeNaN(IsSignaling, IsNegative); 3275 return true; 3276 } 3277 3278 // Allow the payload to be inside parentheses. 3279 if (str.front() == '(') { 3280 // Parentheses should be balanced (and not empty). 3281 if (str.size() <= 2 || str.back() != ')') 3282 return false; 3283 3284 str = str.slice(1, str.size() - 1); 3285 } 3286 3287 // Determine the payload number's radix. 3288 unsigned Radix = 10; 3289 if (str[0] == '0') { 3290 if (str.size() > 1 && tolower(str[1]) == 'x') { 3291 str = str.drop_front(2); 3292 Radix = 16; 3293 } else 3294 Radix = 8; 3295 } 3296 3297 // Parse the payload and make the NaN. 3298 APInt Payload; 3299 if (!str.getAsInteger(Radix, Payload)) { 3300 makeNaN(IsSignaling, IsNegative, &Payload); 3301 return true; 3302 } 3303 } 3304 3305 return false; 3306 } 3307 3308 Expected<APFloat::opStatus> 3309 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3310 if (str.empty()) 3311 return createError("Invalid string length"); 3312 3313 // Handle special cases. 3314 if (convertFromStringSpecials(str)) 3315 return opOK; 3316 3317 /* Handle a leading minus sign. */ 3318 StringRef::iterator p = str.begin(); 3319 size_t slen = str.size(); 3320 sign = *p == '-' ? 
1 : 0; 3321 if (sign && !semantics->hasSignedRepr) 3322 llvm_unreachable( 3323 "This floating point format does not support signed values"); 3324 3325 if (*p == '-' || *p == '+') { 3326 p++; 3327 slen--; 3328 if (!slen) 3329 return createError("String has no digits"); 3330 } 3331 3332 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 3333 if (slen == 2) 3334 return createError("Invalid string"); 3335 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 3336 rounding_mode); 3337 } 3338 3339 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 3340 } 3341 3342 /* Write out a hexadecimal representation of the floating point value 3343 to DST, which must be of sufficient size, in the C99 form 3344 [-]0xh.hhhhp[+-]d. Return the number of characters written, 3345 excluding the terminating NUL. 3346 3347 If UPPERCASE, the output is in upper case, otherwise in lower case. 3348 3349 HEXDIGITS digits appear altogether, rounding the value if 3350 necessary. If HEXDIGITS is 0, the minimal precision to display the 3351 number precisely is used instead. If nothing would appear after 3352 the decimal point it is suppressed. 3353 3354 The decimal exponent is always printed and has at least one digit. 3355 Zero values display an exponent of zero. Infinities and NaNs 3356 appear as "infinity" or "nan" respectively. 3357 3358 The above rules are as specified by C99. There is ambiguity about 3359 what the leading hexadecimal digit should be. This implementation 3360 uses whatever is necessary so that the exponent is displayed as 3361 stored. This implies the exponent will fall within the IEEE format 3362 range, and the leading hexadecimal digit will be 0 (for denormals), 3363 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 3364 any other digits zero). 
3365 */ 3366 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 3367 bool upperCase, 3368 roundingMode rounding_mode) const { 3369 char *p; 3370 3371 p = dst; 3372 if (sign) 3373 *dst++ = '-'; 3374 3375 switch (category) { 3376 case fcInfinity: 3377 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 3378 dst += sizeof infinityL - 1; 3379 break; 3380 3381 case fcNaN: 3382 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 3383 dst += sizeof NaNU - 1; 3384 break; 3385 3386 case fcZero: 3387 *dst++ = '0'; 3388 *dst++ = upperCase ? 'X': 'x'; 3389 *dst++ = '0'; 3390 if (hexDigits > 1) { 3391 *dst++ = '.'; 3392 memset (dst, '0', hexDigits - 1); 3393 dst += hexDigits - 1; 3394 } 3395 *dst++ = upperCase ? 'P': 'p'; 3396 *dst++ = '0'; 3397 break; 3398 3399 case fcNormal: 3400 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 3401 break; 3402 } 3403 3404 *dst = 0; 3405 3406 return static_cast<unsigned int>(dst - p); 3407 } 3408 3409 /* Does the hard work of outputting the correctly rounded hexadecimal 3410 form of a normal floating point number with the specified number of 3411 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3412 digits necessary to print the value precisely is output. */ 3413 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3414 bool upperCase, 3415 roundingMode rounding_mode) const { 3416 unsigned int count, valueBits, shift, partsCount, outputDigits; 3417 const char *hexDigitChars; 3418 const integerPart *significand; 3419 char *p; 3420 bool roundUp; 3421 3422 *dst++ = '0'; 3423 *dst++ = upperCase ? 'X': 'x'; 3424 3425 roundUp = false; 3426 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3427 3428 significand = significandParts(); 3429 partsCount = partCount(); 3430 3431 /* +3 because the first digit only uses the single integer bit, so 3432 we have 3 virtual zero most-significant-bits. 
*/ 3433 valueBits = semantics->precision + 3; 3434 shift = integerPartWidth - valueBits % integerPartWidth; 3435 3436 /* The natural number of digits required ignoring trailing 3437 insignificant zeroes. */ 3438 outputDigits = (valueBits - significandLSB () + 3) / 4; 3439 3440 /* hexDigits of zero means use the required number for the 3441 precision. Otherwise, see if we are truncating. If we are, 3442 find out if we need to round away from zero. */ 3443 if (hexDigits) { 3444 if (hexDigits < outputDigits) { 3445 /* We are dropping non-zero bits, so need to check how to round. 3446 "bits" is the number of dropped bits. */ 3447 unsigned int bits; 3448 lostFraction fraction; 3449 3450 bits = valueBits - hexDigits * 4; 3451 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3452 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3453 } 3454 outputDigits = hexDigits; 3455 } 3456 3457 /* Write the digits consecutively, and start writing in the location 3458 of the hexadecimal point. We move the most significant digit 3459 left and add the hexadecimal point later. */ 3460 p = ++dst; 3461 3462 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3463 3464 while (outputDigits && count) { 3465 integerPart part; 3466 3467 /* Put the most significant integerPartWidth bits in "part". */ 3468 if (--count == partsCount) 3469 part = 0; /* An imaginary higher zero part. */ 3470 else 3471 part = significand[count] << shift; 3472 3473 if (count && shift) 3474 part |= significand[count - 1] >> (integerPartWidth - shift); 3475 3476 /* Convert as much of "part" to hexdigits as we can. */ 3477 unsigned int curDigits = integerPartWidth / 4; 3478 3479 if (curDigits > outputDigits) 3480 curDigits = outputDigits; 3481 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3482 outputDigits -= curDigits; 3483 } 3484 3485 if (roundUp) { 3486 char *q = dst; 3487 3488 /* Note that hexDigitChars has a trailing '0'. 
*/ 3489 do { 3490 q--; 3491 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3492 } while (*q == '0'); 3493 assert(q >= p); 3494 } else { 3495 /* Add trailing zeroes. */ 3496 memset (dst, '0', outputDigits); 3497 dst += outputDigits; 3498 } 3499 3500 /* Move the most significant digit to before the point, and if there 3501 is something after the decimal point add it. This must come 3502 after rounding above. */ 3503 p[-1] = p[0]; 3504 if (dst -1 == p) 3505 dst--; 3506 else 3507 p[0] = '.'; 3508 3509 /* Finally output the exponent. */ 3510 *dst++ = upperCase ? 'P': 'p'; 3511 3512 return writeSignedDecimal (dst, exponent); 3513 } 3514 3515 hash_code hash_value(const IEEEFloat &Arg) { 3516 if (!Arg.isFiniteNonZero()) 3517 return hash_combine((uint8_t)Arg.category, 3518 // NaN has no sign, fix it at zero. 3519 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3520 Arg.semantics->precision); 3521 3522 // Normal floats need their exponent and significand hashed. 3523 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3524 Arg.semantics->precision, Arg.exponent, 3525 hash_combine_range( 3526 Arg.significandParts(), 3527 Arg.significandParts() + Arg.partCount())); 3528 } 3529 3530 // Conversion from APFloat to/from host float/double. It may eventually be 3531 // possible to eliminate these and have everybody deal with APFloats, but that 3532 // will take a while. This approach will not easily extend to long double. 3533 // Current implementation requires integerPartWidth==64, which is correct at 3534 // the moment but could be made more general. 3535 3536 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3537 // the actual IEEE respresentations. We compensate for that here. 
3538 3539 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3540 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3541 assert(partCount()==2); 3542 3543 uint64_t myexponent, mysignificand; 3544 3545 if (isFiniteNonZero()) { 3546 myexponent = exponent+16383; //bias 3547 mysignificand = significandParts()[0]; 3548 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3549 myexponent = 0; // denormal 3550 } else if (category==fcZero) { 3551 myexponent = 0; 3552 mysignificand = 0; 3553 } else if (category==fcInfinity) { 3554 myexponent = 0x7fff; 3555 mysignificand = 0x8000000000000000ULL; 3556 } else { 3557 assert(category == fcNaN && "Unknown category"); 3558 myexponent = 0x7fff; 3559 mysignificand = significandParts()[0]; 3560 } 3561 3562 uint64_t words[2]; 3563 words[0] = mysignificand; 3564 words[1] = ((uint64_t)(sign & 1) << 15) | 3565 (myexponent & 0x7fffLL); 3566 return APInt(80, words); 3567 } 3568 3569 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3570 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3571 assert(partCount()==2); 3572 3573 uint64_t words[2]; 3574 opStatus fs; 3575 bool losesInfo; 3576 3577 // Convert number to double. To avoid spurious underflows, we re- 3578 // normalize against the "double" minExponent first, and only *then* 3579 // truncate the mantissa. The result of that second conversion 3580 // may be inexact, but should never underflow. 3581 // Declare fltSemantics before APFloat that uses it (and 3582 // saves pointer to it) to ensure correct destruction order. 
3583 fltSemantics extendedSemantics = *semantics; 3584 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3585 IEEEFloat extended(*this); 3586 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3587 assert(fs == opOK && !losesInfo); 3588 (void)fs; 3589 3590 IEEEFloat u(extended); 3591 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3592 assert(fs == opOK || fs == opInexact); 3593 (void)fs; 3594 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3595 3596 // If conversion was exact or resulted in a special case, we're done; 3597 // just set the second double to zero. Otherwise, re-convert back to 3598 // the extended format and compute the difference. This now should 3599 // convert exactly to double. 3600 if (u.isFiniteNonZero() && losesInfo) { 3601 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3602 assert(fs == opOK && !losesInfo); 3603 (void)fs; 3604 3605 IEEEFloat v(extended); 3606 v.subtract(u, rmNearestTiesToEven); 3607 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3608 assert(fs == opOK && !losesInfo); 3609 (void)fs; 3610 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3611 } else { 3612 words[1] = 0; 3613 } 3614 3615 return APInt(128, words); 3616 } 3617 3618 template <const fltSemantics &S> 3619 APInt IEEEFloat::convertIEEEFloatToAPInt() const { 3620 assert(semantics == &S); 3621 const int bias = 3622 (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1); 3623 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3624 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; 3625 constexpr integerPart integer_bit = 3626 integerPart{1} << (trailing_significand_bits % integerPartWidth); 3627 constexpr uint64_t significand_mask = integer_bit - 1; 3628 constexpr unsigned int exponent_bits = 3629 trailing_significand_bits ? 
(S.sizeInBits - 1 - trailing_significand_bits) 3630 : S.sizeInBits; 3631 static_assert(exponent_bits < 64); 3632 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; 3633 3634 uint64_t myexponent; 3635 std::array<integerPart, partCountForBits(trailing_significand_bits)> 3636 mysignificand; 3637 3638 if (isFiniteNonZero()) { 3639 myexponent = exponent + bias; 3640 std::copy_n(significandParts(), mysignificand.size(), 3641 mysignificand.begin()); 3642 if (myexponent == 1 && 3643 !(significandParts()[integer_bit_part] & integer_bit)) 3644 myexponent = 0; // denormal 3645 } else if (category == fcZero) { 3646 if (!S.hasZero) 3647 llvm_unreachable("semantics does not support zero!"); 3648 myexponent = ::exponentZero(S) + bias; 3649 mysignificand.fill(0); 3650 } else if (category == fcInfinity) { 3651 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || 3652 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 3653 llvm_unreachable("semantics don't support inf!"); 3654 myexponent = ::exponentInf(S) + bias; 3655 mysignificand.fill(0); 3656 } else { 3657 assert(category == fcNaN && "Unknown category!"); 3658 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 3659 llvm_unreachable("semantics don't support NaN!"); 3660 myexponent = ::exponentNaN(S) + bias; 3661 std::copy_n(significandParts(), mysignificand.size(), 3662 mysignificand.begin()); 3663 } 3664 std::array<uint64_t, (S.sizeInBits + 63) / 64> words; 3665 auto words_iter = 3666 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); 3667 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) { 3668 // Clear the integer bit. 
3669 words[mysignificand.size() - 1] &= significand_mask; 3670 } 3671 std::fill(words_iter, words.end(), uint64_t{0}); 3672 constexpr size_t last_word = words.size() - 1; 3673 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1) 3674 << ((S.sizeInBits - 1) % 64); 3675 words[last_word] |= shifted_sign; 3676 uint64_t shifted_exponent = (myexponent & exponent_mask) 3677 << (trailing_significand_bits % 64); 3678 words[last_word] |= shifted_exponent; 3679 if constexpr (last_word == 0) { 3680 return APInt(S.sizeInBits, words[0]); 3681 } 3682 return APInt(S.sizeInBits, words); 3683 } 3684 3685 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { 3686 assert(partCount() == 2); 3687 return convertIEEEFloatToAPInt<semIEEEquad>(); 3688 } 3689 3690 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { 3691 assert(partCount()==1); 3692 return convertIEEEFloatToAPInt<semIEEEdouble>(); 3693 } 3694 3695 APInt IEEEFloat::convertFloatAPFloatToAPInt() const { 3696 assert(partCount()==1); 3697 return convertIEEEFloatToAPInt<semIEEEsingle>(); 3698 } 3699 3700 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { 3701 assert(partCount() == 1); 3702 return convertIEEEFloatToAPInt<semBFloat>(); 3703 } 3704 3705 APInt IEEEFloat::convertHalfAPFloatToAPInt() const { 3706 assert(partCount()==1); 3707 return convertIEEEFloatToAPInt<semIEEEhalf>(); 3708 } 3709 3710 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { 3711 assert(partCount() == 1); 3712 return convertIEEEFloatToAPInt<semFloat8E5M2>(); 3713 } 3714 3715 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { 3716 assert(partCount() == 1); 3717 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); 3718 } 3719 3720 APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const { 3721 assert(partCount() == 1); 3722 return convertIEEEFloatToAPInt<semFloat8E4M3>(); 3723 } 3724 3725 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { 3726 assert(partCount() == 1); 3727 return 
convertIEEEFloatToAPInt<semFloat8E4M3FN>(); 3728 } 3729 3730 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { 3731 assert(partCount() == 1); 3732 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); 3733 } 3734 3735 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { 3736 assert(partCount() == 1); 3737 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); 3738 } 3739 3740 APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const { 3741 assert(partCount() == 1); 3742 return convertIEEEFloatToAPInt<semFloat8E3M4>(); 3743 } 3744 3745 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { 3746 assert(partCount() == 1); 3747 return convertIEEEFloatToAPInt<semFloatTF32>(); 3748 } 3749 3750 APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const { 3751 assert(partCount() == 1); 3752 return convertIEEEFloatToAPInt<semFloat8E8M0FNU>(); 3753 } 3754 3755 APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const { 3756 assert(partCount() == 1); 3757 return convertIEEEFloatToAPInt<semFloat6E3M2FN>(); 3758 } 3759 3760 APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const { 3761 assert(partCount() == 1); 3762 return convertIEEEFloatToAPInt<semFloat6E2M3FN>(); 3763 } 3764 3765 APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const { 3766 assert(partCount() == 1); 3767 return convertIEEEFloatToAPInt<semFloat4E2M1FN>(); 3768 } 3769 3770 // This function creates an APInt that is just a bit map of the floating 3771 // point constant as it would appear in memory. It is not a conversion, 3772 // and treating the result as a normal integer is unlikely to be useful. 
3773 3774 APInt IEEEFloat::bitcastToAPInt() const { 3775 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3776 return convertHalfAPFloatToAPInt(); 3777 3778 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3779 return convertBFloatAPFloatToAPInt(); 3780 3781 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3782 return convertFloatAPFloatToAPInt(); 3783 3784 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3785 return convertDoubleAPFloatToAPInt(); 3786 3787 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3788 return convertQuadrupleAPFloatToAPInt(); 3789 3790 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3791 return convertPPCDoubleDoubleAPFloatToAPInt(); 3792 3793 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3794 return convertFloat8E5M2APFloatToAPInt(); 3795 3796 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3797 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3798 3799 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3) 3800 return convertFloat8E4M3APFloatToAPInt(); 3801 3802 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3803 return convertFloat8E4M3FNAPFloatToAPInt(); 3804 3805 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3806 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3807 3808 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3809 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3810 3811 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4) 3812 return convertFloat8E3M4APFloatToAPInt(); 3813 3814 if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3815 return convertFloatTF32APFloatToAPInt(); 3816 3817 if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU) 3818 return convertFloat8E8M0FNUAPFloatToAPInt(); 3819 3820 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) 3821 return convertFloat6E3M2FNAPFloatToAPInt(); 3822 3823 if (semantics == (const 
llvm::fltSemantics *)&semFloat6E2M3FN) 3824 return convertFloat6E2M3FNAPFloatToAPInt(); 3825 3826 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN) 3827 return convertFloat4E2M1FNAPFloatToAPInt(); 3828 3829 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3830 "unknown format!"); 3831 return convertF80LongDoubleAPFloatToAPInt(); 3832 } 3833 3834 float IEEEFloat::convertToFloat() const { 3835 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3836 "Float semantics are not IEEEsingle"); 3837 APInt api = bitcastToAPInt(); 3838 return api.bitsToFloat(); 3839 } 3840 3841 double IEEEFloat::convertToDouble() const { 3842 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3843 "Float semantics are not IEEEdouble"); 3844 APInt api = bitcastToAPInt(); 3845 return api.bitsToDouble(); 3846 } 3847 3848 #ifdef HAS_IEE754_FLOAT128 3849 float128 IEEEFloat::convertToQuad() const { 3850 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && 3851 "Float semantics are not IEEEquads"); 3852 APInt api = bitcastToAPInt(); 3853 return api.bitsToQuad(); 3854 } 3855 #endif 3856 3857 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3858 /// does not support these bit patterns: 3859 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3860 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3861 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3862 /// exponent = 0, integer bit 1 ("pseudodenormal") 3863 /// At the moment, the first three are treated as NaNs, the last one as Normal. 
3864 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3865 uint64_t i1 = api.getRawData()[0]; 3866 uint64_t i2 = api.getRawData()[1]; 3867 uint64_t myexponent = (i2 & 0x7fff); 3868 uint64_t mysignificand = i1; 3869 uint8_t myintegerbit = mysignificand >> 63; 3870 3871 initialize(&semX87DoubleExtended); 3872 assert(partCount()==2); 3873 3874 sign = static_cast<unsigned int>(i2>>15); 3875 if (myexponent == 0 && mysignificand == 0) { 3876 makeZero(sign); 3877 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3878 makeInf(sign); 3879 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3880 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3881 category = fcNaN; 3882 exponent = exponentNaN(); 3883 significandParts()[0] = mysignificand; 3884 significandParts()[1] = 0; 3885 } else { 3886 category = fcNormal; 3887 exponent = myexponent - 16383; 3888 significandParts()[0] = mysignificand; 3889 significandParts()[1] = 0; 3890 if (myexponent==0) // denormal 3891 exponent = -16382; 3892 } 3893 } 3894 3895 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3896 uint64_t i1 = api.getRawData()[0]; 3897 uint64_t i2 = api.getRawData()[1]; 3898 opStatus fs; 3899 bool losesInfo; 3900 3901 // Get the first double and convert to our format. 3902 initFromDoubleAPInt(APInt(64, i1)); 3903 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3904 assert(fs == opOK && !losesInfo); 3905 (void)fs; 3906 3907 // Unless we have a special case, add in second double. 3908 if (isFiniteNonZero()) { 3909 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3910 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3911 assert(fs == opOK && !losesInfo); 3912 (void)fs; 3913 3914 add(v, rmNearestTiesToEven); 3915 } 3916 } 3917 3918 // The E8M0 format has the following characteristics: 3919 // It is an 8-bit unsigned format with only exponents (no actual significand). 
// No encodings for {zero, infinities or denorms}.
// NaN is represented by all 1's.
// Bias is 127.
//
// Only the low 8 bits of the first word of `api` are used as the exponent.
void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
  const uint64_t exponent_mask = 0xff;
  uint64_t val = api.getRawData()[0];
  uint64_t myexponent = (val & exponent_mask);

  initialize(&semFloat8E8M0FNU);
  assert(partCount() == 1);

  // This format has unsigned representation only
  sign = 0;

  // Set the significand
  // This format does not have any significand but the 'Pth' precision bit is
  // always set to 1 for consistency in APFloat's internal representation.
  uint64_t mysignificand = 1;
  significandParts()[0] = mysignificand;

  // This format can either have a NaN or fcNormal
  // All 1's i.e. 255 is a NaN
  if (val == exponent_mask) {
    category = fcNaN;
    exponent = exponentNaN();
    return;
  }
  // Handle fcNormal...
  category = fcNormal;
  exponent = myexponent - 127; // 127 is bias
}

/// Decode \p api as a value laid out as sign / exponent / trailing significand
/// according to the compile-time semantics \p S, and store it in *this.
///
/// All field widths, masks and the bias are derived from S at compile time.
/// The bit width of \p api must equal S.sizeInBits (asserted).
///
/// Classification order: infinity (only when S has IEEE754 non-finite
/// behavior), then NaN according to S.nanEncoding, then zero, and finally a
/// finite non-zero value, which is a denormal when the exponent field is 0.
template <const fltSemantics &S>
void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
  assert(api.getBitWidth() == S.sizeInBits);
  // Position of the (implicit) integer bit within the highest significand
  // part; every bit below it in that part belongs to the stored significand.
  constexpr integerPart integer_bit = integerPart{1}
                                      << ((S.precision - 1) % integerPartWidth);
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  constexpr unsigned int stored_significand_parts =
      partCountForBits(trailing_significand_bits);
  // Whatever is neither the sign bit nor trailing significand is exponent.
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
  constexpr int bias = -(S.minExponent - 1);

  // Copy the bits of the significand. We need to clear out the exponent and
  // sign bit in the last word.
  std::array<integerPart, stored_significand_parts> mysignificand;
  std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
  if constexpr (significand_mask != 0) {
    mysignificand[mysignificand.size() - 1] &= significand_mask;
  }

  // We assume the last word holds the sign bit, the exponent, and potentially
  // some of the trailing significand field.
  uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
  uint64_t myexponent =
      (last_word >> (trailing_significand_bits % 64)) & exponent_mask;

  initialize(&S);
  assert(partCount() == mysignificand.size());

  sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));

  bool all_zero_significand =
      llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });

  bool is_zero = myexponent == 0 && all_zero_significand;

  // Infinity is only recognized for formats with IEEE754 non-finite behavior.
  if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
    if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
      makeInf(sign);
      return;
    }
  }

  bool is_nan = false;

  // The NaN test depends on the encoding declared by the semantics; if none
  // of the three encodings below matches, is_nan stays false.
  if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
    is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
    // All parts below the top one must be fully set, and the top part must
    // equal the significand mask (skipped when that mask is zero).
    bool all_ones_significand =
        std::all_of(mysignificand.begin(), mysignificand.end() - 1,
                    [](integerPart bits) { return bits == ~integerPart{0}; }) &&
        (!significand_mask ||
         mysignificand[mysignificand.size() - 1] == significand_mask);
    is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
    // The bit pattern of "negative zero" is the NaN in this encoding.
    is_nan = is_zero && sign;
  }

  if (is_nan) {
    category = fcNaN;
    exponent = ::exponentNaN(S);
    std::copy_n(mysignificand.begin(), mysignificand.size(),
                significandParts());
    return;
  }

  if (is_zero) {
    makeZero(sign);
    return;
  }

  category = fcNormal;
  exponent = myexponent - bias;
  std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
  if (myexponent == 0) // denormal
    exponent = S.minExponent;
  else
    significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
}

/// Initialize from a bit pattern in the semIEEEquad layout.
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEquad>(api);
}

/// Initialize from a bit pattern in the semIEEEdouble layout.
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEdouble>(api);
}

/// Initialize from a bit pattern in the semIEEEsingle layout.
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEsingle>(api);
}

/// Initialize from a bit pattern in the semBFloat layout.
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semBFloat>(api);
}

/// Initialize from a bit pattern in the semIEEEhalf layout.
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEhalf>(api);
}

/// Initialize from a bit pattern in the semFloat8E5M2 layout.
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2>(api);
}

/// Initialize from a bit pattern in the semFloat8E5M2FNUZ layout.
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}

/// Initialize from a bit pattern in the semFloat8E4M3 layout.
void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3>(api);
}

/// Initialize from a bit pattern in the semFloat8E4M3FN layout.
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FN>(api);
}

/// Initialize from a bit pattern in the semFloat8E4M3FNUZ layout.
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
}

/// Initialize from a bit pattern in the semFloat8E4M3B11FNUZ layout.
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}

/// Initialize from a bit pattern in the semFloat8E3M4 layout.
void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E3M4>(api);
}

/// Initialize from a bit pattern in the semFloatTF32 layout.
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
  initFromIEEEAPInt<semFloatTF32>(api);
}

/// Initialize from a bit pattern in the semFloat6E3M2FN layout.
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E3M2FN>(api);
}

/// Initialize from a bit pattern in the semFloat6E2M3FN layout.
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat6E2M3FN>(api);
}

/// Initialize from a bit pattern in the semFloat4E2M1FN layout.
void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat4E2M1FN>(api);
}

/// Treat api as containing the bits of a floating point number.
///
/// \p Sem selects the decoder by identity (pointer comparison); the bit width
/// of \p api must equal Sem->sizeInBits (asserted). An unrecognized semantics
/// pointer reaches llvm_unreachable.
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
  assert(api.getBitWidth() == Sem->sizeInBits);
  if (Sem == &semIEEEhalf)
    return initFromHalfAPInt(api);
  if (Sem == &semBFloat)
    return initFromBFloatAPInt(api);
  if (Sem == &semIEEEsingle)
    return initFromFloatAPInt(api);
  if (Sem == &semIEEEdouble)
    return initFromDoubleAPInt(api);
  if (Sem == &semX87DoubleExtended)
    return initFromF80LongDoubleAPInt(api);
  if (Sem == &semIEEEquad)
    return initFromQuadrupleAPInt(api);
  if (Sem == &semPPCDoubleDoubleLegacy)
    return initFromPPCDoubleDoubleAPInt(api);
  if (Sem == &semFloat8E5M2)
    return initFromFloat8E5M2APInt(api);
  if (Sem == &semFloat8E5M2FNUZ)
    return initFromFloat8E5M2FNUZAPInt(api);
  if (Sem == &semFloat8E4M3)
    return initFromFloat8E4M3APInt(api);
  if (Sem == &semFloat8E4M3FN)
    return initFromFloat8E4M3FNAPInt(api);
  if (Sem == &semFloat8E4M3FNUZ)
    return initFromFloat8E4M3FNUZAPInt(api);
  if (Sem == &semFloat8E4M3B11FNUZ)
    return initFromFloat8E4M3B11FNUZAPInt(api);
  if (Sem == &semFloat8E3M4)
    return initFromFloat8E3M4APInt(api);
  if (Sem == &semFloatTF32)
    return initFromFloatTF32APInt(api);
  if (Sem == &semFloat8E8M0FNU)
    return initFromFloat8E8M0FNUAPInt(api);
  if (Sem == &semFloat6E3M2FN)
    return initFromFloat6E3M2FNAPInt(api);
  if (Sem == &semFloat6E2M3FN)
    return initFromFloat6E2M3FNAPInt(api);
  if (Sem == &semFloat4E2M1FN)
    return initFromFloat4E2M1FNAPInt(api);

  llvm_unreachable(nullptr);
}

/// Make this number the largest magnitude normal number in the given
/// semantics.
///
/// \p Negative selects the sign. Requesting a negative value for a format
/// without a signed representation reaches llvm_unreachable.
void IEEEFloat::makeLargest(bool Negative) {
  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 1..10
  //   significand = 1..1
  category = fcNormal;
  sign = Negative;
  exponent = semantics->maxExponent;

  // Use memset to set all but the highest integerPart to all ones.
  integerPart *significand = significandParts();
  unsigned PartCount = partCount();
  memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));

  // Set the high integerPart especially setting all unused top bits for
  // internal consistency.
  const unsigned NumUnusedHighBits =
    PartCount*integerPartWidth - semantics->precision;
  significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
                                   ? (~integerPart(0) >> NumUnusedHighBits)
                                   : 0;
  // For NanOnly formats with the AllOnes NaN encoding, clear the lowest
  // significand bit -- presumably so the result does not coincide with the
  // all-ones NaN pattern. Skipped when the precision is a single bit.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      (semantics->precision > 1))
    significand[0] &= ~integerPart(1);
}

/// Make this number the smallest magnitude denormal number in the given
/// semantics.
void IEEEFloat::makeSmallest(bool Negative) {
  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 0..01
  category = fcNormal;
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSet(significandParts(), 1, partCount());
}

/// Make this number the value with exponent minExponent whose only set
/// significand bit is the integer bit (bit precision - 1).
///
/// \p Negative selects the sign. Requesting a negative value for a format
/// without a signed representation reaches llvm_unreachable.
void IEEEFloat::makeSmallestNormalized(bool Negative) {
  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 10..0

  category = fcNormal;
  zeroSignificand();
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSetBit(significandParts(), semantics->precision - 1);
}

/// Construct from the raw bit pattern \p API interpreted in semantics \p Sem.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}

/// Construct an IEEE single value from the bits of the host float \p f.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}

/// Construct an IEEE double value from the bits of the host double \p d.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}

namespace {
  /// Append the characters of \p Str to \p Buffer.
  void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
    Buffer.append(Str.begin(), Str.end());
  }

  /// Removes data from the given significand until it is no more
  /// precise than is required for the desired precision.
  ///
  /// \p exp is the decimal exponent accompanying \p significand; it is
  /// increased by the number of powers of ten divided out.
  void AdjustToPrecision(APInt &significand,
                         int &exp, unsigned FormatPrecision) {
    unsigned bits = significand.getActiveBits();

    // 196/59 is a very slight overestimate of lg_2(10).
    unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;

    if (bits <= bitsRequired) return;

    unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
    if (!tensRemovable) return;

    exp += tensRemovable;

    // Build divisor = 10^tensRemovable by binary exponentiation.
    APInt divisor(significand.getBitWidth(), 1);
    APInt powten(significand.getBitWidth(), 10);
    while (true) {
      if (tensRemovable & 1)
        divisor *= powten;
      tensRemovable >>= 1;
      if (!tensRemovable) break;
      powten *= powten;
    }

    significand = significand.udiv(divisor);

    // Truncate the significand down to its active bit count.
    significand = significand.trunc(significand.getActiveBits());
  }


  /// Round the decimal digit string in \p buffer (least significant digit
  /// first) down to at most \p FormatPrecision digits, adding the number of
  /// dropped digits to \p exp. Does nothing when the buffer is already short
  /// enough.
  void AdjustToPrecision(SmallVectorImpl<char> &buffer,
                         int &exp, unsigned FormatPrecision) {
    unsigned N = buffer.size();
    if (N <= FormatPrecision) return;

    // The most significant figures are the last ones in the buffer.
    unsigned FirstSignificant = N - FormatPrecision;

    // Round.
    // FIXME: this probably shouldn't use 'round half up'.

    // Rounding down is just a truncation, except we also want to drop
    // trailing zeros from the new result.
    if (buffer[FirstSignificant - 1] < '5') {
      while (FirstSignificant < N && buffer[FirstSignificant] == '0')
        FirstSignificant++;

      exp += FirstSignificant;
      buffer.erase(&buffer[0], &buffer[FirstSignificant]);
      return;
    }

    // Rounding up requires a decimal add-with-carry.  If we continue
    // the carry, the newly-introduced zeros will just be truncated.
    for (unsigned I = FirstSignificant; I != N; ++I) {
      if (buffer[I] == '9') {
        FirstSignificant++;
      } else {
        buffer[I]++;
        break;
      }
    }

    // If we carried through, we have exactly one digit of precision.
    if (FirstSignificant == N) {
      exp += FirstSignificant;
      buffer.clear();
      buffer.push_back('1');
      return;
    }

    exp += FirstSignificant;
    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
  }

  /// Append the decimal rendering of (-1)^isNeg * significand * 2^exp to
  /// \p Str.
  ///
  /// \p FormatPrecision is the maximum number of significant decimal digits;
  /// 0 selects a default derived from the bit width of \p significand.
  /// \p FormatMaxPadding bounds the number of padding zeros allowed before
  /// scientific notation is used; 0 always selects scientific notation.
  /// \p TruncateZero selects between the "d.dE+x" form (true) and the
  /// zero-padded "d.ddde+xx" form (false) in scientific notation.
  void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
                    APInt significand, unsigned FormatPrecision,
                    unsigned FormatMaxPadding, bool TruncateZero) {
    const int semanticsPrecision = significand.getBitWidth();

    if (isNeg)
      Str.push_back('-');

    // Set FormatPrecision if zero.  We want to do this before we
    // truncate trailing zeros, as those are part of the precision.
    if (!FormatPrecision) {
      // We use enough digits so the number can be round-tripped back to an
      // APFloat. The formula comes from "How to Print Floating-Point Numbers
      // Accurately" by Steele and White.
      // FIXME: Using a formula based purely on the precision is conservative;
      // we can print fewer digits depending on the actual value being printed.

      // FormatPrecision = 2 + floor(significandBits / lg_2(10))
      FormatPrecision = 2 + semanticsPrecision * 59 / 196;
    }

    // Ignore trailing binary zeros.
    int trailingZeros = significand.countr_zero();
    exp += trailingZeros;
    significand.lshrInPlace(trailingZeros);

    // Change the exponent from 2^e to 10^e.
    if (exp == 0) {
      // Nothing to do.
    } else if (exp > 0) {
      // Just shift left.
      significand = significand.zext(semanticsPrecision + exp);
      significand <<= exp;
      exp = 0;
    } else { /* exp < 0 */
      int texp = -exp;

      // We transform this using the identity:
      //   (N)(2^-e) == (N)(5^e)(10^-e)
      // This means we have to multiply N (the significand) by 5^e.
      // To avoid overflow, we have to operate on numbers large
      // enough to store N * 5^e:
      //   log2(N * 5^e) == log2(N) + e * log2(5)
      //                 <= semantics->precision + e * 137 / 59
      //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

      unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;

      // Multiply significand by 5^e.
      //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
      significand = significand.zext(precision);
      APInt five_to_the_i(precision, 5);
      while (true) {
        if (texp & 1)
          significand *= five_to_the_i;

        texp >>= 1;
        if (!texp)
          break;
        five_to_the_i *= five_to_the_i;
      }
    }

    AdjustToPrecision(significand, exp, FormatPrecision);

    SmallVector<char, 256> buffer;

    // Fill the buffer.
    unsigned precision = significand.getBitWidth();
    if (precision < 4) {
      // We need enough precision to store the value 10.
      precision = 4;
      significand = significand.zext(precision);
    }
    APInt ten(precision, 10);
    APInt digit(precision, 0);

    // Digits are produced least significant first, so buffer[0] ends up being
    // the lowest retained digit.
    bool inTrail = true;
    while (significand != 0) {
      // digit <- significand % 10
      // significand <- significand / 10
      APInt::udivrem(significand, ten, significand, digit);

      unsigned d = digit.getZExtValue();

      // Drop trailing zeros.
      if (inTrail && !d)
        exp++;
      else {
        buffer.push_back((char) ('0' + d));
        inTrail = false;
      }
    }

    assert(!buffer.empty() && "no characters in buffer!");

    // Drop down to FormatPrecision.
    // TODO: don't do more precise calculations above than are required.
    AdjustToPrecision(buffer, exp, FormatPrecision);

    unsigned NDigits = buffer.size();

    // Check whether we should use scientific notation.
    bool FormatScientific;
    if (!FormatMaxPadding)
      FormatScientific = true;
    else {
      if (exp >= 0) {
        // 765e3 --> 765000
        //              ^^^
        // But we shouldn't make the number look more precise than it is.
        FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                            NDigits + (unsigned) exp > FormatPrecision);
      } else {
        // Power of the most significant digit.
        int MSD = exp + (int) (NDigits - 1);
        if (MSD >= 0) {
          // 765e-2 == 7.65
          FormatScientific = false;
        } else {
          // 765e-5 == 0.00765
          //           ^ ^^
          FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
        }
      }
    }

    // Scientific formatting is pretty straightforward.
    if (FormatScientific) {
      exp += (NDigits - 1);

      Str.push_back(buffer[NDigits-1]);
      Str.push_back('.');
      if (NDigits == 1 && TruncateZero)
        Str.push_back('0');
      else
        for (unsigned I = 1; I != NDigits; ++I)
          Str.push_back(buffer[NDigits-1-I]);
      // Fill with zeros up to FormatPrecision.
      if (!TruncateZero && FormatPrecision > NDigits - 1)
        Str.append(FormatPrecision - NDigits + 1, '0');
      // For !TruncateZero we use lower 'e'.
      Str.push_back(TruncateZero ? 'E' : 'e');

      Str.push_back(exp >= 0 ? '+' : '-');
      if (exp < 0)
        exp = -exp;
      // The exponent digits are generated in reverse and emitted back to
      // front below.
      SmallVector<char, 6> expbuf;
      do {
        expbuf.push_back((char) ('0' + (exp % 10)));
        exp /= 10;
      } while (exp);
      // Exponent always at least two digits if we do not truncate zeros.
      if (!TruncateZero && expbuf.size() < 2)
        expbuf.push_back('0');
      for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
        Str.push_back(expbuf[E-1-I]);
      return;
    }

    // Non-scientific, positive exponents.
    if (exp >= 0) {
      for (unsigned I = 0; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
      for (unsigned I = 0; I != (unsigned) exp; ++I)
        Str.push_back('0');
      return;
    }

    // Non-scientific, negative exponents.

    // The number of digits to the left of the decimal point.
    int NWholeDigits = exp + (int) NDigits;

    unsigned I = 0;
    if (NWholeDigits > 0) {
      for (; I != (unsigned) NWholeDigits; ++I)
        Str.push_back(buffer[NDigits-I-1]);
      Str.push_back('.');
    } else {
      unsigned NZeros = 1 + (unsigned) -NWholeDigits;

      Str.push_back('0');
      Str.push_back('.');
      for (unsigned Z = 1; Z != NZeros; ++Z)
        Str.push_back('0');
    }

    for (; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);

  }
} // namespace

/// Append a decimal rendering of this value to \p Str.
///
/// Infinity is written as "+Inf" / "-Inf" and NaN as "NaN". Zero is written
/// directly (with a leading '-' when negative), in scientific form when
/// \p FormatMaxPadding is 0. Finite non-zero values are split into an integer
/// significand and a binary exponent and handed to toStringImpl together with
/// the three formatting parameters.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
               FormatMaxPadding, TruncateZero);

}

/// Return true if this value has an exact reciprocal that is not a denormal,
/// storing it in \p inv when \p inv is non-null.
///
/// Returns false for anything that is not finite and non-zero, for values
/// whose significand has a bit set below the integer bit, when the division
/// 1 / *this does not report opOK, and when the reciprocal is a denormal.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  if (significandLSB() != semantics->precision - 1)
    return false;

  // Get the inverse.
  IEEEFloat reciprocal(*semantics, 1ULL);
  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (reciprocal.isDenormal())
    return false;

  assert(reciprocal.isFiniteNonZero() &&
         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(reciprocal, *semantics);

  return true;
}

/// Return the base-2 exponent of |*this| when the significand has a single
/// bit set, and INT_MIN otherwise.
///
/// INT_MIN is returned for non-finite values, for zero, and as soon as more
/// than one significand bit is found set. When the stored exponent differs
/// from minExponent it is returned as is; otherwise the result is derived
/// from the position of the lowest set significand bit.
int IEEEFloat::getExactLog2Abs() const {
  if (!isFinite() || isZero())
    return INT_MIN;

  const integerPart *Parts = significandParts();
  const int PartCount = partCountForBits(semantics->precision);

  // Bail out as soon as a second set bit is seen.
  int PopCount = 0;
  for (int i = 0; i < PartCount; ++i) {
    PopCount += llvm::popcount(Parts[i]);
    if (PopCount > 1)
      return INT_MIN;
  }

  if (exponent != semantics->minExponent)
    return exponent;

  // Exponent is minExponent: locate the set bit, counting whole parts skipped
  // so far in CountrParts.
  int CountrParts = 0;
  for (int i = 0; i < PartCount;
       ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
    if (Parts[i] != 0) {
      return exponent - semantics->precision + CountrParts +
             llvm::countr_zero(Parts[i]) + 1;
    }
  }

  llvm_unreachable("didn't find the set bit");
}

/// Return true if this value is a signaling NaN. Always false for non-NaN
/// values and for formats with NanOnly or FiniteOnly non-finite behavior.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
      semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
///
/// Returns opInvalidOp when the input is a signaling NaN and opOK otherwise.
APFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats that use the negative-zero pattern for NaN get +0 instead.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      // Formats without a zero get the positive smallest normalized value
      // instead.
      if (!semantics->hasZero)
        makeSmallestNormalized(false);
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else if (semantics->nonFiniteBehavior ==
                 fltNonfiniteBehavior::FiniteOnly) {
        // nextUp(getLargest()) == getLargest()
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      // If there are only exponents, any increment always crosses the
      // BinadeBoundary.
      bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
                                     (!isDenormal() && isSignificandAllOnes());

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}

/// Exponent value used to tag NaNs in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}

/// Exponent value used to tag infinities in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}

/// Exponent value used to tag zeros in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}

/// Make this value an infinity with the given sign.
///
/// FiniteOnly formats reach llvm_unreachable. NanOnly formats have no
/// infinity, so a NaN with the requested sign is produced instead.
void IEEEFloat::makeInf(bool Negative) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support Inf");

  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // There is no Inf, so make NaN instead.
    makeNaN(false, Negative);
    return;
  }
  category = fcInfinity;
  sign = Negative;
  exponent = exponentInf();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Make this value a zero with the given sign.
///
/// Formats without a zero reach llvm_unreachable. For formats with the
/// NegativeZero NaN encoding the sign is forced to positive.
void IEEEFloat::makeZero(bool Negative) {
  if (!semantics->hasZero)
    llvm_unreachable("This floating point format does not support Zero");

  category = fcZero;
  sign = Negative;
  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Merge negative zero to positive because 0b10000...000 is used for NaN
    sign = false;
  }
  exponent = exponentZero();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Set significand bit (precision - 2) of a NaN, i.e. the bit that
/// isSignaling() tests. Requires isNaN(); no-op for NanOnly formats.
void IEEEFloat::makeQuiet() {
  assert(isNaN());
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    APInt::tcSetBit(significandParts(), semantics->precision - 2);
}

/// Return the binary exponent of \p Arg.
///
/// NaN, zero and infinity map to APFloat::IEK_NaN, IEK_Zero and IEK_Inf.
/// Non-denormal values return their stored exponent. Denormals are
/// normalized in a scratch copy to find the exponent of their leading bit.
int ilogb(const IEEEFloat &Arg) {
  if (Arg.isNaN())
    return APFloat::IEK_NaN;
  if (Arg.isZero())
    return APFloat::IEK_Zero;
  if (Arg.isInfinity())
    return APFloat::IEK_Inf;
  if (!Arg.isDenormal())
    return Arg.exponent;

  IEEEFloat Normalized(Arg);
  int SignificandBits = Arg.getSemantics().precision - 1;

  // Raise the exponent before normalizing and subtract the same amount from
  // the result afterwards.
  Normalized.exponent += SignificandBits;
  Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
  return Normalized.exponent - SignificandBits;
}

IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode RoundingMode) {
  auto MaxExp = X.getSemantics().maxExponent;
  auto MinExp = X.getSemantics().minExponent;

  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
  // overflow; clamp it to a safe range before adding, but ensure that the range
  // is large enough that the clamp does not change the result. The range we
  // need to support is the difference between the largest possible exponent and
  // the normalized exponent of half the smallest denormal.
4820 4821 int SignificandBits = X.getSemantics().precision - 1; 4822 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; 4823 4824 // Clamp to one past the range ends to let normalize handle overlflow. 4825 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); 4826 X.normalize(RoundingMode, lfExactlyZero); 4827 if (X.isNaN()) 4828 X.makeQuiet(); 4829 return X; 4830 } 4831 4832 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) { 4833 Exp = ilogb(Val); 4834 4835 // Quiet signalling nans. 4836 if (Exp == APFloat::IEK_NaN) { 4837 IEEEFloat Quiet(Val); 4838 Quiet.makeQuiet(); 4839 return Quiet; 4840 } 4841 4842 if (Exp == APFloat::IEK_Inf) 4843 return Val; 4844 4845 // 1 is added because frexp is defined to return a normalized fraction in 4846 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). 4847 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1; 4848 return scalbn(Val, -Exp, RM); 4849 } 4850 4851 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S) 4852 : Semantics(&S), 4853 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) { 4854 assert(Semantics == &semPPCDoubleDouble); 4855 } 4856 4857 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag) 4858 : Semantics(&S), 4859 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized), 4860 APFloat(semIEEEdouble, uninitialized)}) { 4861 assert(Semantics == &semPPCDoubleDouble); 4862 } 4863 4864 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I) 4865 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I), 4866 APFloat(semIEEEdouble)}) { 4867 assert(Semantics == &semPPCDoubleDouble); 4868 } 4869 4870 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I) 4871 : Semantics(&S), 4872 Floats(new APFloat[2]{ 4873 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])), 4874 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) { 4875 assert(Semantics == &semPPCDoubleDouble); 4876 } 4877 4878 DoubleAPFloat::DoubleAPFloat(const 
fltSemantics &S, APFloat &&First, 4879 APFloat &&Second) 4880 : Semantics(&S), 4881 Floats(new APFloat[2]{std::move(First), std::move(Second)}) { 4882 assert(Semantics == &semPPCDoubleDouble); 4883 assert(&Floats[0].getSemantics() == &semIEEEdouble); 4884 assert(&Floats[1].getSemantics() == &semIEEEdouble); 4885 } 4886 4887 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS) 4888 : Semantics(RHS.Semantics), 4889 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]), 4890 APFloat(RHS.Floats[1])} 4891 : nullptr) { 4892 assert(Semantics == &semPPCDoubleDouble); 4893 } 4894 4895 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS) 4896 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) { 4897 RHS.Semantics = &semBogus; 4898 assert(Semantics == &semPPCDoubleDouble); 4899 } 4900 4901 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 4902 if (Semantics == RHS.Semantics && RHS.Floats) { 4903 Floats[0] = RHS.Floats[0]; 4904 Floats[1] = RHS.Floats[1]; 4905 } else if (this != &RHS) { 4906 this->~DoubleAPFloat(); 4907 new (this) DoubleAPFloat(RHS); 4908 } 4909 return *this; 4910 } 4911 4912 // Implement addition, subtraction, multiplication and division based on: 4913 // "Software for Doubled-Precision Floating-Point Computations", 4914 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. 
4915 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4916 const APFloat &c, const APFloat &cc, 4917 roundingMode RM) { 4918 int Status = opOK; 4919 APFloat z = a; 4920 Status |= z.add(c, RM); 4921 if (!z.isFinite()) { 4922 if (!z.isInfinity()) { 4923 Floats[0] = std::move(z); 4924 Floats[1].makeZero(/* Neg = */ false); 4925 return (opStatus)Status; 4926 } 4927 Status = opOK; 4928 auto AComparedToC = a.compareAbsoluteValue(c); 4929 z = cc; 4930 Status |= z.add(aa, RM); 4931 if (AComparedToC == APFloat::cmpGreaterThan) { 4932 // z = cc + aa + c + a; 4933 Status |= z.add(c, RM); 4934 Status |= z.add(a, RM); 4935 } else { 4936 // z = cc + aa + a + c; 4937 Status |= z.add(a, RM); 4938 Status |= z.add(c, RM); 4939 } 4940 if (!z.isFinite()) { 4941 Floats[0] = std::move(z); 4942 Floats[1].makeZero(/* Neg = */ false); 4943 return (opStatus)Status; 4944 } 4945 Floats[0] = z; 4946 APFloat zz = aa; 4947 Status |= zz.add(cc, RM); 4948 if (AComparedToC == APFloat::cmpGreaterThan) { 4949 // Floats[1] = a - z + c + zz; 4950 Floats[1] = a; 4951 Status |= Floats[1].subtract(z, RM); 4952 Status |= Floats[1].add(c, RM); 4953 Status |= Floats[1].add(zz, RM); 4954 } else { 4955 // Floats[1] = c - z + a + zz; 4956 Floats[1] = c; 4957 Status |= Floats[1].subtract(z, RM); 4958 Status |= Floats[1].add(a, RM); 4959 Status |= Floats[1].add(zz, RM); 4960 } 4961 } else { 4962 // q = a - z; 4963 APFloat q = a; 4964 Status |= q.subtract(z, RM); 4965 4966 // zz = q + c + (a - (q + z)) + aa + cc; 4967 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 
4968 auto zz = q; 4969 Status |= zz.add(c, RM); 4970 Status |= q.add(z, RM); 4971 Status |= q.subtract(a, RM); 4972 q.changeSign(); 4973 Status |= zz.add(q, RM); 4974 Status |= zz.add(aa, RM); 4975 Status |= zz.add(cc, RM); 4976 if (zz.isZero() && !zz.isNegative()) { 4977 Floats[0] = std::move(z); 4978 Floats[1].makeZero(/* Neg = */ false); 4979 return opOK; 4980 } 4981 Floats[0] = z; 4982 Status |= Floats[0].add(zz, RM); 4983 if (!Floats[0].isFinite()) { 4984 Floats[1].makeZero(/* Neg = */ false); 4985 return (opStatus)Status; 4986 } 4987 Floats[1] = std::move(z); 4988 Status |= Floats[1].subtract(Floats[0], RM); 4989 Status |= Floats[1].add(zz, RM); 4990 } 4991 return (opStatus)Status; 4992 } 4993 4994 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 4995 const DoubleAPFloat &RHS, 4996 DoubleAPFloat &Out, 4997 roundingMode RM) { 4998 if (LHS.getCategory() == fcNaN) { 4999 Out = LHS; 5000 return opOK; 5001 } 5002 if (RHS.getCategory() == fcNaN) { 5003 Out = RHS; 5004 return opOK; 5005 } 5006 if (LHS.getCategory() == fcZero) { 5007 Out = RHS; 5008 return opOK; 5009 } 5010 if (RHS.getCategory() == fcZero) { 5011 Out = LHS; 5012 return opOK; 5013 } 5014 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 5015 LHS.isNegative() != RHS.isNegative()) { 5016 Out.makeNaN(false, Out.isNegative(), nullptr); 5017 return opInvalidOp; 5018 } 5019 if (LHS.getCategory() == fcInfinity) { 5020 Out = LHS; 5021 return opOK; 5022 } 5023 if (RHS.getCategory() == fcInfinity) { 5024 Out = RHS; 5025 return opOK; 5026 } 5027 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 5028 5029 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 5030 CC(RHS.Floats[1]); 5031 assert(&A.getSemantics() == &semIEEEdouble); 5032 assert(&AA.getSemantics() == &semIEEEdouble); 5033 assert(&C.getSemantics() == &semIEEEdouble); 5034 assert(&CC.getSemantics() == &semIEEEdouble); 5035 assert(&Out.Floats[0].getSemantics() == 
&semIEEEdouble); 5036 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 5037 return Out.addImpl(A, AA, C, CC, RM); 5038 } 5039 5040 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 5041 roundingMode RM) { 5042 return addWithSpecial(*this, RHS, *this, RM); 5043 } 5044 5045 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 5046 roundingMode RM) { 5047 changeSign(); 5048 auto Ret = add(RHS, RM); 5049 changeSign(); 5050 return Ret; 5051 } 5052 5053 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 5054 APFloat::roundingMode RM) { 5055 const auto &LHS = *this; 5056 auto &Out = *this; 5057 /* Interesting observation: For special categories, finding the lowest 5058 common ancestor of the following layered graph gives the correct 5059 return category: 5060 5061 NaN 5062 / \ 5063 Zero Inf 5064 \ / 5065 Normal 5066 5067 e.g. NaN * NaN = NaN 5068 Zero * Inf = NaN 5069 Normal * Zero = Zero 5070 Normal * Inf = Inf 5071 */ 5072 if (LHS.getCategory() == fcNaN) { 5073 Out = LHS; 5074 return opOK; 5075 } 5076 if (RHS.getCategory() == fcNaN) { 5077 Out = RHS; 5078 return opOK; 5079 } 5080 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 5081 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 5082 Out.makeNaN(false, false, nullptr); 5083 return opOK; 5084 } 5085 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 5086 Out = LHS; 5087 return opOK; 5088 } 5089 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 5090 Out = RHS; 5091 return opOK; 5092 } 5093 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 5094 "Special cases not handled exhaustively"); 5095 5096 int Status = opOK; 5097 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 5098 // t = a * c 5099 APFloat T = A; 5100 Status |= T.multiply(C, RM); 5101 if (!T.isFiniteNonZero()) { 5102 Floats[0] = T; 5103 Floats[1].makeZero(/* Neg = */ false); 5104 
return (opStatus)Status; 5105 } 5106 5107 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 5108 APFloat Tau = A; 5109 T.changeSign(); 5110 Status |= Tau.fusedMultiplyAdd(C, T, RM); 5111 T.changeSign(); 5112 { 5113 // v = a * d 5114 APFloat V = A; 5115 Status |= V.multiply(D, RM); 5116 // w = b * c 5117 APFloat W = B; 5118 Status |= W.multiply(C, RM); 5119 Status |= V.add(W, RM); 5120 // tau += v + w 5121 Status |= Tau.add(V, RM); 5122 } 5123 // u = t + tau 5124 APFloat U = T; 5125 Status |= U.add(Tau, RM); 5126 5127 Floats[0] = U; 5128 if (!U.isFinite()) { 5129 Floats[1].makeZero(/* Neg = */ false); 5130 } else { 5131 // Floats[1] = (t - u) + tau 5132 Status |= T.subtract(U, RM); 5133 Status |= T.add(Tau, RM); 5134 Floats[1] = T; 5135 } 5136 return (opStatus)Status; 5137 } 5138 5139 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 5140 APFloat::roundingMode RM) { 5141 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5142 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5143 auto Ret = 5144 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 5145 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5146 return Ret; 5147 } 5148 5149 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 5150 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5151 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5152 auto Ret = 5153 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5154 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5155 return Ret; 5156 } 5157 5158 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 5159 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5160 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5161 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5162 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5163 return Ret; 
5164 } 5165 5166 APFloat::opStatus 5167 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 5168 const DoubleAPFloat &Addend, 5169 APFloat::roundingMode RM) { 5170 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5171 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5172 auto Ret = Tmp.fusedMultiplyAdd( 5173 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 5174 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 5175 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5176 return Ret; 5177 } 5178 5179 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 5180 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5181 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5182 auto Ret = Tmp.roundToIntegral(RM); 5183 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5184 return Ret; 5185 } 5186 5187 void DoubleAPFloat::changeSign() { 5188 Floats[0].changeSign(); 5189 Floats[1].changeSign(); 5190 } 5191 5192 APFloat::cmpResult 5193 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 5194 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 5195 if (Result != cmpEqual) 5196 return Result; 5197 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 5198 if (Result == cmpLessThan || Result == cmpGreaterThan) { 5199 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 5200 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 5201 if (Against && !RHSAgainst) 5202 return cmpLessThan; 5203 if (!Against && RHSAgainst) 5204 return cmpGreaterThan; 5205 if (!Against && !RHSAgainst) 5206 return Result; 5207 if (Against && RHSAgainst) 5208 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 5209 } 5210 return Result; 5211 } 5212 5213 APFloat::fltCategory DoubleAPFloat::getCategory() const { 5214 return Floats[0].getCategory(); 5215 } 5216 5217 bool DoubleAPFloat::isNegative() 
const { return Floats[0].isNegative(); } 5218 5219 void DoubleAPFloat::makeInf(bool Neg) { 5220 Floats[0].makeInf(Neg); 5221 Floats[1].makeZero(/* Neg = */ false); 5222 } 5223 5224 void DoubleAPFloat::makeZero(bool Neg) { 5225 Floats[0].makeZero(Neg); 5226 Floats[1].makeZero(/* Neg = */ false); 5227 } 5228 5229 void DoubleAPFloat::makeLargest(bool Neg) { 5230 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5231 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 5232 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 5233 if (Neg) 5234 changeSign(); 5235 } 5236 5237 void DoubleAPFloat::makeSmallest(bool Neg) { 5238 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5239 Floats[0].makeSmallest(Neg); 5240 Floats[1].makeZero(/* Neg = */ false); 5241 } 5242 5243 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 5244 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5245 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 5246 if (Neg) 5247 Floats[0].changeSign(); 5248 Floats[1].makeZero(/* Neg = */ false); 5249 } 5250 5251 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 5252 Floats[0].makeNaN(SNaN, Neg, fill); 5253 Floats[1].makeZero(/* Neg = */ false); 5254 } 5255 5256 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 5257 auto Result = Floats[0].compare(RHS.Floats[0]); 5258 // |Float[0]| > |Float[1]| 5259 if (Result == APFloat::cmpEqual) 5260 return Floats[1].compare(RHS.Floats[1]); 5261 return Result; 5262 } 5263 5264 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 5265 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 5266 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 5267 } 5268 5269 hash_code hash_value(const DoubleAPFloat &Arg) { 5270 if (Arg.Floats) 5271 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 5272 return hash_combine(Arg.Semantics); 5273 } 5274 5275 
APInt DoubleAPFloat::bitcastToAPInt() const { 5276 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5277 uint64_t Data[] = { 5278 Floats[0].bitcastToAPInt().getRawData()[0], 5279 Floats[1].bitcastToAPInt().getRawData()[0], 5280 }; 5281 return APInt(128, 2, Data); 5282 } 5283 5284 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 5285 roundingMode RM) { 5286 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5287 APFloat Tmp(semPPCDoubleDoubleLegacy); 5288 auto Ret = Tmp.convertFromString(S, RM); 5289 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5290 return Ret; 5291 } 5292 5293 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5294 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5295 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5296 auto Ret = Tmp.next(nextDown); 5297 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5298 return Ret; 5299 } 5300 5301 APFloat::opStatus 5302 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5303 unsigned int Width, bool IsSigned, 5304 roundingMode RM, bool *IsExact) const { 5305 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5306 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5307 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5308 } 5309 5310 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5311 bool IsSigned, 5312 roundingMode RM) { 5313 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5314 APFloat Tmp(semPPCDoubleDoubleLegacy); 5315 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5316 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5317 return Ret; 5318 } 5319 5320 APFloat::opStatus 5321 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5322 unsigned int InputSize, 5323 bool IsSigned, roundingMode RM) { 5324 assert(Semantics == &semPPCDoubleDouble && 
"Unexpected Semantics"); 5325 APFloat Tmp(semPPCDoubleDoubleLegacy); 5326 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5327 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5328 return Ret; 5329 } 5330 5331 APFloat::opStatus 5332 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5333 unsigned int InputSize, 5334 bool IsSigned, roundingMode RM) { 5335 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5336 APFloat Tmp(semPPCDoubleDoubleLegacy); 5337 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5338 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5339 return Ret; 5340 } 5341 5342 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5343 unsigned int HexDigits, 5344 bool UpperCase, 5345 roundingMode RM) const { 5346 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5347 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5348 .convertToHexString(DST, HexDigits, UpperCase, RM); 5349 } 5350 5351 bool DoubleAPFloat::isDenormal() const { 5352 return getCategory() == fcNormal && 5353 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5354 // (double)(Hi + Lo) == Hi defines a normal number. 
5355 Floats[0] != Floats[0] + Floats[1]); 5356 } 5357 5358 bool DoubleAPFloat::isSmallest() const { 5359 if (getCategory() != fcNormal) 5360 return false; 5361 DoubleAPFloat Tmp(*this); 5362 Tmp.makeSmallest(this->isNegative()); 5363 return Tmp.compare(*this) == cmpEqual; 5364 } 5365 5366 bool DoubleAPFloat::isSmallestNormalized() const { 5367 if (getCategory() != fcNormal) 5368 return false; 5369 5370 DoubleAPFloat Tmp(*this); 5371 Tmp.makeSmallestNormalized(this->isNegative()); 5372 return Tmp.compare(*this) == cmpEqual; 5373 } 5374 5375 bool DoubleAPFloat::isLargest() const { 5376 if (getCategory() != fcNormal) 5377 return false; 5378 DoubleAPFloat Tmp(*this); 5379 Tmp.makeLargest(this->isNegative()); 5380 return Tmp.compare(*this) == cmpEqual; 5381 } 5382 5383 bool DoubleAPFloat::isInteger() const { 5384 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5385 return Floats[0].isInteger() && Floats[1].isInteger(); 5386 } 5387 5388 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5389 unsigned FormatPrecision, 5390 unsigned FormatMaxPadding, 5391 bool TruncateZero) const { 5392 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5393 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5394 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5395 } 5396 5397 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5398 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5399 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5400 if (!inv) 5401 return Tmp.getExactInverse(nullptr); 5402 APFloat Inv(semPPCDoubleDoubleLegacy); 5403 auto Ret = Tmp.getExactInverse(&Inv); 5404 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5405 return Ret; 5406 } 5407 5408 int DoubleAPFloat::getExactLog2() const { 5409 // TODO: Implement me 5410 return INT_MIN; 5411 } 5412 5413 int DoubleAPFloat::getExactLog2Abs() const { 5414 // TODO: Implement me 5415 return INT_MIN; 5416 } 5417 5418 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5419 APFloat::roundingMode RM) { 5420 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5421 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5422 scalbn(Arg.Floats[1], Exp, RM)); 5423 } 5424 5425 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5426 APFloat::roundingMode RM) { 5427 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5428 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5429 APFloat Second = Arg.Floats[1]; 5430 if (Arg.getCategory() == APFloat::fcNormal) 5431 Second = scalbn(Second, -Exp, RM); 5432 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5433 } 5434 5435 } // namespace detail 5436 5437 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5438 if (usesLayout<IEEEFloat>(Semantics)) { 5439 new (&IEEE) IEEEFloat(std::move(F)); 5440 return; 5441 } 5442 if (usesLayout<DoubleAPFloat>(Semantics)) { 5443 const fltSemantics& S = F.getSemantics(); 5444 new (&Double) 5445 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5446 APFloat(semIEEEdouble)); 5447 return; 5448 } 5449 llvm_unreachable("Unexpected semantics"); 5450 } 5451 5452 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5453 roundingMode RM) { 5454 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5455 } 5456 5457 hash_code hash_value(const APFloat &Arg) { 5458 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5459 return hash_value(Arg.U.IEEE); 5460 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5461 return hash_value(Arg.U.Double); 5462 llvm_unreachable("Unexpected semantics"); 5463 } 5464 5465 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5466 : APFloat(Semantics) { 5467 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5468 assert(StatusOrErr && "Invalid floating point representation"); 5469 consumeError(StatusOrErr.takeError()); 5470 } 
5471 5472 FPClassTest APFloat::classify() const { 5473 if (isZero()) 5474 return isNegative() ? fcNegZero : fcPosZero; 5475 if (isNormal()) 5476 return isNegative() ? fcNegNormal : fcPosNormal; 5477 if (isDenormal()) 5478 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5479 if (isInfinity()) 5480 return isNegative() ? fcNegInf : fcPosInf; 5481 assert(isNaN() && "Other class of FP constant"); 5482 return isSignaling() ? fcSNan : fcQNan; 5483 } 5484 5485 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5486 roundingMode RM, bool *losesInfo) { 5487 if (&getSemantics() == &ToSemantics) { 5488 *losesInfo = false; 5489 return opOK; 5490 } 5491 if (usesLayout<IEEEFloat>(getSemantics()) && 5492 usesLayout<IEEEFloat>(ToSemantics)) 5493 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5494 if (usesLayout<IEEEFloat>(getSemantics()) && 5495 usesLayout<DoubleAPFloat>(ToSemantics)) { 5496 assert(&ToSemantics == &semPPCDoubleDouble); 5497 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5498 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5499 return Ret; 5500 } 5501 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5502 usesLayout<IEEEFloat>(ToSemantics)) { 5503 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5504 *this = APFloat(std::move(getIEEE()), ToSemantics); 5505 return Ret; 5506 } 5507 llvm_unreachable("Unexpected semantics"); 5508 } 5509 5510 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5511 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5512 } 5513 5514 void APFloat::print(raw_ostream &OS) const { 5515 SmallVector<char, 16> Buffer; 5516 toString(Buffer); 5517 OS << Buffer; 5518 } 5519 5520 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5521 LLVM_DUMP_METHOD void APFloat::dump() const { 5522 print(dbgs()); 5523 dbgs() << '\n'; 5524 } 5525 #endif 5526 5527 void APFloat::Profile(FoldingSetNodeID &NID) const { 5528 NID.Add(bitcastToAPInt()); 5529 } 5530 5531 /* Same 
as convertToInteger(integerPart*, ...), except the result is returned in 5532 an APSInt, whose initial bit-width and signed-ness are used to determine the 5533 precision of the conversion. 5534 */ 5535 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5536 roundingMode rounding_mode, 5537 bool *isExact) const { 5538 unsigned bitWidth = result.getBitWidth(); 5539 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5540 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5541 rounding_mode, isExact); 5542 // Keeps the original signed-ness. 5543 result = APInt(bitWidth, parts); 5544 return status; 5545 } 5546 5547 double APFloat::convertToDouble() const { 5548 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5549 return getIEEE().convertToDouble(); 5550 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5551 "Float semantics is not representable by IEEEdouble"); 5552 APFloat Temp = *this; 5553 bool LosesInfo; 5554 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5555 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5556 (void)St; 5557 return Temp.getIEEE().convertToDouble(); 5558 } 5559 5560 #ifdef HAS_IEE754_FLOAT128 5561 float128 APFloat::convertToQuad() const { 5562 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) 5563 return getIEEE().convertToQuad(); 5564 assert(getSemantics().isRepresentableBy(semIEEEquad) && 5565 "Float semantics is not representable by IEEEquad"); 5566 APFloat Temp = *this; 5567 bool LosesInfo; 5568 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); 5569 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5570 (void)St; 5571 return Temp.getIEEE().convertToQuad(); 5572 } 5573 #endif 5574 5575 float APFloat::convertToFloat() const { 5576 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5577 return getIEEE().convertToFloat(); 5578 
assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5579 "Float semantics is not representable by IEEEsingle"); 5580 APFloat Temp = *this; 5581 bool LosesInfo; 5582 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5583 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5584 (void)St; 5585 return Temp.getIEEE().convertToFloat(); 5586 } 5587 5588 } // namespace llvm 5589 5590 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5591