//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class to represent arbitrary precision floating
// point values and provide a variety of arithmetic operations on them.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <limits.h>

/// Forwards METHOD_CALL to the member of `U` that matches the layout reported
/// by getSemantics(): `U.IEEE` when the layout is IEEEFloat, `U.Double` when
/// it is DoubleAPFloat.
///
/// The expansion consists of `return` statements, so the macro always leaves
/// the enclosing function; any other layout reaches llvm_unreachable. It
/// relies on `usesLayout`, `getSemantics` and `U` being visible at the point
/// of use.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)

using namespace llvm;

/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))

/* Assumed in hexadecimal significand parsing, and conversion to
   hexadecimal strings.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0,
              "Part width must be divisible by 4!");

namespace llvm {

// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,

  // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  // Float4E2M1FN types, which do not support Inf or NaN values.
  FiniteOnly,
};

// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3FN floating point type where NaN
  // is represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};

/* Represents floating point arithmetic semantics.  */
struct fltSemantics {
  /* The largest E such that 2^E is representable; this matches the
     definition of IEEE 754.  */
  APFloatBase::ExponentType maxExponent;

  /* The smallest E such that 2^E is a normalized number; this
     matches the definition of IEEE 754.  */
  APFloatBase::ExponentType minExponent;

  /* Number of bits in the significand.  This includes the integer
     bit.  */
  unsigned int precision;

  /* Number of bits actually used in the semantics. */
  unsigned int sizeInBits;

  /* How Inf/NaN are encoded; defaults to the IEEE 754 scheme. */
  fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;

  /* Which bit pattern denotes NaN; defaults to the IEEE scheme. */
  fltNanEncoding nanEncoding = fltNanEncoding::IEEE;

  /* Whether this semantics has an encoding for Zero */
  bool hasZero = true;

  /* Whether this semantics can represent signed values */
  bool hasSignedRepr = true;

  // Returns true if any number described by this semantics can be precisely
  // represented by the specified semantics. Does not take into account
  // the value of fltNonfiniteBehavior.
  bool isRepresentableBy(const fltSemantics &S) const {
    return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
           precision <= S.precision;
  }
};

// The tables below use aggregate initialization, so the values appear in
// member order:
//   {maxExponent, minExponent, precision, sizeInBits,
//    [nonFiniteBehavior, [nanEncoding, [hasZero, [hasSignedRepr]]]]}
// Omitted trailing members take the defaults declared in fltSemantics.
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
// The only table entry that clears hasZero and hasSignedRepr.
static constexpr fltSemantics semFloat8E8M0FNU = {
    127,   -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes,
    false, false};

static constexpr fltSemantics semFloat6E3M2FN = {
    4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat6E2M3FN = {
    2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semFloat4E2M1FN = {
    2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static constexpr fltSemantics semBogus = {0, 0, 0, 0};

/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.  */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};

/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.
*/ 203 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, 204 53 + 53, 128}; 205 206 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { 207 switch (S) { 208 case S_IEEEhalf: 209 return IEEEhalf(); 210 case S_BFloat: 211 return BFloat(); 212 case S_IEEEsingle: 213 return IEEEsingle(); 214 case S_IEEEdouble: 215 return IEEEdouble(); 216 case S_IEEEquad: 217 return IEEEquad(); 218 case S_PPCDoubleDouble: 219 return PPCDoubleDouble(); 220 case S_Float8E5M2: 221 return Float8E5M2(); 222 case S_Float8E5M2FNUZ: 223 return Float8E5M2FNUZ(); 224 case S_Float8E4M3: 225 return Float8E4M3(); 226 case S_Float8E4M3FN: 227 return Float8E4M3FN(); 228 case S_Float8E4M3FNUZ: 229 return Float8E4M3FNUZ(); 230 case S_Float8E4M3B11FNUZ: 231 return Float8E4M3B11FNUZ(); 232 case S_Float8E3M4: 233 return Float8E3M4(); 234 case S_FloatTF32: 235 return FloatTF32(); 236 case S_Float8E8M0FNU: 237 return Float8E8M0FNU(); 238 case S_Float6E3M2FN: 239 return Float6E3M2FN(); 240 case S_Float6E2M3FN: 241 return Float6E2M3FN(); 242 case S_Float4E2M1FN: 243 return Float4E2M1FN(); 244 case S_x87DoubleExtended: 245 return x87DoubleExtended(); 246 } 247 llvm_unreachable("Unrecognised floating semantics"); 248 } 249 250 APFloatBase::Semantics 251 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { 252 if (&Sem == &llvm::APFloat::IEEEhalf()) 253 return S_IEEEhalf; 254 else if (&Sem == &llvm::APFloat::BFloat()) 255 return S_BFloat; 256 else if (&Sem == &llvm::APFloat::IEEEsingle()) 257 return S_IEEEsingle; 258 else if (&Sem == &llvm::APFloat::IEEEdouble()) 259 return S_IEEEdouble; 260 else if (&Sem == &llvm::APFloat::IEEEquad()) 261 return S_IEEEquad; 262 else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) 263 return S_PPCDoubleDouble; 264 else if (&Sem == &llvm::APFloat::Float8E5M2()) 265 return S_Float8E5M2; 266 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) 267 return S_Float8E5M2FNUZ; 268 else if (&Sem == &llvm::APFloat::Float8E4M3()) 269 return 
S_Float8E4M3; 270 else if (&Sem == &llvm::APFloat::Float8E4M3FN()) 271 return S_Float8E4M3FN; 272 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) 273 return S_Float8E4M3FNUZ; 274 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) 275 return S_Float8E4M3B11FNUZ; 276 else if (&Sem == &llvm::APFloat::Float8E3M4()) 277 return S_Float8E3M4; 278 else if (&Sem == &llvm::APFloat::FloatTF32()) 279 return S_FloatTF32; 280 else if (&Sem == &llvm::APFloat::Float8E8M0FNU()) 281 return S_Float8E8M0FNU; 282 else if (&Sem == &llvm::APFloat::Float6E3M2FN()) 283 return S_Float6E3M2FN; 284 else if (&Sem == &llvm::APFloat::Float6E2M3FN()) 285 return S_Float6E2M3FN; 286 else if (&Sem == &llvm::APFloat::Float4E2M1FN()) 287 return S_Float4E2M1FN; 288 else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 289 return S_x87DoubleExtended; 290 else 291 llvm_unreachable("Unknown floating semantics"); 292 } 293 294 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } 295 const fltSemantics &APFloatBase::BFloat() { return semBFloat; } 296 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } 297 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } 298 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } 299 const fltSemantics &APFloatBase::PPCDoubleDouble() { 300 return semPPCDoubleDouble; 301 } 302 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } 303 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } 304 const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; } 305 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } 306 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } 307 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { 308 return semFloat8E4M3B11FNUZ; 309 } 310 const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; } 311 const fltSemantics &APFloatBase::FloatTF32() { return 
semFloatTF32; } 312 const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; } 313 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } 314 const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } 315 const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; } 316 const fltSemantics &APFloatBase::x87DoubleExtended() { 317 return semX87DoubleExtended; 318 } 319 const fltSemantics &APFloatBase::Bogus() { return semBogus; } 320 321 constexpr RoundingMode APFloatBase::rmNearestTiesToEven; 322 constexpr RoundingMode APFloatBase::rmTowardPositive; 323 constexpr RoundingMode APFloatBase::rmTowardNegative; 324 constexpr RoundingMode APFloatBase::rmTowardZero; 325 constexpr RoundingMode APFloatBase::rmNearestTiesToAway; 326 327 /* A tight upper bound on number of parts required to hold the value 328 pow(5, power) is 329 330 power * 815 / (351 * integerPartWidth) + 1 331 332 However, whilst the result may require only this many parts, 333 because we are multiplying two values to get it, the 334 multiplication may require an extra part with the excess part 335 being zero (consider the trivial case of 1 * 1, tcFullMultiply 336 requires two parts to hold the single-part result). So we add an 337 extra one to guarantee enough space whilst multiplying. 
*/ 338 const unsigned int maxExponent = 16383; 339 const unsigned int maxPrecision = 113; 340 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 341 const unsigned int maxPowerOfFiveParts = 342 2 + 343 ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); 344 345 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { 346 return semantics.precision; 347 } 348 APFloatBase::ExponentType 349 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { 350 return semantics.maxExponent; 351 } 352 APFloatBase::ExponentType 353 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { 354 return semantics.minExponent; 355 } 356 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { 357 return semantics.sizeInBits; 358 } 359 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, 360 bool isSigned) { 361 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need 362 // at least one more bit than the MaxExponent to hold the max FP value. 363 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; 364 // Extra sign bit needed. 365 if (isSigned) 366 ++MinBitWidth; 367 return MinBitWidth; 368 } 369 370 bool APFloatBase::semanticsHasZero(const fltSemantics &semantics) { 371 return semantics.hasZero; 372 } 373 374 bool APFloatBase::semanticsHasSignedRepr(const fltSemantics &semantics) { 375 return semantics.hasSignedRepr; 376 } 377 378 bool APFloatBase::semanticsHasInf(const fltSemantics &semantics) { 379 return semantics.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754; 380 } 381 382 bool APFloatBase::semanticsHasNaN(const fltSemantics &semantics) { 383 return semantics.nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly; 384 } 385 386 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, 387 const fltSemantics &Dst) { 388 // Exponent range must be larger. 
389 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) 390 return false; 391 392 // If the mantissa is long enough, the result value could still be denormal 393 // with a larger exponent range. 394 // 395 // FIXME: This condition is probably not accurate but also shouldn't be a 396 // practical concern with existing types. 397 return Dst.precision >= Src.precision; 398 } 399 400 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { 401 return Sem.sizeInBits; 402 } 403 404 static constexpr APFloatBase::ExponentType 405 exponentZero(const fltSemantics &semantics) { 406 return semantics.minExponent - 1; 407 } 408 409 static constexpr APFloatBase::ExponentType 410 exponentInf(const fltSemantics &semantics) { 411 return semantics.maxExponent + 1; 412 } 413 414 static constexpr APFloatBase::ExponentType 415 exponentNaN(const fltSemantics &semantics) { 416 if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 417 if (semantics.nanEncoding == fltNanEncoding::NegativeZero) 418 return exponentZero(semantics); 419 if (semantics.hasSignedRepr) 420 return semantics.maxExponent; 421 } 422 return semantics.maxExponent + 1; 423 } 424 425 /* A bunch of private, handy routines. */ 426 427 static inline Error createError(const Twine &Err) { 428 return make_error<StringError>(Err, inconvertibleErrorCode()); 429 } 430 431 static constexpr inline unsigned int partCountForBits(unsigned int bits) { 432 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) / 433 APFloatBase::integerPartWidth); 434 } 435 436 /* Returns 0U-9U. Return values >= 10U are not digits. */ 437 static inline unsigned int 438 decDigitValue(unsigned int c) 439 { 440 return c - '0'; 441 } 442 443 /* Return the value of a decimal exponent of the form 444 [+-]ddddddd. 445 446 If the exponent overflows, returns a large exponent with the 447 appropriate sign. 
*/ 448 static Expected<int> readExponent(StringRef::iterator begin, 449 StringRef::iterator end) { 450 bool isNegative; 451 unsigned int absExponent; 452 const unsigned int overlargeExponent = 24000; /* FIXME. */ 453 StringRef::iterator p = begin; 454 455 // Treat no exponent as 0 to match binutils 456 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) { 457 return 0; 458 } 459 460 isNegative = (*p == '-'); 461 if (*p == '-' || *p == '+') { 462 p++; 463 if (p == end) 464 return createError("Exponent has no digits"); 465 } 466 467 absExponent = decDigitValue(*p++); 468 if (absExponent >= 10U) 469 return createError("Invalid character in exponent"); 470 471 for (; p != end; ++p) { 472 unsigned int value; 473 474 value = decDigitValue(*p); 475 if (value >= 10U) 476 return createError("Invalid character in exponent"); 477 478 absExponent = absExponent * 10U + value; 479 if (absExponent >= overlargeExponent) { 480 absExponent = overlargeExponent; 481 break; 482 } 483 } 484 485 if (isNegative) 486 return -(int) absExponent; 487 else 488 return (int) absExponent; 489 } 490 491 /* This is ugly and needs cleaning up, but I don't immediately see 492 how whilst remaining safe. 
*/ 493 static Expected<int> totalExponent(StringRef::iterator p, 494 StringRef::iterator end, 495 int exponentAdjustment) { 496 int unsignedExponent; 497 bool negative, overflow; 498 int exponent = 0; 499 500 if (p == end) 501 return createError("Exponent has no digits"); 502 503 negative = *p == '-'; 504 if (*p == '-' || *p == '+') { 505 p++; 506 if (p == end) 507 return createError("Exponent has no digits"); 508 } 509 510 unsignedExponent = 0; 511 overflow = false; 512 for (; p != end; ++p) { 513 unsigned int value; 514 515 value = decDigitValue(*p); 516 if (value >= 10U) 517 return createError("Invalid character in exponent"); 518 519 unsignedExponent = unsignedExponent * 10 + value; 520 if (unsignedExponent > 32767) { 521 overflow = true; 522 break; 523 } 524 } 525 526 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 527 overflow = true; 528 529 if (!overflow) { 530 exponent = unsignedExponent; 531 if (negative) 532 exponent = -exponent; 533 exponent += exponentAdjustment; 534 if (exponent > 32767 || exponent < -32768) 535 overflow = true; 536 } 537 538 if (overflow) 539 exponent = negative ? -32768: 32767; 540 541 return exponent; 542 } 543 544 static Expected<StringRef::iterator> 545 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 546 StringRef::iterator *dot) { 547 StringRef::iterator p = begin; 548 *dot = end; 549 while (p != end && *p == '0') 550 p++; 551 552 if (p != end && *p == '.') { 553 *dot = p++; 554 555 if (end - begin == 1) 556 return createError("Significand has no digits"); 557 558 while (p != end && *p == '0') 559 p++; 560 } 561 562 return p; 563 } 564 565 /* Given a normal decimal floating point number of the form 566 567 dddd.dddd[eE][+-]ddd 568 569 where the decimal point and exponent are optional, fill out the 570 structure D. 
Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  const char *firstSigDigit;  // first character after leading zeroes / dot
  const char *lastSigDigit;   // last digit left after trailing-zero trimming
  int exponent;               // exponent with the significand as an integer
  int normalizedExponent;     // exponent with the point after the first digit
};

// Parses [begin, end) as described above and fills *D.
// Returns an error for a second '.', for a character other than 'e'/'E'
// terminating the digits, for a significand without digits, or for whatever
// skipLeadingZeroesAndAnyDot / readExponent report.
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  // Scan the digits, remembering where the (single) dot is.
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  if (p != end) {
    // Only an exponent marker may terminate the digit run.
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      // Inner loop steps back over zeroes; outer loop repeats that if the
      // character it stopped on is the dot.
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    // (dot > p) / (dot > firstSigDigit && dot < p) subtract one for the dot
    // character itself when it lies inside the measured distance.
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}

/* Return the trailing fraction of a hexadecimal number.
   DIGITVALUE is the first hex digit of the fraction, P points to
   the next digit.  */
static Expected<lostFraction>
trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
                            unsigned int digitValue) {
  unsigned int hexDigit;

  /* If the first trailing digit isn't 0 or 8 we can work out the
     fraction immediately.  */
  if (digitValue > 8)
    return lfMoreThanHalf;
  else if (digitValue < 8 && digitValue > 0)
    return lfLessThanHalf;

  // Otherwise we need to find the first non-zero digit.
  while (p != end && (*p == '0' || *p == '.'))
    p++;

  // Reaching the end of the input here is reported as an error.
  if (p == end)
    return createError("Invalid trailing hexadecimal fraction!");

  hexDigit = hexDigitValue(*p);

  /* If we ran off the end it is exactly zero or one-half, otherwise
     a little more.  */
  // hexDigit == UINT_MAX: the character we stopped on is not a hex digit.
  if (hexDigit == UINT_MAX)
    return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
  else
    return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
}

/* Return the fraction lost were a bignum truncated losing the least
   significant BITS bits.  */
static lostFraction
lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
                              unsigned int partCount,
                              unsigned int bits)
{
  unsigned int lsb;

  lsb = APInt::tcLSB(parts, partCount);

  /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
  if (bits <= lsb)
    return lfExactlyZero;
  // Only the topmost dropped bit is set: exactly one half.
  if (bits == lsb + 1)
    return lfExactlyHalf;
  // Topmost dropped bit set plus something below it: more than half. The
  // range check keeps the bit lookup inside the parts array.
  if (bits <= partCount * APFloatBase::integerPartWidth &&
      APInt::tcExtractBit(parts, bits - 1))
    return lfMoreThanHalf;

  return lfLessThanHalf;
}

/* Shift DST right BITS bits noting lost fraction.  */
static lostFraction
shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
{
  lostFraction lost_fraction;

  // Classify the bits about to be dropped before the shift destroys them.
  lost_fraction = lostFractionThroughTruncation(dst, parts, bits);

  APInt::tcShiftRight(dst, parts, bits);

  return lost_fraction;
}

/* Combine the effect of two lost fractions.  */
static lostFraction
combineLostFractions(lostFraction moreSignificant,
                     lostFraction lessSignificant)
{
  // A non-zero less significant fraction only matters when the more
  // significant one sits exactly on zero or exactly on one half; it then
  // nudges the result just above that point.
  if (lessSignificant != lfExactlyZero) {
    if (moreSignificant == lfExactlyZero)
      moreSignificant = lfLessThanHalf;
    else if (moreSignificant == lfExactlyHalf)
      moreSignificant = lfMoreThanHalf;
  }

  return moreSignificant;
}

/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  if (HUerr1 + HUerr2 == 0)
    return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
  else
    return inexactMultiply + 2 * (HUerr1 + HUerr2);
}

/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.
*/
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  // count: index of the part holding the highest truncated bit.
  // partBits: how many low bits of that part are truncated (1..part width).
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  // Keep only the truncated bits of the top part.
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  // The boundary is one half (top truncated bit set) or zero.
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  if (count == 0) {
    // Single part: return the smaller of the two differences.
    // NOTE(review): presumably relies on integerPart being unsigned so the
    // "wrong way round" subtraction wraps to a huge value - confirm.
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    // Top part sits on the boundary: the distance is parts[0], provided
    // every part strictly between is zero.
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    // Top part is one below the boundary: the distance is -parts[0],
    // provided every part strictly between is all ones.
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}

/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  // pow5s holds 5^8, 5^16, 5^32, ... back to back; each is built by squaring
  // the previous entry the first time the loop needs it.
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  // partsCount: number of parts in the pow5s entry that pow5 points at.
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  // p1 always names the buffer holding the running product, p2 the spare.
  p1 = dst;
  p2 = scratch;

  // The low three bits of the power come straight from the small table.
  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      // Square the previous entry, which sits just below pow5.
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product is now in p2 with `result' parts; swap the buffers so
         that p1 names it again and p2 becomes the scratch space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    pow5 += partsCount;
  }

  // An odd number of swaps leaves the answer in scratch; copy it out.
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}

/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";

/* Write out an integerPart in hexadecimal, starting with the most
   significant nibble.  Write out exactly COUNT hexdigits, return
   COUNT.  */
static unsigned int
partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
           const char *hexDigitChars)
{
  unsigned int result = count;

  assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);

  // Discard the nibbles that are not wanted, then emit from the least
  // significant remaining nibble backwards so dst ends up most significant
  // nibble first.
  part >>= (APFloatBase::integerPartWidth - 4 * count);
  while (count--) {
    dst[count] = hexDigitChars[part & 0xf];
    part >>= 4;
  }

  return result;
}

/* Write out an unsigned decimal integer.
*/ 884 static char * 885 writeUnsignedDecimal (char *dst, unsigned int n) 886 { 887 char buff[40], *p; 888 889 p = buff; 890 do 891 *p++ = '0' + n % 10; 892 while (n /= 10); 893 894 do 895 *dst++ = *--p; 896 while (p != buff); 897 898 return dst; 899 } 900 901 /* Write out a signed decimal integer. */ 902 static char * 903 writeSignedDecimal (char *dst, int value) 904 { 905 if (value < 0) { 906 *dst++ = '-'; 907 dst = writeUnsignedDecimal(dst, -(unsigned) value); 908 } else 909 dst = writeUnsignedDecimal(dst, value); 910 911 return dst; 912 } 913 914 namespace detail { 915 /* Constructors. */ 916 void IEEEFloat::initialize(const fltSemantics *ourSemantics) { 917 unsigned int count; 918 919 semantics = ourSemantics; 920 count = partCount(); 921 if (count > 1) 922 significand.parts = new integerPart[count]; 923 } 924 925 void IEEEFloat::freeSignificand() { 926 if (needsCleanup()) 927 delete [] significand.parts; 928 } 929 930 void IEEEFloat::assign(const IEEEFloat &rhs) { 931 assert(semantics == rhs.semantics); 932 933 sign = rhs.sign; 934 category = rhs.category; 935 exponent = rhs.exponent; 936 if (isFiniteNonZero() || category == fcNaN) 937 copySignificand(rhs); 938 } 939 940 void IEEEFloat::copySignificand(const IEEEFloat &rhs) { 941 assert(isFiniteNonZero() || category == fcNaN); 942 assert(rhs.partCount() >= partCount()); 943 944 APInt::tcAssign(significandParts(), rhs.significandParts(), 945 partCount()); 946 } 947 948 /* Make this number a NaN, with an arbitrary but deterministic value 949 for the significand. If double or longer, this is a signalling NaN, 950 which may not be ideal. If float, this is QNaN(0). 
*/ 951 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { 952 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 953 llvm_unreachable("This floating point format does not support NaN"); 954 955 if (Negative && !semantics->hasSignedRepr) 956 llvm_unreachable( 957 "This floating point format does not support signed values"); 958 959 category = fcNaN; 960 sign = Negative; 961 exponent = exponentNaN(); 962 963 integerPart *significand = significandParts(); 964 unsigned numParts = partCount(); 965 966 APInt fill_storage; 967 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 968 // Finite-only types do not distinguish signalling and quiet NaN, so 969 // make them all signalling. 970 SNaN = false; 971 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 972 sign = true; 973 fill_storage = APInt::getZero(semantics->precision - 1); 974 } else { 975 fill_storage = APInt::getAllOnes(semantics->precision - 1); 976 } 977 fill = &fill_storage; 978 } 979 980 // Set the significand bits to the fill. 981 if (!fill || fill->getNumWords() < numParts) 982 APInt::tcSet(significand, 0, numParts); 983 if (fill) { 984 APInt::tcAssign(significand, fill->getRawData(), 985 std::min(fill->getNumWords(), numParts)); 986 987 // Zero out the excess bits of the significand. 988 unsigned bitsToPreserve = semantics->precision - 1; 989 unsigned part = bitsToPreserve / 64; 990 bitsToPreserve %= 64; 991 significand[part] &= ((1ULL << bitsToPreserve) - 1); 992 for (part++; part != numParts; ++part) 993 significand[part] = 0; 994 } 995 996 unsigned QNaNBit = 997 (semantics->precision >= 2) ? (semantics->precision - 2) : 0; 998 999 if (SNaN) { 1000 // We always have to clear the QNaN bit to make it an SNaN. 
1001 APInt::tcClearBit(significand, QNaNBit); 1002 1003 // If there are no bits set in the payload, we have to set 1004 // *something* to make it a NaN instead of an infinity; 1005 // conventionally, this is the next bit down from the QNaN bit. 1006 if (APInt::tcIsZero(significand, numParts)) 1007 APInt::tcSetBit(significand, QNaNBit - 1); 1008 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { 1009 // The only NaN is a quiet NaN, and it has no bits sets in the significand. 1010 // Do nothing. 1011 } else { 1012 // We always have to set the QNaN bit to make it a QNaN. 1013 APInt::tcSetBit(significand, QNaNBit); 1014 } 1015 1016 // For x87 extended precision, we want to make a NaN, not a 1017 // pseudo-NaN. Maybe we should expose the ability to make 1018 // pseudo-NaNs? 1019 if (semantics == &semX87DoubleExtended) 1020 APInt::tcSetBit(significand, QNaNBit + 1); 1021 } 1022 1023 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) { 1024 if (this != &rhs) { 1025 if (semantics != rhs.semantics) { 1026 freeSignificand(); 1027 initialize(rhs.semantics); 1028 } 1029 assign(rhs); 1030 } 1031 1032 return *this; 1033 } 1034 1035 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) { 1036 freeSignificand(); 1037 1038 semantics = rhs.semantics; 1039 significand = rhs.significand; 1040 exponent = rhs.exponent; 1041 category = rhs.category; 1042 sign = rhs.sign; 1043 1044 rhs.semantics = &semBogus; 1045 return *this; 1046 } 1047 1048 bool IEEEFloat::isDenormal() const { 1049 return isFiniteNonZero() && (exponent == semantics->minExponent) && 1050 (APInt::tcExtractBit(significandParts(), 1051 semantics->precision - 1) == 0); 1052 } 1053 1054 bool IEEEFloat::isSmallest() const { 1055 // The smallest number by magnitude in our format will be the smallest 1056 // denormal, i.e. the floating point number with exponent being minimum 1057 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 
1058 return isFiniteNonZero() && exponent == semantics->minExponent && 1059 significandMSB() == 0; 1060 } 1061 1062 bool IEEEFloat::isSmallestNormalized() const { 1063 return getCategory() == fcNormal && exponent == semantics->minExponent && 1064 isSignificandAllZerosExceptMSB(); 1065 } 1066 1067 unsigned int IEEEFloat::getNumHighBits() const { 1068 const unsigned int PartCount = partCountForBits(semantics->precision); 1069 const unsigned int Bits = PartCount * integerPartWidth; 1070 1071 // Compute how many bits are used in the final word. 1072 // When precision is just 1, it represents the 'Pth' 1073 // Precision bit and not the actual significand bit. 1074 const unsigned int NumHighBits = (semantics->precision > 1) 1075 ? (Bits - semantics->precision + 1) 1076 : (Bits - semantics->precision); 1077 return NumHighBits; 1078 } 1079 1080 bool IEEEFloat::isSignificandAllOnes() const { 1081 // Test if the significand excluding the integral bit is all ones. This allows 1082 // us to test for binade boundaries. 1083 const integerPart *Parts = significandParts(); 1084 const unsigned PartCount = partCountForBits(semantics->precision); 1085 for (unsigned i = 0; i < PartCount - 1; i++) 1086 if (~Parts[i]) 1087 return false; 1088 1089 // Set the unused high bits to all ones when we compare. 1090 const unsigned NumHighBits = getNumHighBits(); 1091 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1092 "Can not have more high bits to fill than integerPartWidth"); 1093 const integerPart HighBitFill = 1094 ~integerPart(0) << (integerPartWidth - NumHighBits); 1095 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill))) 1096 return false; 1097 1098 return true; 1099 } 1100 1101 bool IEEEFloat::isSignificandAllOnesExceptLSB() const { 1102 // Test if the significand excluding the integral bit is all ones except for 1103 // the least significant bit. 
1104 const integerPart *Parts = significandParts(); 1105 1106 if (Parts[0] & 1) 1107 return false; 1108 1109 const unsigned PartCount = partCountForBits(semantics->precision); 1110 for (unsigned i = 0; i < PartCount - 1; i++) { 1111 if (~Parts[i] & ~unsigned{!i}) 1112 return false; 1113 } 1114 1115 // Set the unused high bits to all ones when we compare. 1116 const unsigned NumHighBits = getNumHighBits(); 1117 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 && 1118 "Can not have more high bits to fill than integerPartWidth"); 1119 const integerPart HighBitFill = ~integerPart(0) 1120 << (integerPartWidth - NumHighBits); 1121 if (~(Parts[PartCount - 1] | HighBitFill | 0x1)) 1122 return false; 1123 1124 return true; 1125 } 1126 1127 bool IEEEFloat::isSignificandAllZeros() const { 1128 // Test if the significand excluding the integral bit is all zeros. This 1129 // allows us to test for binade boundaries. 1130 const integerPart *Parts = significandParts(); 1131 const unsigned PartCount = partCountForBits(semantics->precision); 1132 1133 for (unsigned i = 0; i < PartCount - 1; i++) 1134 if (Parts[i]) 1135 return false; 1136 1137 // Compute how many bits are used in the final word. 
1138 const unsigned NumHighBits = getNumHighBits(); 1139 assert(NumHighBits < integerPartWidth && "Can not have more high bits to " 1140 "clear than integerPartWidth"); 1141 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 1142 1143 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask)) 1144 return false; 1145 1146 return true; 1147 } 1148 1149 bool IEEEFloat::isSignificandAllZerosExceptMSB() const { 1150 const integerPart *Parts = significandParts(); 1151 const unsigned PartCount = partCountForBits(semantics->precision); 1152 1153 for (unsigned i = 0; i < PartCount - 1; i++) { 1154 if (Parts[i]) 1155 return false; 1156 } 1157 1158 const unsigned NumHighBits = getNumHighBits(); 1159 const integerPart MSBMask = integerPart(1) 1160 << (integerPartWidth - NumHighBits); 1161 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask)); 1162 } 1163 1164 bool IEEEFloat::isLargest() const { 1165 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent; 1166 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 1167 semantics->nanEncoding == fltNanEncoding::AllOnes) { 1168 // The largest number by magnitude in our format will be the floating point 1169 // number with maximum exponent and with significand that is all ones except 1170 // the LSB. 1171 return (IsMaxExp && APFloat::hasSignificand(*semantics)) 1172 ? isSignificandAllOnesExceptLSB() 1173 : IsMaxExp; 1174 } else { 1175 // The largest number by magnitude in our format will be the floating point 1176 // number with maximum exponent and with significand that is all ones. 1177 return IsMaxExp && isSignificandAllOnes(); 1178 } 1179 } 1180 1181 bool IEEEFloat::isInteger() const { 1182 // This could be made more efficient; I'm going for obviously correct. 
1183 if (!isFinite()) return false; 1184 IEEEFloat truncated = *this; 1185 truncated.roundToIntegral(rmTowardZero); 1186 return compare(truncated) == cmpEqual; 1187 } 1188 1189 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const { 1190 if (this == &rhs) 1191 return true; 1192 if (semantics != rhs.semantics || 1193 category != rhs.category || 1194 sign != rhs.sign) 1195 return false; 1196 if (category==fcZero || category==fcInfinity) 1197 return true; 1198 1199 if (isFiniteNonZero() && exponent != rhs.exponent) 1200 return false; 1201 1202 return std::equal(significandParts(), significandParts() + partCount(), 1203 rhs.significandParts()); 1204 } 1205 1206 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) { 1207 initialize(&ourSemantics); 1208 sign = 0; 1209 category = fcNormal; 1210 zeroSignificand(); 1211 exponent = ourSemantics.precision - 1; 1212 significandParts()[0] = value; 1213 normalize(rmNearestTiesToEven, lfExactlyZero); 1214 } 1215 1216 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) { 1217 initialize(&ourSemantics); 1218 // The Float8E8MOFNU format does not have a representation 1219 // for zero. So, use the closest representation instead. 1220 // Moreover, the all-zero encoding represents a valid 1221 // normal value (which is the smallestNormalized here). 1222 // Hence, we call makeSmallestNormalized (where category is 1223 // 'fcNormal') instead of makeZero (where category is 'fcZero'). 1224 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false); 1225 } 1226 1227 // Delegate to the previous constructor, because later copy constructor may 1228 // actually inspects category, which can't be garbage. 
1229 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag) 1230 : IEEEFloat(ourSemantics) {} 1231 1232 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) { 1233 initialize(rhs.semantics); 1234 assign(rhs); 1235 } 1236 1237 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) { 1238 *this = std::move(rhs); 1239 } 1240 1241 IEEEFloat::~IEEEFloat() { freeSignificand(); } 1242 1243 unsigned int IEEEFloat::partCount() const { 1244 return partCountForBits(semantics->precision + 1); 1245 } 1246 1247 const APFloat::integerPart *IEEEFloat::significandParts() const { 1248 return const_cast<IEEEFloat *>(this)->significandParts(); 1249 } 1250 1251 APFloat::integerPart *IEEEFloat::significandParts() { 1252 if (partCount() > 1) 1253 return significand.parts; 1254 else 1255 return &significand.part; 1256 } 1257 1258 void IEEEFloat::zeroSignificand() { 1259 APInt::tcSet(significandParts(), 0, partCount()); 1260 } 1261 1262 /* Increment an fcNormal floating point number's significand. */ 1263 void IEEEFloat::incrementSignificand() { 1264 integerPart carry; 1265 1266 carry = APInt::tcIncrement(significandParts(), partCount()); 1267 1268 /* Our callers should never cause us to overflow. */ 1269 assert(carry == 0); 1270 (void)carry; 1271 } 1272 1273 /* Add the significand of the RHS. Returns the carry flag. */ 1274 APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) { 1275 integerPart *parts; 1276 1277 parts = significandParts(); 1278 1279 assert(semantics == rhs.semantics); 1280 assert(exponent == rhs.exponent); 1281 1282 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 1283 } 1284 1285 /* Subtract the significand of the RHS with a borrow flag. Returns 1286 the borrow flag. 
*/
APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
                                                    integerPart borrow) {
  integerPart *parts;

  parts = significandParts();

  assert(semantics == rhs.semantics);
  assert(exponent == rhs.exponent);

  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
                           partCount());
}

/* Multiply our significand by the significand of the RHS.  Unless
   IGNOREADDEND is set or ADDEND is zero, ADDEND is added on to the
   full-precision result of the multiplication.  Returns the lost
   fraction.  The exponent is updated; the result is not necessarily
   normalized (see the note near the end).  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend,
                                            bool ignoreAddend) {
  unsigned int omsb; // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Up to four parts fit in the on-stack scratch buffer; anything larger is
  // heap-allocated and released at the end of this function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (!ignoreAddend && addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Temporarily make *this refer to the wide product so the ordinary
    // significand add/subtract helper can operate on it.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
                                    &ignored);
    assert(status == APFloat::opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  */
    // In the single-part case the sum was computed in significand.part, so
    // copy it back into the wide buffer before restoring.
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, one more than
  // this derivation gives; presumably the extra 1 accounts for the overflow
  // bit reserved above -- confirm.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}

lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  // When the given semantics has zero, the addend here is a zero.
  // i.e . it belongs to the 'fcZero' category.
  // But when the semantics does not support zero, we need to
  // explicitly convey that this addend should be ignored
  // for multiplication.
  return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
}

/* Divide our significand by the significand of the RHS, leaving the
   quotient in our significand and adjusting the exponent.  Returns the
   lost fraction.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  // One buffer holds both working copies: the dividend in the first
  // partsCount parts and the divisor in the next partsCount parts.  The
  // four-part stack buffer suffices when partsCount <= 2.
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division.  */
  // One quotient bit per iteration, from bit precision - 1 down to bit 0.
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction.  */
  // The remainder has already been shifted left once more, so comparing it
  // with the divisor tells whether the discarded part is above, at, or
  // below one half.
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}

/* Result of APInt::tcMSB over all significand parts.  */
unsigned int IEEEFloat::significandMSB() const {
  return APInt::tcMSB(significandParts(), partCount());
}

/* Result of APInt::tcLSB over all significand parts.  */
unsigned int IEEEFloat::significandLSB() const {
  return APInt::tcLSB(significandParts(), partCount());
}

/* Shift the significand right BITS bits, add BITS to its exponent, and
   return the lost fraction.
   Note that a zero result is NOT normalized to fcZero.  */
lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
  /* Our exponent should not overflow.  */
  assert((ExponentType) (exponent + bits) >= exponent);

  exponent += bits;

  return shiftRight(significandParts(), partCount(), bits);
}

/* Shift the significand left BITS bits, subtract BITS from its exponent.  */
void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
  assert(bits < semantics->precision ||
         (semantics->precision == 1 && bits <= 1));

  if (bits) {
    unsigned int partsCount = partCount();

    APInt::tcShiftLeft(significandParts(), partsCount, bits);
    exponent -= bits;

    assert(!APInt::tcIsZero(significandParts(), partsCount));
  }
}

/* Compare the magnitudes of two finite non-zero values with the same
   semantics, ignoring sign.  */
APFloat::cmpResult IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
  int compare;

  assert(semantics == rhs.semantics);
  assert(isFiniteNonZero());
  assert(rhs.isFiniteNonZero());

  compare = exponent - rhs.exponent;

  /* If exponents are equal, do an unsigned bignum comparison of the
     significands.  */
  if (compare == 0)
    compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
                               partCount());

  if (compare > 0)
    return cmpGreaterThan;
  else if (compare < 0)
    return cmpLessThan;
  else
    return cmpEqual;
}

/* Set the least significant BITS bits of a bignum, clear the
   rest.  */
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
                                      unsigned bits) {
  unsigned i = 0;
  // Fill whole words while more than one word's worth of bits remains.
  while (bits > APInt::APINT_BITS_PER_WORD) {
    dst[i++] = ~(APInt::WordType)0;
    bits -= APInt::APINT_BITS_PER_WORD;
  }

  // Remaining 1..APINT_BITS_PER_WORD bits go into the next word.
  if (bits)
    dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);

  while (i < parts)
    dst[i++] = 0;
}

/* Handle overflow.  Sign is preserved.  We either become infinity or
   the largest finite number.
*/
APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
  // Formats with FiniteOnly behavior skip this branch entirely and always
  // saturate to the largest finite number below.
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
    /* Infinity?  */
    if (rounding_mode == rmNearestTiesToEven ||
        rounding_mode == rmNearestTiesToAway ||
        (rounding_mode == rmTowardPositive && !sign) ||
        (rounding_mode == rmTowardNegative && sign)) {
      // NanOnly formats produce a NaN here instead of an infinity.
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
        makeNaN(false, sign);
      else
        category = fcInfinity;
      return static_cast<opStatus>(opOverflow | opInexact);
    }
  }

  /* Otherwise we become the largest finite number.  */
  category = fcNormal;
  exponent = semantics->maxExponent;
  tcSetLeastSignificantBits(significandParts(), partCount(),
                            semantics->precision);
  // With the AllOnes NaN encoding the LSB is cleared so the result is the
  // all-ones-except-LSB significand rather than all ones.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    APInt::tcClearBit(significandParts(), 0);

  return opInexact;
}

/* Returns TRUE if, when truncating the current number, with BIT the
   new LSB, with the given lost fraction and rounding mode, the result
   would need to be rounded away from zero (i.e., by increasing the
   significand).  This routine must work for fcZero of both signs, and
   fcNormal numbers.  */
bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
                                  lostFraction lost_fraction,
                                  unsigned int bit) const {
  /* NaNs and infinities should not have lost fractions.  */
  assert(isFiniteNonZero() || category == fcZero);

  /* Current callers never pass this so we don't handle it.  */
  assert(lost_fraction != lfExactlyZero);

  switch (rounding_mode) {
  case rmNearestTiesToAway:
    return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;

  case rmNearestTiesToEven:
    if (lost_fraction == lfMoreThanHalf)
      return true;

    /* Our zeroes don't have a significand to test.  */
    // On an exact tie, round away only when bit BIT is set.
    if (lost_fraction == lfExactlyHalf && category != fcZero)
      return APInt::tcExtractBit(significandParts(), bit);

    return false;

  case rmTowardZero:
    return false;

  case rmTowardPositive:
    return !sign;

  case rmTowardNegative:
    return sign;

  default:
    break;
  }
  llvm_unreachable("Invalid rounding mode found");
}

/* Normalize a finite non-zero value and round it according to
   ROUNDING_MODE, given the LOST_FRACTION already discarded by the
   caller.  Other categories are returned unchanged with opOK.  Returns
   the resulting status flags.  */
APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                       lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones values is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  */
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      // Formats without a zero use the smallest normalized value instead.
      if (!semantics->hasZero)
        makeSmallestNormalized(false);
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones values is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
    // This condition handles the case where the semantics
    // does not have zero but uses the all-zero encoding
    // to represent the smallest normal value.
    if (!semantics->hasZero)
      makeSmallestNormalized(false);
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}

/* Handle addition/subtraction when the outcome is decided by the operand
   categories alone.  SUBTRACT selects subtraction of RHS.  */
APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                   bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // Only RHS is a NaN: adopt it, then share the NaN handling below.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    // A signaling NaN is quieted and reported as an invalid operation.
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The left operand is already the result.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted.  */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  // NOTE(review): opDivByZero looks like a sentinel here, presumably telling
  // the caller that both operands are normal and real arithmetic is still
  // required -- confirm against the callers.
  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;
  }
}

/* Add or subtract two normal numbers.
*/ 1865 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs, 1866 bool subtract) { 1867 integerPart carry; 1868 lostFraction lost_fraction; 1869 int bits; 1870 1871 /* Determine if the operation on the absolute values is effectively 1872 an addition or subtraction. */ 1873 subtract ^= static_cast<bool>(sign ^ rhs.sign); 1874 1875 /* Are we bigger exponent-wise than the RHS? */ 1876 bits = exponent - rhs.exponent; 1877 1878 /* Subtraction is more subtle than one might naively expect. */ 1879 if (subtract) { 1880 if ((bits < 0) && !semantics->hasSignedRepr) 1881 llvm_unreachable( 1882 "This floating point format does not support signed values"); 1883 1884 IEEEFloat temp_rhs(rhs); 1885 1886 if (bits == 0) 1887 lost_fraction = lfExactlyZero; 1888 else if (bits > 0) { 1889 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1890 shiftSignificandLeft(1); 1891 } else { 1892 lost_fraction = shiftSignificandRight(-bits - 1); 1893 temp_rhs.shiftSignificandLeft(1); 1894 } 1895 1896 // Should we reverse the subtraction. 1897 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) { 1898 carry = temp_rhs.subtractSignificand 1899 (*this, lost_fraction != lfExactlyZero); 1900 copySignificand(temp_rhs); 1901 sign = !sign; 1902 } else { 1903 carry = subtractSignificand 1904 (temp_rhs, lost_fraction != lfExactlyZero); 1905 } 1906 1907 /* Invert the lost fraction - it was on the RHS and 1908 subtracted. */ 1909 if (lost_fraction == lfLessThanHalf) 1910 lost_fraction = lfMoreThanHalf; 1911 else if (lost_fraction == lfMoreThanHalf) 1912 lost_fraction = lfLessThanHalf; 1913 1914 /* The code above is intended to ensure that no borrow is 1915 necessary. 
*/ 1916 assert(!carry); 1917 (void)carry; 1918 } else { 1919 if (bits > 0) { 1920 IEEEFloat temp_rhs(rhs); 1921 1922 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1923 carry = addSignificand(temp_rhs); 1924 } else { 1925 lost_fraction = shiftSignificandRight(-bits); 1926 carry = addSignificand(rhs); 1927 } 1928 1929 /* We have a guard bit; generating a carry cannot happen. */ 1930 assert(!carry); 1931 (void)carry; 1932 } 1933 1934 return lost_fraction; 1935 } 1936 1937 APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) { 1938 switch (PackCategoriesIntoKey(category, rhs.category)) { 1939 default: 1940 llvm_unreachable(nullptr); 1941 1942 case PackCategoriesIntoKey(fcZero, fcNaN): 1943 case PackCategoriesIntoKey(fcNormal, fcNaN): 1944 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1945 assign(rhs); 1946 sign = false; 1947 [[fallthrough]]; 1948 case PackCategoriesIntoKey(fcNaN, fcZero): 1949 case PackCategoriesIntoKey(fcNaN, fcNormal): 1950 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1951 case PackCategoriesIntoKey(fcNaN, fcNaN): 1952 sign ^= rhs.sign; // restore the original sign 1953 if (isSignaling()) { 1954 makeQuiet(); 1955 return opInvalidOp; 1956 } 1957 return rhs.isSignaling() ? 
opInvalidOp : opOK; 1958 1959 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1960 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1961 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1962 category = fcInfinity; 1963 return opOK; 1964 1965 case PackCategoriesIntoKey(fcZero, fcNormal): 1966 case PackCategoriesIntoKey(fcNormal, fcZero): 1967 case PackCategoriesIntoKey(fcZero, fcZero): 1968 category = fcZero; 1969 return opOK; 1970 1971 case PackCategoriesIntoKey(fcZero, fcInfinity): 1972 case PackCategoriesIntoKey(fcInfinity, fcZero): 1973 makeNaN(); 1974 return opInvalidOp; 1975 1976 case PackCategoriesIntoKey(fcNormal, fcNormal): 1977 return opOK; 1978 } 1979 } 1980 1981 APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { 1982 switch (PackCategoriesIntoKey(category, rhs.category)) { 1983 default: 1984 llvm_unreachable(nullptr); 1985 1986 case PackCategoriesIntoKey(fcZero, fcNaN): 1987 case PackCategoriesIntoKey(fcNormal, fcNaN): 1988 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1989 assign(rhs); 1990 sign = false; 1991 [[fallthrough]]; 1992 case PackCategoriesIntoKey(fcNaN, fcZero): 1993 case PackCategoriesIntoKey(fcNaN, fcNormal): 1994 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1995 case PackCategoriesIntoKey(fcNaN, fcNaN): 1996 sign ^= rhs.sign; // restore the original sign 1997 if (isSignaling()) { 1998 makeQuiet(); 1999 return opInvalidOp; 2000 } 2001 return rhs.isSignaling() ? 
opInvalidOp : opOK; 2002 2003 case PackCategoriesIntoKey(fcInfinity, fcZero): 2004 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2005 case PackCategoriesIntoKey(fcZero, fcInfinity): 2006 case PackCategoriesIntoKey(fcZero, fcNormal): 2007 return opOK; 2008 2009 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2010 category = fcZero; 2011 return opOK; 2012 2013 case PackCategoriesIntoKey(fcNormal, fcZero): 2014 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) 2015 makeNaN(false, sign); 2016 else 2017 category = fcInfinity; 2018 return opDivByZero; 2019 2020 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2021 case PackCategoriesIntoKey(fcZero, fcZero): 2022 makeNaN(); 2023 return opInvalidOp; 2024 2025 case PackCategoriesIntoKey(fcNormal, fcNormal): 2026 return opOK; 2027 } 2028 } 2029 2030 APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) { 2031 switch (PackCategoriesIntoKey(category, rhs.category)) { 2032 default: 2033 llvm_unreachable(nullptr); 2034 2035 case PackCategoriesIntoKey(fcZero, fcNaN): 2036 case PackCategoriesIntoKey(fcNormal, fcNaN): 2037 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2038 assign(rhs); 2039 [[fallthrough]]; 2040 case PackCategoriesIntoKey(fcNaN, fcZero): 2041 case PackCategoriesIntoKey(fcNaN, fcNormal): 2042 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2043 case PackCategoriesIntoKey(fcNaN, fcNaN): 2044 if (isSignaling()) { 2045 makeQuiet(); 2046 return opInvalidOp; 2047 } 2048 return rhs.isSignaling() ? 
opInvalidOp : opOK; 2049 2050 case PackCategoriesIntoKey(fcZero, fcInfinity): 2051 case PackCategoriesIntoKey(fcZero, fcNormal): 2052 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2053 return opOK; 2054 2055 case PackCategoriesIntoKey(fcNormal, fcZero): 2056 case PackCategoriesIntoKey(fcInfinity, fcZero): 2057 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2058 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2059 case PackCategoriesIntoKey(fcZero, fcZero): 2060 makeNaN(); 2061 return opInvalidOp; 2062 2063 case PackCategoriesIntoKey(fcNormal, fcNormal): 2064 return opOK; 2065 } 2066 } 2067 2068 APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { 2069 switch (PackCategoriesIntoKey(category, rhs.category)) { 2070 default: 2071 llvm_unreachable(nullptr); 2072 2073 case PackCategoriesIntoKey(fcZero, fcNaN): 2074 case PackCategoriesIntoKey(fcNormal, fcNaN): 2075 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2076 assign(rhs); 2077 [[fallthrough]]; 2078 case PackCategoriesIntoKey(fcNaN, fcZero): 2079 case PackCategoriesIntoKey(fcNaN, fcNormal): 2080 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2081 case PackCategoriesIntoKey(fcNaN, fcNaN): 2082 if (isSignaling()) { 2083 makeQuiet(); 2084 return opInvalidOp; 2085 } 2086 return rhs.isSignaling() ? opInvalidOp : opOK; 2087 2088 case PackCategoriesIntoKey(fcZero, fcInfinity): 2089 case PackCategoriesIntoKey(fcZero, fcNormal): 2090 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2091 return opOK; 2092 2093 case PackCategoriesIntoKey(fcNormal, fcZero): 2094 case PackCategoriesIntoKey(fcInfinity, fcZero): 2095 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2096 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2097 case PackCategoriesIntoKey(fcZero, fcZero): 2098 makeNaN(); 2099 return opInvalidOp; 2100 2101 case PackCategoriesIntoKey(fcNormal, fcNormal): 2102 return opDivByZero; // fake status, indicating this is not a special case 2103 } 2104 } 2105 2106 /* Change sign. 
*/ 2107 void IEEEFloat::changeSign() { 2108 // With NaN-as-negative-zero, neither NaN or negative zero can change 2109 // their signs. 2110 if (semantics->nanEncoding == fltNanEncoding::NegativeZero && 2111 (isZero() || isNaN())) 2112 return; 2113 /* Look mummy, this one's easy. */ 2114 sign = !sign; 2115 } 2116 2117 /* Normalized addition or subtraction. */ 2118 APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, 2119 roundingMode rounding_mode, 2120 bool subtract) { 2121 opStatus fs; 2122 2123 fs = addOrSubtractSpecials(rhs, subtract); 2124 2125 /* This return code means it was not a simple case. */ 2126 if (fs == opDivByZero) { 2127 lostFraction lost_fraction; 2128 2129 lost_fraction = addOrSubtractSignificand(rhs, subtract); 2130 fs = normalize(rounding_mode, lost_fraction); 2131 2132 /* Can only be zero if we lost no fraction. */ 2133 assert(category != fcZero || lost_fraction == lfExactlyZero); 2134 } 2135 2136 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2137 positive zero unless rounding to minus infinity, except that 2138 adding two like-signed zeroes gives that zero. */ 2139 if (category == fcZero) { 2140 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 2141 sign = (rounding_mode == rmTowardNegative); 2142 // NaN-in-negative-zero means zeros need to be normalized to +0. 2143 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2144 sign = false; 2145 } 2146 2147 return fs; 2148 } 2149 2150 /* Normalized addition. */ 2151 APFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs, 2152 roundingMode rounding_mode) { 2153 return addOrSubtract(rhs, rounding_mode, false); 2154 } 2155 2156 /* Normalized subtraction. */ 2157 APFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs, 2158 roundingMode rounding_mode) { 2159 return addOrSubtract(rhs, rounding_mode, true); 2160 } 2161 2162 /* Normalized multiply. 
*/ 2163 APFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, 2164 roundingMode rounding_mode) { 2165 opStatus fs; 2166 2167 sign ^= rhs.sign; 2168 fs = multiplySpecials(rhs); 2169 2170 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2171 sign = false; 2172 if (isFiniteNonZero()) { 2173 lostFraction lost_fraction = multiplySignificand(rhs); 2174 fs = normalize(rounding_mode, lost_fraction); 2175 if (lost_fraction != lfExactlyZero) 2176 fs = (opStatus) (fs | opInexact); 2177 } 2178 2179 return fs; 2180 } 2181 2182 /* Normalized divide. */ 2183 APFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, 2184 roundingMode rounding_mode) { 2185 opStatus fs; 2186 2187 sign ^= rhs.sign; 2188 fs = divideSpecials(rhs); 2189 2190 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) 2191 sign = false; 2192 if (isFiniteNonZero()) { 2193 lostFraction lost_fraction = divideSignificand(rhs); 2194 fs = normalize(rounding_mode, lost_fraction); 2195 if (lost_fraction != lfExactlyZero) 2196 fs = (opStatus) (fs | opInexact); 2197 } 2198 2199 return fs; 2200 } 2201 2202 /* Normalized remainder. */ 2203 APFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { 2204 opStatus fs; 2205 unsigned int origSign = sign; 2206 2207 // First handle the special cases. 2208 fs = remainderSpecials(rhs); 2209 if (fs != opDivByZero) 2210 return fs; 2211 2212 fs = opOK; 2213 2214 // Make sure the current value is less than twice the denom. If the addition 2215 // did not succeed (an overflow has happened), which means that the finite 2216 // value we currently posses must be less than twice the denom (as we are 2217 // using the same semantics). 2218 IEEEFloat P2 = rhs; 2219 if (P2.add(rhs, rmNearestTiesToEven) == opOK) { 2220 fs = mod(P2); 2221 assert(fs == opOK); 2222 } 2223 2224 // Lets work with absolute numbers. 
2225 IEEEFloat P = rhs; 2226 P.sign = false; 2227 sign = false; 2228 2229 // 2230 // To calculate the remainder we use the following scheme. 2231 // 2232 // The remainder is defained as follows: 2233 // 2234 // remainder = numer - rquot * denom = x - r * p 2235 // 2236 // Where r is the result of: x/p, rounded toward the nearest integral value 2237 // (with halfway cases rounded toward the even number). 2238 // 2239 // Currently, (after x mod 2p): 2240 // r is the number of 2p's present inside x, which is inherently, an even 2241 // number of p's. 2242 // 2243 // We may split the remaining calculation into 4 options: 2244 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2245 // - if x == 0.5p then we round to the nearest even number which is 0, and we 2246 // are done as well. 2247 // - if 0.5p < x < p then we round to nearest number which is 1, and we have 2248 // to subtract 1p at least once. 2249 // - if x >= p then we must subtract p at least once, as x must be a 2250 // remainder. 2251 // 2252 // By now, we were done, or we added 1 to r, which in turn, now an odd number. 2253 // 2254 // We can now split the remaining calculation to the following 3 options: 2255 // - if x < 0.5p then we round to the nearest number with is 0, and are done. 2256 // - if x == 0.5p then we round to the nearest even number. As r is odd, we 2257 // must round up to the next even number. so we must subtract p once more. 2258 // - if x > 0.5p (and inherently x < p) then we must round r up to the next 2259 // integral, and subtract p once more. 2260 // 2261 2262 // Extend the semantics to prevent an overflow/underflow or inexact result. 
2263 bool losesInfo; 2264 fltSemantics extendedSemantics = *semantics; 2265 extendedSemantics.maxExponent++; 2266 extendedSemantics.minExponent--; 2267 extendedSemantics.precision += 2; 2268 2269 IEEEFloat VEx = *this; 2270 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2271 assert(fs == opOK && !losesInfo); 2272 IEEEFloat PEx = P; 2273 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2274 assert(fs == opOK && !losesInfo); 2275 2276 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose 2277 // any fraction. 2278 fs = VEx.add(VEx, rmNearestTiesToEven); 2279 assert(fs == opOK); 2280 2281 if (VEx.compare(PEx) == cmpGreaterThan) { 2282 fs = subtract(P, rmNearestTiesToEven); 2283 assert(fs == opOK); 2284 2285 // Make VEx = this.add(this), but because we have different semantics, we do 2286 // not want to `convert` again, so we just subtract PEx twice (which equals 2287 // to the desired value). 2288 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2289 assert(fs == opOK); 2290 fs = VEx.subtract(PEx, rmNearestTiesToEven); 2291 assert(fs == opOK); 2292 2293 cmpResult result = VEx.compare(PEx); 2294 if (result == cmpGreaterThan || result == cmpEqual) { 2295 fs = subtract(P, rmNearestTiesToEven); 2296 assert(fs == opOK); 2297 } 2298 } 2299 2300 if (isZero()) { 2301 sign = origSign; // IEEE754 requires this 2302 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2303 // But some 8-bit floats only have positive 0. 2304 sign = false; 2305 } 2306 2307 else 2308 sign ^= origSign; 2309 return fs; 2310 } 2311 2312 /* Normalized llvm frem (C fmod). 
*/ 2313 APFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { 2314 opStatus fs; 2315 fs = modSpecials(rhs); 2316 unsigned int origSign = sign; 2317 2318 while (isFiniteNonZero() && rhs.isFiniteNonZero() && 2319 compareAbsoluteValue(rhs) != cmpLessThan) { 2320 int Exp = ilogb(*this) - ilogb(rhs); 2321 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven); 2322 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly 2323 // check for it. 2324 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan) 2325 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven); 2326 V.sign = sign; 2327 2328 fs = subtract(V, rmNearestTiesToEven); 2329 2330 // When the semantics supports zero, this loop's 2331 // exit-condition is handled by the 'isFiniteNonZero' 2332 // category check above. However, when the semantics 2333 // does not have 'fcZero' and we have reached the 2334 // minimum possible value, (and any further subtract 2335 // will underflow to the same value) explicitly 2336 // provide an exit-path here. 2337 if (!semantics->hasZero && this->isSmallest()) 2338 break; 2339 2340 assert(fs==opOK); 2341 } 2342 if (isZero()) { 2343 sign = origSign; // fmod requires this 2344 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2345 sign = false; 2346 } 2347 return fs; 2348 } 2349 2350 /* Normalized fused-multiply-add. */ 2351 APFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, 2352 const IEEEFloat &addend, 2353 roundingMode rounding_mode) { 2354 opStatus fs; 2355 2356 /* Post-multiplication sign, before addition. */ 2357 sign ^= multiplicand.sign; 2358 2359 /* If and only if all arguments are normal do we need to do an 2360 extended-precision calculation. 
*/ 2361 if (isFiniteNonZero() && 2362 multiplicand.isFiniteNonZero() && 2363 addend.isFinite()) { 2364 lostFraction lost_fraction; 2365 2366 lost_fraction = multiplySignificand(multiplicand, addend); 2367 fs = normalize(rounding_mode, lost_fraction); 2368 if (lost_fraction != lfExactlyZero) 2369 fs = (opStatus) (fs | opInexact); 2370 2371 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 2372 positive zero unless rounding to minus infinity, except that 2373 adding two like-signed zeroes gives that zero. */ 2374 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { 2375 sign = (rounding_mode == rmTowardNegative); 2376 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 2377 sign = false; 2378 } 2379 } else { 2380 fs = multiplySpecials(multiplicand); 2381 2382 /* FS can only be opOK or opInvalidOp. There is no more work 2383 to do in the latter case. The IEEE-754R standard says it is 2384 implementation-defined in this case whether, if ADDEND is a 2385 quiet NaN, we raise invalid op; this implementation does so. 2386 2387 If we need to do the addition we can do so with normal 2388 precision. */ 2389 if (fs == opOK) 2390 fs = addOrSubtract(addend, rounding_mode, false); 2391 } 2392 2393 return fs; 2394 } 2395 2396 /* Rounding-mode correct round to integral value. */ 2397 APFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) { 2398 opStatus fs; 2399 2400 if (isInfinity()) 2401 // [IEEE Std 754-2008 6.1]: 2402 // The behavior of infinity in floating-point arithmetic is derived from the 2403 // limiting cases of real arithmetic with operands of arbitrarily 2404 // large magnitude, when such a limit exists. 2405 // ... 2406 // Operations on infinite operands are usually exact and therefore signal no 2407 // exceptions ... 
2408 return opOK; 2409 2410 if (isNaN()) { 2411 if (isSignaling()) { 2412 // [IEEE Std 754-2008 6.2]: 2413 // Under default exception handling, any operation signaling an invalid 2414 // operation exception and for which a floating-point result is to be 2415 // delivered shall deliver a quiet NaN. 2416 makeQuiet(); 2417 // [IEEE Std 754-2008 6.2]: 2418 // Signaling NaNs shall be reserved operands that, under default exception 2419 // handling, signal the invalid operation exception(see 7.2) for every 2420 // general-computational and signaling-computational operation except for 2421 // the conversions described in 5.12. 2422 return opInvalidOp; 2423 } else { 2424 // [IEEE Std 754-2008 6.2]: 2425 // For an operation with quiet NaN inputs, other than maximum and minimum 2426 // operations, if a floating-point result is to be delivered the result 2427 // shall be a quiet NaN which should be one of the input NaNs. 2428 // ... 2429 // Every general-computational and quiet-computational operation involving 2430 // one or more input NaNs, none of them signaling, shall signal no 2431 // exception, except fusedMultiplyAdd might signal the invalid operation 2432 // exception(see 7.2). 2433 return opOK; 2434 } 2435 } 2436 2437 if (isZero()) { 2438 // [IEEE Std 754-2008 6.3]: 2439 // ... the sign of the result of conversions, the quantize operation, the 2440 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is 2441 // the sign of the first or only operand. 2442 return opOK; 2443 } 2444 2445 // If the exponent is large enough, we know that this value is already 2446 // integral, and the arithmetic below would potentially cause it to saturate 2447 // to +/-Inf. Bail out early instead. 2448 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics)) 2449 return opOK; 2450 2451 // The algorithm here is quite simple: we add 2^(p-1), where p is the 2452 // precision of our format, and then subtract it back off again. 
The choice 2453 // of rounding modes for the addition/subtraction determines the rounding mode 2454 // for our integral rounding as well. 2455 // NOTE: When the input value is negative, we do subtraction followed by 2456 // addition instead. 2457 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)), 2458 1); 2459 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1; 2460 IEEEFloat MagicConstant(*semantics); 2461 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 2462 rmNearestTiesToEven); 2463 assert(fs == opOK); 2464 MagicConstant.sign = sign; 2465 2466 // Preserve the input sign so that we can handle the case of zero result 2467 // correctly. 2468 bool inputSign = isNegative(); 2469 2470 fs = add(MagicConstant, rounding_mode); 2471 2472 // Current value and 'MagicConstant' are both integers, so the result of the 2473 // subtraction is always exact according to Sterbenz' lemma. 2474 subtract(MagicConstant, rounding_mode); 2475 2476 // Restore the input sign. 2477 if (inputSign != isNegative()) 2478 changeSign(); 2479 2480 return fs; 2481 } 2482 2483 /* Comparison requires normalized numbers. 
*/ 2484 APFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const { 2485 cmpResult result; 2486 2487 assert(semantics == rhs.semantics); 2488 2489 switch (PackCategoriesIntoKey(category, rhs.category)) { 2490 default: 2491 llvm_unreachable(nullptr); 2492 2493 case PackCategoriesIntoKey(fcNaN, fcZero): 2494 case PackCategoriesIntoKey(fcNaN, fcNormal): 2495 case PackCategoriesIntoKey(fcNaN, fcInfinity): 2496 case PackCategoriesIntoKey(fcNaN, fcNaN): 2497 case PackCategoriesIntoKey(fcZero, fcNaN): 2498 case PackCategoriesIntoKey(fcNormal, fcNaN): 2499 case PackCategoriesIntoKey(fcInfinity, fcNaN): 2500 return cmpUnordered; 2501 2502 case PackCategoriesIntoKey(fcInfinity, fcNormal): 2503 case PackCategoriesIntoKey(fcInfinity, fcZero): 2504 case PackCategoriesIntoKey(fcNormal, fcZero): 2505 if (sign) 2506 return cmpLessThan; 2507 else 2508 return cmpGreaterThan; 2509 2510 case PackCategoriesIntoKey(fcNormal, fcInfinity): 2511 case PackCategoriesIntoKey(fcZero, fcInfinity): 2512 case PackCategoriesIntoKey(fcZero, fcNormal): 2513 if (rhs.sign) 2514 return cmpGreaterThan; 2515 else 2516 return cmpLessThan; 2517 2518 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 2519 if (sign == rhs.sign) 2520 return cmpEqual; 2521 else if (sign) 2522 return cmpLessThan; 2523 else 2524 return cmpGreaterThan; 2525 2526 case PackCategoriesIntoKey(fcZero, fcZero): 2527 return cmpEqual; 2528 2529 case PackCategoriesIntoKey(fcNormal, fcNormal): 2530 break; 2531 } 2532 2533 /* Two normal numbers. Do they have the same sign? */ 2534 if (sign != rhs.sign) { 2535 if (sign) 2536 result = cmpLessThan; 2537 else 2538 result = cmpGreaterThan; 2539 } else { 2540 /* Compare absolute values; invert result if negative. 
*/ 2541 result = compareAbsoluteValue(rhs); 2542 2543 if (sign) { 2544 if (result == cmpLessThan) 2545 result = cmpGreaterThan; 2546 else if (result == cmpGreaterThan) 2547 result = cmpLessThan; 2548 } 2549 } 2550 2551 return result; 2552 } 2553 2554 /// IEEEFloat::convert - convert a value of one floating point type to another. 2555 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 2556 /// records whether the transformation lost information, i.e. whether 2557 /// converting the result back to the original type will produce the 2558 /// original value (this is almost the same as return value==fsOK, but there 2559 /// are edge cases where this is not so). 2560 2561 APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, 2562 roundingMode rounding_mode, 2563 bool *losesInfo) { 2564 lostFraction lostFraction; 2565 unsigned int newPartCount, oldPartCount; 2566 opStatus fs; 2567 int shift; 2568 const fltSemantics &fromSemantics = *semantics; 2569 bool is_signaling = isSignaling(); 2570 2571 lostFraction = lfExactlyZero; 2572 newPartCount = partCountForBits(toSemantics.precision + 1); 2573 oldPartCount = partCount(); 2574 shift = toSemantics.precision - fromSemantics.precision; 2575 2576 bool X86SpecialNan = false; 2577 if (&fromSemantics == &semX87DoubleExtended && 2578 &toSemantics != &semX87DoubleExtended && category == fcNaN && 2579 (!(*significandParts() & 0x8000000000000000ULL) || 2580 !(*significandParts() & 0x4000000000000000ULL))) { 2581 // x86 has some unusual NaNs which cannot be represented in any other 2582 // format; note them here. 2583 X86SpecialNan = true; 2584 } 2585 2586 // If this is a truncation of a denormal number, and the target semantics 2587 // has larger exponent range than the source semantics (this can happen 2588 // when truncating from PowerPC double-double to double format), the 2589 // right shift could lose result mantissa bits. Adjust exponent instead 2590 // of performing excessive shift. 
2591 // Also do a similar trick in case shifting denormal would produce zero 2592 // significand as this case isn't handled correctly by normalize. 2593 if (shift < 0 && isFiniteNonZero()) { 2594 int omsb = significandMSB() + 1; 2595 int exponentChange = omsb - fromSemantics.precision; 2596 if (exponent + exponentChange < toSemantics.minExponent) 2597 exponentChange = toSemantics.minExponent - exponent; 2598 if (exponentChange < shift) 2599 exponentChange = shift; 2600 if (exponentChange < 0) { 2601 shift -= exponentChange; 2602 exponent += exponentChange; 2603 } else if (omsb <= -shift) { 2604 exponentChange = omsb + shift - 1; // leave at least one bit set 2605 shift -= exponentChange; 2606 exponent += exponentChange; 2607 } 2608 } 2609 2610 // If this is a truncation, perform the shift before we narrow the storage. 2611 if (shift < 0 && (isFiniteNonZero() || 2612 (category == fcNaN && semantics->nonFiniteBehavior != 2613 fltNonfiniteBehavior::NanOnly))) 2614 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2615 2616 // Fix the storage so it can hold to new value. 2617 if (newPartCount > oldPartCount) { 2618 // The new type requires more storage; make it available. 2619 integerPart *newParts; 2620 newParts = new integerPart[newPartCount]; 2621 APInt::tcSet(newParts, 0, newPartCount); 2622 if (isFiniteNonZero() || category==fcNaN) 2623 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2624 freeSignificand(); 2625 significand.parts = newParts; 2626 } else if (newPartCount == 1 && oldPartCount != 1) { 2627 // Switch to built-in storage for a single part. 2628 integerPart newPart = 0; 2629 if (isFiniteNonZero() || category==fcNaN) 2630 newPart = significandParts()[0]; 2631 freeSignificand(); 2632 significand.part = newPart; 2633 } 2634 2635 // Now that we have the right storage, switch the semantics. 2636 semantics = &toSemantics; 2637 2638 // If this is an extension, perform the shift now that the storage is 2639 // available. 
2640 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2641 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2642 2643 if (isFiniteNonZero()) { 2644 fs = normalize(rounding_mode, lostFraction); 2645 *losesInfo = (fs != opOK); 2646 } else if (category == fcNaN) { 2647 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2648 *losesInfo = 2649 fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly; 2650 makeNaN(false, sign); 2651 return is_signaling ? opInvalidOp : opOK; 2652 } 2653 2654 // If NaN is negative zero, we need to create a new NaN to avoid converting 2655 // NaN to -Inf. 2656 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && 2657 semantics->nanEncoding != fltNanEncoding::NegativeZero) 2658 makeNaN(false, false); 2659 2660 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2661 2662 // For x87 extended precision, we want to make a NaN, not a special NaN if 2663 // the input wasn't special either. 2664 if (!X86SpecialNan && semantics == &semX87DoubleExtended) 2665 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2666 2667 // Convert of sNaN creates qNaN and raises an exception (invalid op). 2668 // This also guarantees that a sNaN does not become Inf on a truncation 2669 // that loses all payload bits. 2670 if (is_signaling) { 2671 makeQuiet(); 2672 fs = opInvalidOp; 2673 } else { 2674 fs = opOK; 2675 } 2676 } else if (category == fcInfinity && 2677 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { 2678 makeNaN(false, sign); 2679 *losesInfo = true; 2680 fs = opInexact; 2681 } else if (category == fcZero && 2682 semantics->nanEncoding == fltNanEncoding::NegativeZero) { 2683 // Negative zero loses info, but positive zero doesn't. 2684 *losesInfo = 2685 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; 2686 fs = *losesInfo ? 
opInexact : opOK; 2687 // NaN is negative zero means -0 -> +0, which can lose information 2688 sign = false; 2689 } else { 2690 *losesInfo = false; 2691 fs = opOK; 2692 } 2693 2694 if (category == fcZero && !semantics->hasZero) 2695 makeSmallestNormalized(false); 2696 return fs; 2697 } 2698 2699 /* Convert a floating point number to an integer according to the 2700 rounding mode. If the rounded integer value is out of range this 2701 returns an invalid operation exception and the contents of the 2702 destination parts are unspecified. If the rounded value is in 2703 range but the floating point number is not the exact integer, the C 2704 standard doesn't require an inexact exception to be raised. IEEE 2705 854 does require it so we do that. 2706 2707 Note that for conversions to integer type the C standard requires 2708 round-to-zero to always be used. */ 2709 APFloat::opStatus IEEEFloat::convertToSignExtendedInteger( 2710 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned, 2711 roundingMode rounding_mode, bool *isExact) const { 2712 lostFraction lost_fraction; 2713 const integerPart *src; 2714 unsigned int dstPartsCount, truncatedBits; 2715 2716 *isExact = false; 2717 2718 /* Handle the three special cases first. */ 2719 if (category == fcInfinity || category == fcNaN) 2720 return opInvalidOp; 2721 2722 dstPartsCount = partCountForBits(width); 2723 assert(dstPartsCount <= parts.size() && "Integer too big"); 2724 2725 if (category == fcZero) { 2726 APInt::tcSet(parts.data(), 0, dstPartsCount); 2727 // Negative zero can't be represented as an int. 2728 *isExact = !sign; 2729 return opOK; 2730 } 2731 2732 src = significandParts(); 2733 2734 /* Step 1: place our absolute value, with any fraction truncated, in 2735 the destination. */ 2736 if (exponent < 0) { 2737 /* Our absolute value is less than one; truncate everything. */ 2738 APInt::tcSet(parts.data(), 0, dstPartsCount); 2739 /* For exponent -1 the integer bit represents .5, look at that. 
2740 For smaller exponents leftmost truncated bit is 0. */ 2741 truncatedBits = semantics->precision -1U - exponent; 2742 } else { 2743 /* We want the most significant (exponent + 1) bits; the rest are 2744 truncated. */ 2745 unsigned int bits = exponent + 1U; 2746 2747 /* Hopelessly large in magnitude? */ 2748 if (bits > width) 2749 return opInvalidOp; 2750 2751 if (bits < semantics->precision) { 2752 /* We truncate (semantics->precision - bits) bits. */ 2753 truncatedBits = semantics->precision - bits; 2754 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits); 2755 } else { 2756 /* We want at least as many bits as are available. */ 2757 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision, 2758 0); 2759 APInt::tcShiftLeft(parts.data(), dstPartsCount, 2760 bits - semantics->precision); 2761 truncatedBits = 0; 2762 } 2763 } 2764 2765 /* Step 2: work out any lost fraction, and increment the absolute 2766 value if we would round away from zero. */ 2767 if (truncatedBits) { 2768 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2769 truncatedBits); 2770 if (lost_fraction != lfExactlyZero && 2771 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2772 if (APInt::tcIncrement(parts.data(), dstPartsCount)) 2773 return opInvalidOp; /* Overflow. */ 2774 } 2775 } else { 2776 lost_fraction = lfExactlyZero; 2777 } 2778 2779 /* Step 3: check if we fit in the destination. */ 2780 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1; 2781 2782 if (sign) { 2783 if (!isSigned) { 2784 /* Negative numbers cannot be represented as unsigned. */ 2785 if (omsb != 0) 2786 return opInvalidOp; 2787 } else { 2788 /* It takes omsb bits to represent the unsigned integer value. 2789 We lose a bit for the sign, but care is needed as the 2790 maximally negative integer is a special case. 
*/ 2791 if (omsb == width && 2792 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb) 2793 return opInvalidOp; 2794 2795 /* This case can happen because of rounding. */ 2796 if (omsb > width) 2797 return opInvalidOp; 2798 } 2799 2800 APInt::tcNegate (parts.data(), dstPartsCount); 2801 } else { 2802 if (omsb >= width + !isSigned) 2803 return opInvalidOp; 2804 } 2805 2806 if (lost_fraction == lfExactlyZero) { 2807 *isExact = true; 2808 return opOK; 2809 } else 2810 return opInexact; 2811 } 2812 2813 /* Same as convertToSignExtendedInteger, except we provide 2814 deterministic values in case of an invalid operation exception, 2815 namely zero for NaNs and the minimal or maximal value respectively 2816 for underflow or overflow. 2817 The *isExact output tells whether the result is exact, in the sense 2818 that converting it back to the original floating point type produces 2819 the original value. This is almost equivalent to result==opOK, 2820 except for negative zeroes. 2821 */ 2822 APFloat::opStatus 2823 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts, 2824 unsigned int width, bool isSigned, 2825 roundingMode rounding_mode, bool *isExact) const { 2826 opStatus fs; 2827 2828 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2829 isExact); 2830 2831 if (fs == opInvalidOp) { 2832 unsigned int bits, dstPartsCount; 2833 2834 dstPartsCount = partCountForBits(width); 2835 assert(dstPartsCount <= parts.size() && "Integer too big"); 2836 2837 if (category == fcNaN) 2838 bits = 0; 2839 else if (sign) 2840 bits = isSigned; 2841 else 2842 bits = width - isSigned; 2843 2844 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits); 2845 if (sign && isSigned) 2846 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1); 2847 } 2848 2849 return fs; 2850 } 2851 2852 /* Convert an unsigned integer SRC to a floating point number, 2853 rounding according to ROUNDING_MODE. The sign of the floating 2854 point number is not modified. 
*/ 2855 APFloat::opStatus IEEEFloat::convertFromUnsignedParts( 2856 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) { 2857 unsigned int omsb, precision, dstCount; 2858 integerPart *dst; 2859 lostFraction lost_fraction; 2860 2861 category = fcNormal; 2862 omsb = APInt::tcMSB(src, srcCount) + 1; 2863 dst = significandParts(); 2864 dstCount = partCount(); 2865 precision = semantics->precision; 2866 2867 /* We want the most significant PRECISION bits of SRC. There may not 2868 be that many; extract what we can. */ 2869 if (precision <= omsb) { 2870 exponent = omsb - 1; 2871 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2872 omsb - precision); 2873 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2874 } else { 2875 exponent = precision - 1; 2876 lost_fraction = lfExactlyZero; 2877 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2878 } 2879 2880 return normalize(rounding_mode, lost_fraction); 2881 } 2882 2883 APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned, 2884 roundingMode rounding_mode) { 2885 unsigned int partCount = Val.getNumWords(); 2886 APInt api = Val; 2887 2888 sign = false; 2889 if (isSigned && api.isNegative()) { 2890 sign = true; 2891 api = -api; 2892 } 2893 2894 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2895 } 2896 2897 /* Convert a two's complement integer SRC to a floating point number, 2898 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2899 integer is signed, in which case it must be sign-extended. */ 2900 APFloat::opStatus 2901 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src, 2902 unsigned int srcCount, bool isSigned, 2903 roundingMode rounding_mode) { 2904 opStatus status; 2905 2906 if (isSigned && 2907 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2908 integerPart *copy; 2909 2910 /* If we're signed and negative negate a copy. 
*/ 2911 sign = true; 2912 copy = new integerPart[srcCount]; 2913 APInt::tcAssign(copy, src, srcCount); 2914 APInt::tcNegate(copy, srcCount); 2915 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2916 delete [] copy; 2917 } else { 2918 sign = false; 2919 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2920 } 2921 2922 return status; 2923 } 2924 2925 /* FIXME: should this just take a const APInt reference? */ 2926 APFloat::opStatus 2927 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2928 unsigned int width, bool isSigned, 2929 roundingMode rounding_mode) { 2930 unsigned int partCount = partCountForBits(width); 2931 APInt api = APInt(width, ArrayRef(parts, partCount)); 2932 2933 sign = false; 2934 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2935 sign = true; 2936 api = -api; 2937 } 2938 2939 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2940 } 2941 2942 Expected<APFloat::opStatus> 2943 IEEEFloat::convertFromHexadecimalString(StringRef s, 2944 roundingMode rounding_mode) { 2945 lostFraction lost_fraction = lfExactlyZero; 2946 2947 category = fcNormal; 2948 zeroSignificand(); 2949 exponent = 0; 2950 2951 integerPart *significand = significandParts(); 2952 unsigned partsCount = partCount(); 2953 unsigned bitPos = partsCount * integerPartWidth; 2954 bool computedTrailingFraction = false; 2955 2956 // Skip leading zeroes and any (hexa)decimal point. 
2957 StringRef::iterator begin = s.begin(); 2958 StringRef::iterator end = s.end(); 2959 StringRef::iterator dot; 2960 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2961 if (!PtrOrErr) 2962 return PtrOrErr.takeError(); 2963 StringRef::iterator p = *PtrOrErr; 2964 StringRef::iterator firstSignificantDigit = p; 2965 2966 while (p != end) { 2967 integerPart hex_value; 2968 2969 if (*p == '.') { 2970 if (dot != end) 2971 return createError("String contains multiple dots"); 2972 dot = p++; 2973 continue; 2974 } 2975 2976 hex_value = hexDigitValue(*p); 2977 if (hex_value == UINT_MAX) 2978 break; 2979 2980 p++; 2981 2982 // Store the number while we have space. 2983 if (bitPos) { 2984 bitPos -= 4; 2985 hex_value <<= bitPos % integerPartWidth; 2986 significand[bitPos / integerPartWidth] |= hex_value; 2987 } else if (!computedTrailingFraction) { 2988 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value); 2989 if (!FractOrErr) 2990 return FractOrErr.takeError(); 2991 lost_fraction = *FractOrErr; 2992 computedTrailingFraction = true; 2993 } 2994 } 2995 2996 /* Hex floats require an exponent but not a hexadecimal point. */ 2997 if (p == end) 2998 return createError("Hex strings require an exponent"); 2999 if (*p != 'p' && *p != 'P') 3000 return createError("Invalid character in significand"); 3001 if (p == begin) 3002 return createError("Significand has no digits"); 3003 if (dot != end && p - begin == 1) 3004 return createError("Significand has no digits"); 3005 3006 /* Ignore the exponent if we are zero. */ 3007 if (p != firstSignificantDigit) { 3008 int expAdjustment; 3009 3010 /* Implicit hexadecimal point? */ 3011 if (dot == end) 3012 dot = p; 3013 3014 /* Calculate the exponent adjustment implicit in the number of 3015 significant digits. 
*/ 3016 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 3017 if (expAdjustment < 0) 3018 expAdjustment++; 3019 expAdjustment = expAdjustment * 4 - 1; 3020 3021 /* Adjust for writing the significand starting at the most 3022 significant nibble. */ 3023 expAdjustment += semantics->precision; 3024 expAdjustment -= partsCount * integerPartWidth; 3025 3026 /* Adjust for the given exponent. */ 3027 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment); 3028 if (!ExpOrErr) 3029 return ExpOrErr.takeError(); 3030 exponent = *ExpOrErr; 3031 } 3032 3033 return normalize(rounding_mode, lost_fraction); 3034 } 3035 3036 APFloat::opStatus 3037 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts, 3038 unsigned sigPartCount, int exp, 3039 roundingMode rounding_mode) { 3040 unsigned int parts, pow5PartCount; 3041 fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; 3042 integerPart pow5Parts[maxPowerOfFiveParts]; 3043 bool isNearest; 3044 3045 isNearest = (rounding_mode == rmNearestTiesToEven || 3046 rounding_mode == rmNearestTiesToAway); 3047 3048 parts = partCountForBits(semantics->precision + 11); 3049 3050 /* Calculate pow(5, abs(exp)). */ 3051 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 3052 3053 for (;; parts *= 2) { 3054 opStatus sigStatus, powStatus; 3055 unsigned int excessPrecision, truncatedBits; 3056 3057 calcSemantics.precision = parts * integerPartWidth - 1; 3058 excessPrecision = calcSemantics.precision - semantics->precision; 3059 truncatedBits = excessPrecision; 3060 3061 IEEEFloat decSig(calcSemantics, uninitialized); 3062 decSig.makeZero(sign); 3063 IEEEFloat pow5(calcSemantics); 3064 3065 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 3066 rmNearestTiesToEven); 3067 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 3068 rmNearestTiesToEven); 3069 /* Add exp, as 10^n = 5^n * 2^n. 
*/ 3070 decSig.exponent += exp; 3071 3072 lostFraction calcLostFraction; 3073 integerPart HUerr, HUdistance; 3074 unsigned int powHUerr; 3075 3076 if (exp >= 0) { 3077 /* multiplySignificand leaves the precision-th bit set to 1. */ 3078 calcLostFraction = decSig.multiplySignificand(pow5); 3079 powHUerr = powStatus != opOK; 3080 } else { 3081 calcLostFraction = decSig.divideSignificand(pow5); 3082 /* Denormal numbers have less precision. */ 3083 if (decSig.exponent < semantics->minExponent) { 3084 excessPrecision += (semantics->minExponent - decSig.exponent); 3085 truncatedBits = excessPrecision; 3086 if (excessPrecision > calcSemantics.precision) 3087 excessPrecision = calcSemantics.precision; 3088 } 3089 /* Extra half-ulp lost in reciprocal of exponent. */ 3090 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 3091 } 3092 3093 /* Both multiplySignificand and divideSignificand return the 3094 result with the integer bit set. */ 3095 assert(APInt::tcExtractBit 3096 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 3097 3098 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 3099 powHUerr); 3100 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 3101 excessPrecision, isNearest); 3102 3103 /* Are we guaranteed to round correctly if we truncate? */ 3104 if (HUdistance >= HUerr) { 3105 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 3106 calcSemantics.precision - excessPrecision, 3107 excessPrecision); 3108 /* Take the exponent of decSig. If we tcExtract-ed less bits 3109 above we must adjust our exponent to compensate for the 3110 implicit right shift. 
*/ 3111 exponent = (decSig.exponent + semantics->precision 3112 - (calcSemantics.precision - excessPrecision)); 3113 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 3114 decSig.partCount(), 3115 truncatedBits); 3116 return normalize(rounding_mode, calcLostFraction); 3117 } 3118 } 3119 } 3120 3121 Expected<APFloat::opStatus> 3122 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { 3123 decimalInfo D; 3124 opStatus fs; 3125 3126 /* Scan the text. */ 3127 StringRef::iterator p = str.begin(); 3128 if (Error Err = interpretDecimal(p, str.end(), &D)) 3129 return std::move(Err); 3130 3131 /* Handle the quick cases. First the case of no significant digits, 3132 i.e. zero, and then exponents that are obviously too large or too 3133 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 3134 definitely overflows if 3135 3136 (exp - 1) * L >= maxExponent 3137 3138 and definitely underflows to zero where 3139 3140 (exp + 1) * L <= minExponent - precision 3141 3142 With integer arithmetic the tightest bounds for L are 3143 3144 93/28 < L < 196/59 [ numerator <= 256 ] 3145 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 3146 */ 3147 3148 // Test if we have a zero number allowing for strings with no null terminators 3149 // and zero decimals with non-zero exponents. 3150 // 3151 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 3152 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 3153 // be at most one dot. On the other hand, if we have a zero with a non-zero 3154 // exponent, then we know that D.firstSigDigit will be non-numeric. 
3155 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 3156 category = fcZero; 3157 fs = opOK; 3158 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) 3159 sign = false; 3160 if (!semantics->hasZero) 3161 makeSmallestNormalized(false); 3162 3163 /* Check whether the normalized exponent is high enough to overflow 3164 max during the log-rebasing in the max-exponent check below. */ 3165 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 3166 fs = handleOverflow(rounding_mode); 3167 3168 /* If it wasn't, then it also wasn't high enough to overflow max 3169 during the log-rebasing in the min-exponent check. Check that it 3170 won't overflow min in either check, then perform the min-exponent 3171 check. */ 3172 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 3173 (D.normalizedExponent + 1) * 28738 <= 3174 8651 * (semantics->minExponent - (int) semantics->precision)) { 3175 /* Underflow to zero and round. */ 3176 category = fcNormal; 3177 zeroSignificand(); 3178 fs = normalize(rounding_mode, lfLessThanHalf); 3179 3180 /* We can finally safely perform the max-exponent check. */ 3181 } else if ((D.normalizedExponent - 1) * 42039 3182 >= 12655 * semantics->maxExponent) { 3183 /* Overflow and round. */ 3184 fs = handleOverflow(rounding_mode); 3185 } else { 3186 integerPart *decSignificand; 3187 unsigned int partCount; 3188 3189 /* A tight upper bound on number of bits required to hold an 3190 N-digit decimal integer is N * 196 / 59. Allocate enough space 3191 to hold the full significand, and an extra part required by 3192 tcMultiplyPart. */ 3193 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 3194 partCount = partCountForBits(1 + 196 * partCount / 59); 3195 decSignificand = new integerPart[partCount + 1]; 3196 partCount = 0; 3197 3198 /* Convert to binary efficiently - we do almost all multiplication 3199 in an integerPart. 
When this would overflow do we do a single 3200 bignum multiplication, and then revert again to multiplication 3201 in an integerPart. */ 3202 do { 3203 integerPart decValue, val, multiplier; 3204 3205 val = 0; 3206 multiplier = 1; 3207 3208 do { 3209 if (*p == '.') { 3210 p++; 3211 if (p == str.end()) { 3212 break; 3213 } 3214 } 3215 decValue = decDigitValue(*p++); 3216 if (decValue >= 10U) { 3217 delete[] decSignificand; 3218 return createError("Invalid character in significand"); 3219 } 3220 multiplier *= 10; 3221 val = val * 10 + decValue; 3222 /* The maximum number that can be multiplied by ten with any 3223 digit added without overflowing an integerPart. */ 3224 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 3225 3226 /* Multiply out the current part. */ 3227 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 3228 partCount, partCount + 1, false); 3229 3230 /* If we used another part (likely but not guaranteed), increase 3231 the count. */ 3232 if (decSignificand[partCount]) 3233 partCount++; 3234 } while (p <= D.lastSigDigit); 3235 3236 category = fcNormal; 3237 fs = roundSignificandWithExponent(decSignificand, partCount, 3238 D.exponent, rounding_mode); 3239 3240 delete [] decSignificand; 3241 } 3242 3243 return fs; 3244 } 3245 3246 bool IEEEFloat::convertFromStringSpecials(StringRef str) { 3247 const size_t MIN_NAME_SIZE = 3; 3248 3249 if (str.size() < MIN_NAME_SIZE) 3250 return false; 3251 3252 if (str == "inf" || str == "INFINITY" || str == "+Inf") { 3253 makeInf(false); 3254 return true; 3255 } 3256 3257 bool IsNegative = str.front() == '-'; 3258 if (IsNegative) { 3259 str = str.drop_front(); 3260 if (str.size() < MIN_NAME_SIZE) 3261 return false; 3262 3263 if (str == "inf" || str == "INFINITY" || str == "Inf") { 3264 makeInf(true); 3265 return true; 3266 } 3267 } 3268 3269 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. 
3270 bool IsSignaling = str.front() == 's' || str.front() == 'S'; 3271 if (IsSignaling) { 3272 str = str.drop_front(); 3273 if (str.size() < MIN_NAME_SIZE) 3274 return false; 3275 } 3276 3277 if (str.starts_with("nan") || str.starts_with("NaN")) { 3278 str = str.drop_front(3); 3279 3280 // A NaN without payload. 3281 if (str.empty()) { 3282 makeNaN(IsSignaling, IsNegative); 3283 return true; 3284 } 3285 3286 // Allow the payload to be inside parentheses. 3287 if (str.front() == '(') { 3288 // Parentheses should be balanced (and not empty). 3289 if (str.size() <= 2 || str.back() != ')') 3290 return false; 3291 3292 str = str.slice(1, str.size() - 1); 3293 } 3294 3295 // Determine the payload number's radix. 3296 unsigned Radix = 10; 3297 if (str[0] == '0') { 3298 if (str.size() > 1 && tolower(str[1]) == 'x') { 3299 str = str.drop_front(2); 3300 Radix = 16; 3301 } else 3302 Radix = 8; 3303 } 3304 3305 // Parse the payload and make the NaN. 3306 APInt Payload; 3307 if (!str.getAsInteger(Radix, Payload)) { 3308 makeNaN(IsSignaling, IsNegative, &Payload); 3309 return true; 3310 } 3311 } 3312 3313 return false; 3314 } 3315 3316 Expected<APFloat::opStatus> 3317 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) { 3318 if (str.empty()) 3319 return createError("Invalid string length"); 3320 3321 // Handle special cases. 3322 if (convertFromStringSpecials(str)) 3323 return opOK; 3324 3325 /* Handle a leading minus sign. */ 3326 StringRef::iterator p = str.begin(); 3327 size_t slen = str.size(); 3328 sign = *p == '-' ? 
1 : 0; 3329 if (sign && !semantics->hasSignedRepr) 3330 llvm_unreachable( 3331 "This floating point format does not support signed values"); 3332 3333 if (*p == '-' || *p == '+') { 3334 p++; 3335 slen--; 3336 if (!slen) 3337 return createError("String has no digits"); 3338 } 3339 3340 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 3341 if (slen == 2) 3342 return createError("Invalid string"); 3343 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 3344 rounding_mode); 3345 } 3346 3347 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 3348 } 3349 3350 /* Write out a hexadecimal representation of the floating point value 3351 to DST, which must be of sufficient size, in the C99 form 3352 [-]0xh.hhhhp[+-]d. Return the number of characters written, 3353 excluding the terminating NUL. 3354 3355 If UPPERCASE, the output is in upper case, otherwise in lower case. 3356 3357 HEXDIGITS digits appear altogether, rounding the value if 3358 necessary. If HEXDIGITS is 0, the minimal precision to display the 3359 number precisely is used instead. If nothing would appear after 3360 the decimal point it is suppressed. 3361 3362 The decimal exponent is always printed and has at least one digit. 3363 Zero values display an exponent of zero. Infinities and NaNs 3364 appear as "infinity" or "nan" respectively. 3365 3366 The above rules are as specified by C99. There is ambiguity about 3367 what the leading hexadecimal digit should be. This implementation 3368 uses whatever is necessary so that the exponent is displayed as 3369 stored. This implies the exponent will fall within the IEEE format 3370 range, and the leading hexadecimal digit will be 0 (for denormals), 3371 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 3372 any other digits zero). 
3373 */ 3374 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits, 3375 bool upperCase, 3376 roundingMode rounding_mode) const { 3377 char *p; 3378 3379 p = dst; 3380 if (sign) 3381 *dst++ = '-'; 3382 3383 switch (category) { 3384 case fcInfinity: 3385 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 3386 dst += sizeof infinityL - 1; 3387 break; 3388 3389 case fcNaN: 3390 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 3391 dst += sizeof NaNU - 1; 3392 break; 3393 3394 case fcZero: 3395 *dst++ = '0'; 3396 *dst++ = upperCase ? 'X': 'x'; 3397 *dst++ = '0'; 3398 if (hexDigits > 1) { 3399 *dst++ = '.'; 3400 memset (dst, '0', hexDigits - 1); 3401 dst += hexDigits - 1; 3402 } 3403 *dst++ = upperCase ? 'P': 'p'; 3404 *dst++ = '0'; 3405 break; 3406 3407 case fcNormal: 3408 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 3409 break; 3410 } 3411 3412 *dst = 0; 3413 3414 return static_cast<unsigned int>(dst - p); 3415 } 3416 3417 /* Does the hard work of outputting the correctly rounded hexadecimal 3418 form of a normal floating point number with the specified number of 3419 hexadecimal digits. If HEXDIGITS is zero the minimum number of 3420 digits necessary to print the value precisely is output. */ 3421 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 3422 bool upperCase, 3423 roundingMode rounding_mode) const { 3424 unsigned int count, valueBits, shift, partsCount, outputDigits; 3425 const char *hexDigitChars; 3426 const integerPart *significand; 3427 char *p; 3428 bool roundUp; 3429 3430 *dst++ = '0'; 3431 *dst++ = upperCase ? 'X': 'x'; 3432 3433 roundUp = false; 3434 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 3435 3436 significand = significandParts(); 3437 partsCount = partCount(); 3438 3439 /* +3 because the first digit only uses the single integer bit, so 3440 we have 3 virtual zero most-significant-bits. 
*/ 3441 valueBits = semantics->precision + 3; 3442 shift = integerPartWidth - valueBits % integerPartWidth; 3443 3444 /* The natural number of digits required ignoring trailing 3445 insignificant zeroes. */ 3446 outputDigits = (valueBits - significandLSB () + 3) / 4; 3447 3448 /* hexDigits of zero means use the required number for the 3449 precision. Otherwise, see if we are truncating. If we are, 3450 find out if we need to round away from zero. */ 3451 if (hexDigits) { 3452 if (hexDigits < outputDigits) { 3453 /* We are dropping non-zero bits, so need to check how to round. 3454 "bits" is the number of dropped bits. */ 3455 unsigned int bits; 3456 lostFraction fraction; 3457 3458 bits = valueBits - hexDigits * 4; 3459 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 3460 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 3461 } 3462 outputDigits = hexDigits; 3463 } 3464 3465 /* Write the digits consecutively, and start writing in the location 3466 of the hexadecimal point. We move the most significant digit 3467 left and add the hexadecimal point later. */ 3468 p = ++dst; 3469 3470 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 3471 3472 while (outputDigits && count) { 3473 integerPart part; 3474 3475 /* Put the most significant integerPartWidth bits in "part". */ 3476 if (--count == partsCount) 3477 part = 0; /* An imaginary higher zero part. */ 3478 else 3479 part = significand[count] << shift; 3480 3481 if (count && shift) 3482 part |= significand[count - 1] >> (integerPartWidth - shift); 3483 3484 /* Convert as much of "part" to hexdigits as we can. */ 3485 unsigned int curDigits = integerPartWidth / 4; 3486 3487 if (curDigits > outputDigits) 3488 curDigits = outputDigits; 3489 dst += partAsHex (dst, part, curDigits, hexDigitChars); 3490 outputDigits -= curDigits; 3491 } 3492 3493 if (roundUp) { 3494 char *q = dst; 3495 3496 /* Note that hexDigitChars has a trailing '0'. 
*/ 3497 do { 3498 q--; 3499 *q = hexDigitChars[hexDigitValue (*q) + 1]; 3500 } while (*q == '0'); 3501 assert(q >= p); 3502 } else { 3503 /* Add trailing zeroes. */ 3504 memset (dst, '0', outputDigits); 3505 dst += outputDigits; 3506 } 3507 3508 /* Move the most significant digit to before the point, and if there 3509 is something after the decimal point add it. This must come 3510 after rounding above. */ 3511 p[-1] = p[0]; 3512 if (dst -1 == p) 3513 dst--; 3514 else 3515 p[0] = '.'; 3516 3517 /* Finally output the exponent. */ 3518 *dst++ = upperCase ? 'P': 'p'; 3519 3520 return writeSignedDecimal (dst, exponent); 3521 } 3522 3523 hash_code hash_value(const IEEEFloat &Arg) { 3524 if (!Arg.isFiniteNonZero()) 3525 return hash_combine((uint8_t)Arg.category, 3526 // NaN has no sign, fix it at zero. 3527 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 3528 Arg.semantics->precision); 3529 3530 // Normal floats need their exponent and significand hashed. 3531 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 3532 Arg.semantics->precision, Arg.exponent, 3533 hash_combine_range( 3534 Arg.significandParts(), 3535 Arg.significandParts() + Arg.partCount())); 3536 } 3537 3538 // Conversion from APFloat to/from host float/double. It may eventually be 3539 // possible to eliminate these and have everybody deal with APFloats, but that 3540 // will take a while. This approach will not easily extend to long double. 3541 // Current implementation requires integerPartWidth==64, which is correct at 3542 // the moment but could be made more general. 3543 3544 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 3545 // the actual IEEE respresentations. We compensate for that here. 
3546 3547 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const { 3548 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended); 3549 assert(partCount()==2); 3550 3551 uint64_t myexponent, mysignificand; 3552 3553 if (isFiniteNonZero()) { 3554 myexponent = exponent+16383; //bias 3555 mysignificand = significandParts()[0]; 3556 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 3557 myexponent = 0; // denormal 3558 } else if (category==fcZero) { 3559 myexponent = 0; 3560 mysignificand = 0; 3561 } else if (category==fcInfinity) { 3562 myexponent = 0x7fff; 3563 mysignificand = 0x8000000000000000ULL; 3564 } else { 3565 assert(category == fcNaN && "Unknown category"); 3566 myexponent = 0x7fff; 3567 mysignificand = significandParts()[0]; 3568 } 3569 3570 uint64_t words[2]; 3571 words[0] = mysignificand; 3572 words[1] = ((uint64_t)(sign & 1) << 15) | 3573 (myexponent & 0x7fffLL); 3574 return APInt(80, words); 3575 } 3576 3577 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { 3578 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy); 3579 assert(partCount()==2); 3580 3581 uint64_t words[2]; 3582 opStatus fs; 3583 bool losesInfo; 3584 3585 // Convert number to double. To avoid spurious underflows, we re- 3586 // normalize against the "double" minExponent first, and only *then* 3587 // truncate the mantissa. The result of that second conversion 3588 // may be inexact, but should never underflow. 3589 // Declare fltSemantics before APFloat that uses it (and 3590 // saves pointer to it) to ensure correct destruction order. 
3591 fltSemantics extendedSemantics = *semantics; 3592 extendedSemantics.minExponent = semIEEEdouble.minExponent; 3593 IEEEFloat extended(*this); 3594 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3595 assert(fs == opOK && !losesInfo); 3596 (void)fs; 3597 3598 IEEEFloat u(extended); 3599 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3600 assert(fs == opOK || fs == opInexact); 3601 (void)fs; 3602 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 3603 3604 // If conversion was exact or resulted in a special case, we're done; 3605 // just set the second double to zero. Otherwise, re-convert back to 3606 // the extended format and compute the difference. This now should 3607 // convert exactly to double. 3608 if (u.isFiniteNonZero() && losesInfo) { 3609 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 3610 assert(fs == opOK && !losesInfo); 3611 (void)fs; 3612 3613 IEEEFloat v(extended); 3614 v.subtract(u, rmNearestTiesToEven); 3615 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo); 3616 assert(fs == opOK && !losesInfo); 3617 (void)fs; 3618 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 3619 } else { 3620 words[1] = 0; 3621 } 3622 3623 return APInt(128, words); 3624 } 3625 3626 template <const fltSemantics &S> 3627 APInt IEEEFloat::convertIEEEFloatToAPInt() const { 3628 assert(semantics == &S); 3629 const int bias = 3630 (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1); 3631 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3632 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; 3633 constexpr integerPart integer_bit = 3634 integerPart{1} << (trailing_significand_bits % integerPartWidth); 3635 constexpr uint64_t significand_mask = integer_bit - 1; 3636 constexpr unsigned int exponent_bits = 3637 trailing_significand_bits ? 
(S.sizeInBits - 1 - trailing_significand_bits) 3638 : S.sizeInBits; 3639 static_assert(exponent_bits < 64); 3640 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; 3641 3642 uint64_t myexponent; 3643 std::array<integerPart, partCountForBits(trailing_significand_bits)> 3644 mysignificand; 3645 3646 if (isFiniteNonZero()) { 3647 myexponent = exponent + bias; 3648 std::copy_n(significandParts(), mysignificand.size(), 3649 mysignificand.begin()); 3650 if (myexponent == 1 && 3651 !(significandParts()[integer_bit_part] & integer_bit)) 3652 myexponent = 0; // denormal 3653 } else if (category == fcZero) { 3654 if (!S.hasZero) 3655 llvm_unreachable("semantics does not support zero!"); 3656 myexponent = ::exponentZero(S) + bias; 3657 mysignificand.fill(0); 3658 } else if (category == fcInfinity) { 3659 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || 3660 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 3661 llvm_unreachable("semantics don't support inf!"); 3662 myexponent = ::exponentInf(S) + bias; 3663 mysignificand.fill(0); 3664 } else { 3665 assert(category == fcNaN && "Unknown category!"); 3666 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) 3667 llvm_unreachable("semantics don't support NaN!"); 3668 myexponent = ::exponentNaN(S) + bias; 3669 std::copy_n(significandParts(), mysignificand.size(), 3670 mysignificand.begin()); 3671 } 3672 std::array<uint64_t, (S.sizeInBits + 63) / 64> words; 3673 auto words_iter = 3674 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); 3675 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) { 3676 // Clear the integer bit. 
words[mysignificand.size() - 1] &= significand_mask;
  }
  // Zero every word above the copied significand parts before the sign and
  // exponent fields are OR-ed into the top word.
  std::fill(words_iter, words.end(), uint64_t{0});
  constexpr size_t last_word = words.size() - 1;
  // The sign occupies the most significant bit of the encoding.
  uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
                          << ((S.sizeInBits - 1) % 64);
  words[last_word] |= shifted_sign;
  // The exponent field sits directly above the trailing significand bits.
  uint64_t shifted_exponent = (myexponent & exponent_mask)
                              << (trailing_significand_bits % 64);
  words[last_word] |= shifted_exponent;
  // Encodings that fit in one 64-bit word use the scalar APInt constructor.
  if constexpr (last_word == 0) {
    return APInt(S.sizeInBits, words[0]);
  }
  return APInt(S.sizeInBits, words);
}

// Each routine below checks the expected significand part count and then
// forwards to convertIEEEFloatToAPInt instantiated for one semantics object.

APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}

APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}

APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}

APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}

APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}

APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}

APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}

APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3>();
}

APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}

APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}

APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}

APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E3M4>();
}

APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}

APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
}

APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
}

APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
}

APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
}

// This function creates an APInt that is just a bit map of the floating
// point constant as it would appear in memory.  It is not a conversion,
// and treating the result as a normal integer is unlikely to be useful.
3781 3782 APInt IEEEFloat::bitcastToAPInt() const { 3783 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf) 3784 return convertHalfAPFloatToAPInt(); 3785 3786 if (semantics == (const llvm::fltSemantics *)&semBFloat) 3787 return convertBFloatAPFloatToAPInt(); 3788 3789 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle) 3790 return convertFloatAPFloatToAPInt(); 3791 3792 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble) 3793 return convertDoubleAPFloatToAPInt(); 3794 3795 if (semantics == (const llvm::fltSemantics*)&semIEEEquad) 3796 return convertQuadrupleAPFloatToAPInt(); 3797 3798 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy) 3799 return convertPPCDoubleDoubleAPFloatToAPInt(); 3800 3801 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) 3802 return convertFloat8E5M2APFloatToAPInt(); 3803 3804 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) 3805 return convertFloat8E5M2FNUZAPFloatToAPInt(); 3806 3807 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3) 3808 return convertFloat8E4M3APFloatToAPInt(); 3809 3810 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) 3811 return convertFloat8E4M3FNAPFloatToAPInt(); 3812 3813 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) 3814 return convertFloat8E4M3FNUZAPFloatToAPInt(); 3815 3816 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) 3817 return convertFloat8E4M3B11FNUZAPFloatToAPInt(); 3818 3819 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4) 3820 return convertFloat8E3M4APFloatToAPInt(); 3821 3822 if (semantics == (const llvm::fltSemantics *)&semFloatTF32) 3823 return convertFloatTF32APFloatToAPInt(); 3824 3825 if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU) 3826 return convertFloat8E8M0FNUAPFloatToAPInt(); 3827 3828 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) 3829 return convertFloat6E3M2FNAPFloatToAPInt(); 3830 3831 if (semantics == (const 
llvm::fltSemantics *)&semFloat6E2M3FN) 3832 return convertFloat6E2M3FNAPFloatToAPInt(); 3833 3834 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN) 3835 return convertFloat4E2M1FNAPFloatToAPInt(); 3836 3837 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && 3838 "unknown format!"); 3839 return convertF80LongDoubleAPFloatToAPInt(); 3840 } 3841 3842 float IEEEFloat::convertToFloat() const { 3843 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle && 3844 "Float semantics are not IEEEsingle"); 3845 APInt api = bitcastToAPInt(); 3846 return api.bitsToFloat(); 3847 } 3848 3849 double IEEEFloat::convertToDouble() const { 3850 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble && 3851 "Float semantics are not IEEEdouble"); 3852 APInt api = bitcastToAPInt(); 3853 return api.bitsToDouble(); 3854 } 3855 3856 #ifdef HAS_IEE754_FLOAT128 3857 float128 IEEEFloat::convertToQuad() const { 3858 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad && 3859 "Float semantics are not IEEEquads"); 3860 APInt api = bitcastToAPInt(); 3861 return api.bitsToQuad(); 3862 } 3863 #endif 3864 3865 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3866 /// does not support these bit patterns: 3867 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3868 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3869 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3870 /// exponent = 0, integer bit 1 ("pseudodenormal") 3871 /// At the moment, the first three are treated as NaNs, the last one as Normal. 
3872 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) { 3873 uint64_t i1 = api.getRawData()[0]; 3874 uint64_t i2 = api.getRawData()[1]; 3875 uint64_t myexponent = (i2 & 0x7fff); 3876 uint64_t mysignificand = i1; 3877 uint8_t myintegerbit = mysignificand >> 63; 3878 3879 initialize(&semX87DoubleExtended); 3880 assert(partCount()==2); 3881 3882 sign = static_cast<unsigned int>(i2>>15); 3883 if (myexponent == 0 && mysignificand == 0) { 3884 makeZero(sign); 3885 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3886 makeInf(sign); 3887 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) || 3888 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) { 3889 category = fcNaN; 3890 exponent = exponentNaN(); 3891 significandParts()[0] = mysignificand; 3892 significandParts()[1] = 0; 3893 } else { 3894 category = fcNormal; 3895 exponent = myexponent - 16383; 3896 significandParts()[0] = mysignificand; 3897 significandParts()[1] = 0; 3898 if (myexponent==0) // denormal 3899 exponent = -16382; 3900 } 3901 } 3902 3903 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { 3904 uint64_t i1 = api.getRawData()[0]; 3905 uint64_t i2 = api.getRawData()[1]; 3906 opStatus fs; 3907 bool losesInfo; 3908 3909 // Get the first double and convert to our format. 3910 initFromDoubleAPInt(APInt(64, i1)); 3911 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3912 assert(fs == opOK && !losesInfo); 3913 (void)fs; 3914 3915 // Unless we have a special case, add in second double. 3916 if (isFiniteNonZero()) { 3917 IEEEFloat v(semIEEEdouble, APInt(64, i2)); 3918 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo); 3919 assert(fs == opOK && !losesInfo); 3920 (void)fs; 3921 3922 add(v, rmNearestTiesToEven); 3923 } 3924 } 3925 3926 // The E8M0 format has the following characteristics: 3927 // It is an 8-bit unsigned format with only exponents (no actual significand). 
3928 // No encodings for {zero, infinities or denorms}. 3929 // NaN is represented by all 1's. 3930 // Bias is 127. 3931 void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) { 3932 const uint64_t exponent_mask = 0xff; 3933 uint64_t val = api.getRawData()[0]; 3934 uint64_t myexponent = (val & exponent_mask); 3935 3936 initialize(&semFloat8E8M0FNU); 3937 assert(partCount() == 1); 3938 3939 // This format has unsigned representation only 3940 sign = 0; 3941 3942 // Set the significand 3943 // This format does not have any significand but the 'Pth' precision bit is 3944 // always set to 1 for consistency in APFloat's internal representation. 3945 uint64_t mysignificand = 1; 3946 significandParts()[0] = mysignificand; 3947 3948 // This format can either have a NaN or fcNormal 3949 // All 1's i.e. 255 is a NaN 3950 if (val == exponent_mask) { 3951 category = fcNaN; 3952 exponent = exponentNaN(); 3953 return; 3954 } 3955 // Handle fcNormal... 3956 category = fcNormal; 3957 exponent = myexponent - 127; // 127 is bias 3958 } 3959 template <const fltSemantics &S> 3960 void IEEEFloat::initFromIEEEAPInt(const APInt &api) { 3961 assert(api.getBitWidth() == S.sizeInBits); 3962 constexpr integerPart integer_bit = integerPart{1} 3963 << ((S.precision - 1) % integerPartWidth); 3964 constexpr uint64_t significand_mask = integer_bit - 1; 3965 constexpr unsigned int trailing_significand_bits = S.precision - 1; 3966 constexpr unsigned int stored_significand_parts = 3967 partCountForBits(trailing_significand_bits); 3968 constexpr unsigned int exponent_bits = 3969 S.sizeInBits - 1 - trailing_significand_bits; 3970 static_assert(exponent_bits < 64); 3971 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; 3972 constexpr int bias = -(S.minExponent - 1); 3973 3974 // Copy the bits of the significand. We need to clear out the exponent and 3975 // sign bit in the last word. 
3976 std::array<integerPart, stored_significand_parts> mysignificand; 3977 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin()); 3978 if constexpr (significand_mask != 0) { 3979 mysignificand[mysignificand.size() - 1] &= significand_mask; 3980 } 3981 3982 // We assume the last word holds the sign bit, the exponent, and potentially 3983 // some of the trailing significand field. 3984 uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; 3985 uint64_t myexponent = 3986 (last_word >> (trailing_significand_bits % 64)) & exponent_mask; 3987 3988 initialize(&S); 3989 assert(partCount() == mysignificand.size()); 3990 3991 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); 3992 3993 bool all_zero_significand = 3994 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; }); 3995 3996 bool is_zero = myexponent == 0 && all_zero_significand; 3997 3998 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) { 3999 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { 4000 makeInf(sign); 4001 return; 4002 } 4003 } 4004 4005 bool is_nan = false; 4006 4007 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) { 4008 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; 4009 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) { 4010 bool all_ones_significand = 4011 std::all_of(mysignificand.begin(), mysignificand.end() - 1, 4012 [](integerPart bits) { return bits == ~integerPart{0}; }) && 4013 (!significand_mask || 4014 mysignificand[mysignificand.size() - 1] == significand_mask); 4015 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; 4016 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) { 4017 is_nan = is_zero && sign; 4018 } 4019 4020 if (is_nan) { 4021 category = fcNaN; 4022 exponent = ::exponentNaN(S); 4023 std::copy_n(mysignificand.begin(), mysignificand.size(), 4024 significandParts()); 4025 return; 4026 } 4027 
4028 if (is_zero) { 4029 makeZero(sign); 4030 return; 4031 } 4032 4033 category = fcNormal; 4034 exponent = myexponent - bias; 4035 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts()); 4036 if (myexponent == 0) // denormal 4037 exponent = S.minExponent; 4038 else 4039 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit 4040 } 4041 4042 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { 4043 initFromIEEEAPInt<semIEEEquad>(api); 4044 } 4045 4046 void IEEEFloat::initFromDoubleAPInt(const APInt &api) { 4047 initFromIEEEAPInt<semIEEEdouble>(api); 4048 } 4049 4050 void IEEEFloat::initFromFloatAPInt(const APInt &api) { 4051 initFromIEEEAPInt<semIEEEsingle>(api); 4052 } 4053 4054 void IEEEFloat::initFromBFloatAPInt(const APInt &api) { 4055 initFromIEEEAPInt<semBFloat>(api); 4056 } 4057 4058 void IEEEFloat::initFromHalfAPInt(const APInt &api) { 4059 initFromIEEEAPInt<semIEEEhalf>(api); 4060 } 4061 4062 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { 4063 initFromIEEEAPInt<semFloat8E5M2>(api); 4064 } 4065 4066 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { 4067 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); 4068 } 4069 4070 void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) { 4071 initFromIEEEAPInt<semFloat8E4M3>(api); 4072 } 4073 4074 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { 4075 initFromIEEEAPInt<semFloat8E4M3FN>(api); 4076 } 4077 4078 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { 4079 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); 4080 } 4081 4082 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { 4083 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); 4084 } 4085 4086 void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) { 4087 initFromIEEEAPInt<semFloat8E3M4>(api); 4088 } 4089 4090 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { 4091 initFromIEEEAPInt<semFloatTF32>(api); 4092 } 4093 4094 void 
IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) { 4095 initFromIEEEAPInt<semFloat6E3M2FN>(api); 4096 } 4097 4098 void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) { 4099 initFromIEEEAPInt<semFloat6E2M3FN>(api); 4100 } 4101 4102 void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) { 4103 initFromIEEEAPInt<semFloat4E2M1FN>(api); 4104 } 4105 4106 /// Treat api as containing the bits of a floating point number. 4107 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { 4108 assert(api.getBitWidth() == Sem->sizeInBits); 4109 if (Sem == &semIEEEhalf) 4110 return initFromHalfAPInt(api); 4111 if (Sem == &semBFloat) 4112 return initFromBFloatAPInt(api); 4113 if (Sem == &semIEEEsingle) 4114 return initFromFloatAPInt(api); 4115 if (Sem == &semIEEEdouble) 4116 return initFromDoubleAPInt(api); 4117 if (Sem == &semX87DoubleExtended) 4118 return initFromF80LongDoubleAPInt(api); 4119 if (Sem == &semIEEEquad) 4120 return initFromQuadrupleAPInt(api); 4121 if (Sem == &semPPCDoubleDoubleLegacy) 4122 return initFromPPCDoubleDoubleAPInt(api); 4123 if (Sem == &semFloat8E5M2) 4124 return initFromFloat8E5M2APInt(api); 4125 if (Sem == &semFloat8E5M2FNUZ) 4126 return initFromFloat8E5M2FNUZAPInt(api); 4127 if (Sem == &semFloat8E4M3) 4128 return initFromFloat8E4M3APInt(api); 4129 if (Sem == &semFloat8E4M3FN) 4130 return initFromFloat8E4M3FNAPInt(api); 4131 if (Sem == &semFloat8E4M3FNUZ) 4132 return initFromFloat8E4M3FNUZAPInt(api); 4133 if (Sem == &semFloat8E4M3B11FNUZ) 4134 return initFromFloat8E4M3B11FNUZAPInt(api); 4135 if (Sem == &semFloat8E3M4) 4136 return initFromFloat8E3M4APInt(api); 4137 if (Sem == &semFloatTF32) 4138 return initFromFloatTF32APInt(api); 4139 if (Sem == &semFloat8E8M0FNU) 4140 return initFromFloat8E8M0FNUAPInt(api); 4141 if (Sem == &semFloat6E3M2FN) 4142 return initFromFloat6E3M2FNAPInt(api); 4143 if (Sem == &semFloat6E2M3FN) 4144 return initFromFloat6E2M3FNAPInt(api); 4145 if (Sem == &semFloat4E2M1FN) 4146 return 
initFromFloat4E2M1FNAPInt(api); 4147 4148 llvm_unreachable(nullptr); 4149 } 4150 4151 /// Make this number the largest magnitude normal number in the given 4152 /// semantics. 4153 void IEEEFloat::makeLargest(bool Negative) { 4154 if (Negative && !semantics->hasSignedRepr) 4155 llvm_unreachable( 4156 "This floating point format does not support signed values"); 4157 // We want (in interchange format): 4158 // sign = {Negative} 4159 // exponent = 1..10 4160 // significand = 1..1 4161 category = fcNormal; 4162 sign = Negative; 4163 exponent = semantics->maxExponent; 4164 4165 // Use memset to set all but the highest integerPart to all ones. 4166 integerPart *significand = significandParts(); 4167 unsigned PartCount = partCount(); 4168 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 4169 4170 // Set the high integerPart especially setting all unused top bits for 4171 // internal consistency. 4172 const unsigned NumUnusedHighBits = 4173 PartCount*integerPartWidth - semantics->precision; 4174 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 4175 ? (~integerPart(0) >> NumUnusedHighBits) 4176 : 0; 4177 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && 4178 semantics->nanEncoding == fltNanEncoding::AllOnes && 4179 (semantics->precision > 1)) 4180 significand[0] &= ~integerPart(1); 4181 } 4182 4183 /// Make this number the smallest magnitude denormal number in the given 4184 /// semantics. 
4185 void IEEEFloat::makeSmallest(bool Negative) { 4186 if (Negative && !semantics->hasSignedRepr) 4187 llvm_unreachable( 4188 "This floating point format does not support signed values"); 4189 // We want (in interchange format): 4190 // sign = {Negative} 4191 // exponent = 0..0 4192 // significand = 0..01 4193 category = fcNormal; 4194 sign = Negative; 4195 exponent = semantics->minExponent; 4196 APInt::tcSet(significandParts(), 1, partCount()); 4197 } 4198 4199 void IEEEFloat::makeSmallestNormalized(bool Negative) { 4200 if (Negative && !semantics->hasSignedRepr) 4201 llvm_unreachable( 4202 "This floating point format does not support signed values"); 4203 // We want (in interchange format): 4204 // sign = {Negative} 4205 // exponent = 0..0 4206 // significand = 10..0 4207 4208 category = fcNormal; 4209 zeroSignificand(); 4210 sign = Negative; 4211 exponent = semantics->minExponent; 4212 APInt::tcSetBit(significandParts(), semantics->precision - 1); 4213 } 4214 4215 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) { 4216 initFromAPInt(&Sem, API); 4217 } 4218 4219 IEEEFloat::IEEEFloat(float f) { 4220 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f)); 4221 } 4222 4223 IEEEFloat::IEEEFloat(double d) { 4224 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d)); 4225 } 4226 4227 namespace { 4228 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 4229 Buffer.append(Str.begin(), Str.end()); 4230 } 4231 4232 /// Removes data from the given significand until it is no more 4233 /// precise than is required for the desired precision. 4234 void AdjustToPrecision(APInt &significand, 4235 int &exp, unsigned FormatPrecision) { 4236 unsigned bits = significand.getActiveBits(); 4237 4238 // 196/59 is a very slight overestimate of lg_2(10). 
4239 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 4240 4241 if (bits <= bitsRequired) return; 4242 4243 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 4244 if (!tensRemovable) return; 4245 4246 exp += tensRemovable; 4247 4248 APInt divisor(significand.getBitWidth(), 1); 4249 APInt powten(significand.getBitWidth(), 10); 4250 while (true) { 4251 if (tensRemovable & 1) 4252 divisor *= powten; 4253 tensRemovable >>= 1; 4254 if (!tensRemovable) break; 4255 powten *= powten; 4256 } 4257 4258 significand = significand.udiv(divisor); 4259 4260 // Truncate the significand down to its active bit count. 4261 significand = significand.trunc(significand.getActiveBits()); 4262 } 4263 4264 4265 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 4266 int &exp, unsigned FormatPrecision) { 4267 unsigned N = buffer.size(); 4268 if (N <= FormatPrecision) return; 4269 4270 // The most significant figures are the last ones in the buffer. 4271 unsigned FirstSignificant = N - FormatPrecision; 4272 4273 // Round. 4274 // FIXME: this probably shouldn't use 'round half up'. 4275 4276 // Rounding down is just a truncation, except we also want to drop 4277 // trailing zeros from the new result. 4278 if (buffer[FirstSignificant - 1] < '5') { 4279 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 4280 FirstSignificant++; 4281 4282 exp += FirstSignificant; 4283 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 4284 return; 4285 } 4286 4287 // Rounding up requires a decimal add-with-carry. If we continue 4288 // the carry, the newly-introduced zeros will just be truncated. 4289 for (unsigned I = FirstSignificant; I != N; ++I) { 4290 if (buffer[I] == '9') { 4291 FirstSignificant++; 4292 } else { 4293 buffer[I]++; 4294 break; 4295 } 4296 } 4297 4298 // If we carried through, we have exactly one digit of precision. 
4299 if (FirstSignificant == N) { 4300 exp += FirstSignificant; 4301 buffer.clear(); 4302 buffer.push_back('1'); 4303 return; 4304 } 4305 4306 exp += FirstSignificant; 4307 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 4308 } 4309 4310 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp, 4311 APInt significand, unsigned FormatPrecision, 4312 unsigned FormatMaxPadding, bool TruncateZero) { 4313 const int semanticsPrecision = significand.getBitWidth(); 4314 4315 if (isNeg) 4316 Str.push_back('-'); 4317 4318 // Set FormatPrecision if zero. We want to do this before we 4319 // truncate trailing zeros, as those are part of the precision. 4320 if (!FormatPrecision) { 4321 // We use enough digits so the number can be round-tripped back to an 4322 // APFloat. The formula comes from "How to Print Floating-Point Numbers 4323 // Accurately" by Steele and White. 4324 // FIXME: Using a formula based purely on the precision is conservative; 4325 // we can print fewer digits depending on the actual value being printed. 4326 4327 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 4328 FormatPrecision = 2 + semanticsPrecision * 59 / 196; 4329 } 4330 4331 // Ignore trailing binary zeros. 4332 int trailingZeros = significand.countr_zero(); 4333 exp += trailingZeros; 4334 significand.lshrInPlace(trailingZeros); 4335 4336 // Change the exponent from 2^e to 10^e. 4337 if (exp == 0) { 4338 // Nothing to do. 4339 } else if (exp > 0) { 4340 // Just shift left. 4341 significand = significand.zext(semanticsPrecision + exp); 4342 significand <<= exp; 4343 exp = 0; 4344 } else { /* exp < 0 */ 4345 int texp = -exp; 4346 4347 // We transform this using the identity: 4348 // (N)(2^-e) == (N)(5^e)(10^-e) 4349 // This means we have to multiply N (the significand) by 5^e. 
4350 // To avoid overflow, we have to operate on numbers large 4351 // enough to store N * 5^e: 4352 // log2(N * 5^e) == log2(N) + e * log2(5) 4353 // <= semantics->precision + e * 137 / 59 4354 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 4355 4356 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59; 4357 4358 // Multiply significand by 5^e. 4359 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 4360 significand = significand.zext(precision); 4361 APInt five_to_the_i(precision, 5); 4362 while (true) { 4363 if (texp & 1) 4364 significand *= five_to_the_i; 4365 4366 texp >>= 1; 4367 if (!texp) 4368 break; 4369 five_to_the_i *= five_to_the_i; 4370 } 4371 } 4372 4373 AdjustToPrecision(significand, exp, FormatPrecision); 4374 4375 SmallVector<char, 256> buffer; 4376 4377 // Fill the buffer. 4378 unsigned precision = significand.getBitWidth(); 4379 if (precision < 4) { 4380 // We need enough precision to store the value 10. 4381 precision = 4; 4382 significand = significand.zext(precision); 4383 } 4384 APInt ten(precision, 10); 4385 APInt digit(precision, 0); 4386 4387 bool inTrail = true; 4388 while (significand != 0) { 4389 // digit <- significand % 10 4390 // significand <- significand / 10 4391 APInt::udivrem(significand, ten, significand, digit); 4392 4393 unsigned d = digit.getZExtValue(); 4394 4395 // Drop trailing zeros. 4396 if (inTrail && !d) 4397 exp++; 4398 else { 4399 buffer.push_back((char) ('0' + d)); 4400 inTrail = false; 4401 } 4402 } 4403 4404 assert(!buffer.empty() && "no characters in buffer!"); 4405 4406 // Drop down to FormatPrecision. 4407 // TODO: don't do more precise calculations above than are required. 4408 AdjustToPrecision(buffer, exp, FormatPrecision); 4409 4410 unsigned NDigits = buffer.size(); 4411 4412 // Check whether we should use scientific notation. 
4413 bool FormatScientific; 4414 if (!FormatMaxPadding) 4415 FormatScientific = true; 4416 else { 4417 if (exp >= 0) { 4418 // 765e3 --> 765000 4419 // ^^^ 4420 // But we shouldn't make the number look more precise than it is. 4421 FormatScientific = ((unsigned) exp > FormatMaxPadding || 4422 NDigits + (unsigned) exp > FormatPrecision); 4423 } else { 4424 // Power of the most significant digit. 4425 int MSD = exp + (int) (NDigits - 1); 4426 if (MSD >= 0) { 4427 // 765e-2 == 7.65 4428 FormatScientific = false; 4429 } else { 4430 // 765e-5 == 0.00765 4431 // ^ ^^ 4432 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 4433 } 4434 } 4435 } 4436 4437 // Scientific formatting is pretty straightforward. 4438 if (FormatScientific) { 4439 exp += (NDigits - 1); 4440 4441 Str.push_back(buffer[NDigits-1]); 4442 Str.push_back('.'); 4443 if (NDigits == 1 && TruncateZero) 4444 Str.push_back('0'); 4445 else 4446 for (unsigned I = 1; I != NDigits; ++I) 4447 Str.push_back(buffer[NDigits-1-I]); 4448 // Fill with zeros up to FormatPrecision. 4449 if (!TruncateZero && FormatPrecision > NDigits - 1) 4450 Str.append(FormatPrecision - NDigits + 1, '0'); 4451 // For !TruncateZero we use lower 'e'. 4452 Str.push_back(TruncateZero ? 'E' : 'e'); 4453 4454 Str.push_back(exp >= 0 ? '+' : '-'); 4455 if (exp < 0) 4456 exp = -exp; 4457 SmallVector<char, 6> expbuf; 4458 do { 4459 expbuf.push_back((char) ('0' + (exp % 10))); 4460 exp /= 10; 4461 } while (exp); 4462 // Exponent always at least two digits if we do not truncate zeros. 4463 if (!TruncateZero && expbuf.size() < 2) 4464 expbuf.push_back('0'); 4465 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 4466 Str.push_back(expbuf[E-1-I]); 4467 return; 4468 } 4469 4470 // Non-scientific, positive exponents. 
4471 if (exp >= 0) { 4472 for (unsigned I = 0; I != NDigits; ++I) 4473 Str.push_back(buffer[NDigits-1-I]); 4474 for (unsigned I = 0; I != (unsigned) exp; ++I) 4475 Str.push_back('0'); 4476 return; 4477 } 4478 4479 // Non-scientific, negative exponents. 4480 4481 // The number of digits to the left of the decimal point. 4482 int NWholeDigits = exp + (int) NDigits; 4483 4484 unsigned I = 0; 4485 if (NWholeDigits > 0) { 4486 for (; I != (unsigned) NWholeDigits; ++I) 4487 Str.push_back(buffer[NDigits-I-1]); 4488 Str.push_back('.'); 4489 } else { 4490 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 4491 4492 Str.push_back('0'); 4493 Str.push_back('.'); 4494 for (unsigned Z = 1; Z != NZeros; ++Z) 4495 Str.push_back('0'); 4496 } 4497 4498 for (; I != NDigits; ++I) 4499 Str.push_back(buffer[NDigits-I-1]); 4500 4501 } 4502 } // namespace 4503 4504 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, 4505 unsigned FormatMaxPadding, bool TruncateZero) const { 4506 switch (category) { 4507 case fcInfinity: 4508 if (isNegative()) 4509 return append(Str, "-Inf"); 4510 else 4511 return append(Str, "+Inf"); 4512 4513 case fcNaN: return append(Str, "NaN"); 4514 4515 case fcZero: 4516 if (isNegative()) 4517 Str.push_back('-'); 4518 4519 if (!FormatMaxPadding) { 4520 if (TruncateZero) 4521 append(Str, "0.0E+0"); 4522 else { 4523 append(Str, "0.0"); 4524 if (FormatPrecision > 1) 4525 Str.append(FormatPrecision - 1, '0'); 4526 append(Str, "e+00"); 4527 } 4528 } else 4529 Str.push_back('0'); 4530 return; 4531 4532 case fcNormal: 4533 break; 4534 } 4535 4536 // Decompose the number into an APInt and an exponent. 
4537 int exp = exponent - ((int) semantics->precision - 1); 4538 APInt significand( 4539 semantics->precision, 4540 ArrayRef(significandParts(), partCountForBits(semantics->precision))); 4541 4542 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision, 4543 FormatMaxPadding, TruncateZero); 4544 4545 } 4546 4547 bool IEEEFloat::getExactInverse(APFloat *inv) const { 4548 // Special floats and denormals have no exact inverse. 4549 if (!isFiniteNonZero()) 4550 return false; 4551 4552 // Check that the number is a power of two by making sure that only the 4553 // integer bit is set in the significand. 4554 if (significandLSB() != semantics->precision - 1) 4555 return false; 4556 4557 // Get the inverse. 4558 IEEEFloat reciprocal(*semantics, 1ULL); 4559 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 4560 return false; 4561 4562 // Avoid multiplication with a denormal, it is not safe on all platforms and 4563 // may be slower than a normal division. 4564 if (reciprocal.isDenormal()) 4565 return false; 4566 4567 assert(reciprocal.isFiniteNonZero() && 4568 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 4569 4570 if (inv) 4571 *inv = APFloat(reciprocal, *semantics); 4572 4573 return true; 4574 } 4575 4576 int IEEEFloat::getExactLog2Abs() const { 4577 if (!isFinite() || isZero()) 4578 return INT_MIN; 4579 4580 const integerPart *Parts = significandParts(); 4581 const int PartCount = partCountForBits(semantics->precision); 4582 4583 int PopCount = 0; 4584 for (int i = 0; i < PartCount; ++i) { 4585 PopCount += llvm::popcount(Parts[i]); 4586 if (PopCount > 1) 4587 return INT_MIN; 4588 } 4589 4590 if (exponent != semantics->minExponent) 4591 return exponent; 4592 4593 int CountrParts = 0; 4594 for (int i = 0; i < PartCount; 4595 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) { 4596 if (Parts[i] != 0) { 4597 return exponent - semantics->precision + CountrParts + 4598 llvm::countr_zero(Parts[i]) + 1; 4599 } 4600 } 4601 4602 
llvm_unreachable("didn't find the set bit");
}

/// Returns true iff this value is a NaN whose quiet bit (bit `precision - 2`
/// of the significand) is clear.
///
/// Always false for semantics whose non-finite behavior is NanOnly or
/// FiniteOnly.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
      semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
///
/// \param nextDown  when true, step towards negative infinity instead of
///                  towards positive infinity.
/// \returns opInvalidOp when the input is a signaling NaN, opOK otherwise.
APFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats that encode NaN as negative zero only have a positive zero.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      // Formats without any zero step to the smallest normalized value
      // instead.
      if (!semantics->hasZero)
        makeSmallestNormalized(false);
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else if (semantics->nonFiniteBehavior ==
                 fltNonfiniteBehavior::FiniteOnly) {
        // nextUp(getLargest()) == getLargest()
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
          exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      // If there are only exponents, any increment always crosses the
      // BinadeBoundary.
      bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
                                     (!isDenormal() && isSignificandAllOnes());

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}

/// Forwards to the file-scope ::exponentNaN helper for this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}

/// Forwards to the file-scope ::exponentInf helper for this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}

/// Forwards to the file-scope ::exponentZero helper for this value's
/// semantics.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}

/// Turns this value into an infinity with the requested sign.
///
/// FiniteOnly formats cannot represent the result and hit llvm_unreachable;
/// NanOnly formats produce a NaN with the requested sign instead.
void IEEEFloat::makeInf(bool Negative) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support Inf");

  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // There is no Inf, so make NaN instead.
    makeNaN(false, Negative);
    return;
  }
  category = fcInfinity;
  sign = Negative;
  exponent = exponentInf();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Turns this value into a zero with the requested sign.
///
/// Formats without a zero hit llvm_unreachable. Formats whose NaN encoding is
/// NegativeZero always produce positive zero.
void IEEEFloat::makeZero(bool Negative) {
  if (!semantics->hasZero)
    llvm_unreachable("This floating point format does not support Zero");

  category = fcZero;
  sign = Negative;
  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Merge negative zero to positive because 0b10000...000 is used for NaN
    sign = false;
  }
  exponent = exponentZero();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Sets the quiet bit (bit `precision - 2` of the significand, the same bit
/// isSignaling() inspects) of a NaN. The value must already be a NaN.
/// NanOnly formats are left untouched.
void IEEEFloat::makeQuiet() {
  assert(isNaN());
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    APInt::tcSetBit(significandParts(), semantics->precision - 2);
}

/// Returns the exponent of \p Arg, or one of the APFloat::IEK_* sentinels for
/// NaN, zero and infinity.
///
/// For a denormal the stored exponent is not the exponent of the leading set
/// bit, so a copy is normalized first and its exponent is reported.
int ilogb(const IEEEFloat &Arg) {
  if (Arg.isNaN())
    return APFloat::IEK_NaN;
  if (Arg.isZero())
    return APFloat::IEK_Zero;
  if (Arg.isInfinity())
    return APFloat::IEK_Inf;
  if (!Arg.isDenormal())
    return Arg.exponent;

  IEEEFloat Normalized(Arg);
  int SignificandBits = Arg.getSemantics().precision - 1;

  // Bias the exponent up by the significand width before normalizing, then
  // remove the bias from the normalized exponent again.
  Normalized.exponent += SignificandBits;
  Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
  return Normalized.exponent - SignificandBits;
}

/// Returns \p X with \p Exp added to its exponent, re-normalized under
/// \p RoundingMode. A NaN result is quieted.
IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode RoundingMode) {
  auto MaxExp = X.getSemantics().maxExponent;
  auto MinExp = X.getSemantics().minExponent;

  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
  // overflow; clamp it to a safe range before adding, but ensure that the range
  // is large enough that the clamp does not change the result. The range we
  // need to support is the difference between the largest possible exponent and
  // the normalized exponent of half the smallest denormal.

  int SignificandBits = X.getSemantics().precision - 1;
  int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;

  // Clamp to one past the range ends to let normalize handle overflow.
  X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
  X.normalize(RoundingMode, lfExactlyZero);
  if (X.isNaN())
    X.makeQuiet();
  return X;
}

/// Splits \p Val into a fraction and an exponent.
///
/// \param[out] Exp  receives the IEK_NaN / IEK_Inf sentinel for NaN and
///                  infinity, 0 for zero, and ilogb(Val) + 1 otherwise.
/// \returns a quieted copy for NaN, \p Val itself for infinity, and
///          scalbn(Val, -Exp, RM) otherwise.
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
  Exp = ilogb(Val);

  // Quiet signalling nans.
  if (Exp == APFloat::IEK_NaN) {
    IEEEFloat Quiet(Val);
    Quiet.makeQuiet();
    return Quiet;
  }

  if (Exp == APFloat::IEK_Inf)
    return Val;

  // 1 is added because frexp is defined to return a normalized fraction in
  // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
  Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
  return scalbn(Val, -Exp, RM);
}

/// Builds a double-double from two default-constructed IEEE doubles.
/// \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Builds a double-double whose two IEEE double components are constructed
/// with the `uninitialized` tag. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Builds a double-double whose first component is constructed from \p I and
/// whose second component is default-constructed.
/// \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Builds a double-double from a raw bit pattern: raw word 0 of \p I becomes
/// the first IEEE double and raw word 1 the second.
///
/// NOTE(review): reads `I.getRawData()[1]` unconditionally, so \p I presumably
/// must be at least two words wide -- not checked here; confirm at call sites.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Builds a double-double by taking ownership of two existing components.
/// Both must use semIEEEdouble and \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
                             APFloat &&Second)
    : Semantics(&S),
      Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
  assert(Semantics == &semPPCDoubleDouble);
  assert(&Floats[0].getSemantics() == &semIEEEdouble);
  assert(&Floats[1].getSemantics() == &semIEEEdouble);
}

/// Copy constructor. Deep-copies both components; a source without storage
/// (null Floats) yields a copy without storage.
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
    : Semantics(RHS.Semantics),
      Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
                                         APFloat(RHS.Floats[1])}
                        : nullptr) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Move constructor. Takes over the component storage and marks the source
/// with semBogus semantics.
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
    : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
  RHS.Semantics = &semBogus;
  assert(Semantics == &semPPCDoubleDouble);
}

/// Copy assignment.
///
/// When both sides share semantics and the source has storage, the components
/// are assigned in place (this also covers self-assignment). Otherwise the
/// object is destroyed and copy-constructed again from \p RHS.
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
  if (Semantics == RHS.Semantics && RHS.Floats) {
    Floats[0] = RHS.Floats[0];
    Floats[1] = RHS.Floats[1];
  } else if (this != &RHS) {
    this->~DoubleAPFloat();
    new (this) DoubleAPFloat(RHS);
  }
  return *this;
}

// Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4923 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa, 4924 const APFloat &c, const APFloat &cc, 4925 roundingMode RM) { 4926 int Status = opOK; 4927 APFloat z = a; 4928 Status |= z.add(c, RM); 4929 if (!z.isFinite()) { 4930 if (!z.isInfinity()) { 4931 Floats[0] = std::move(z); 4932 Floats[1].makeZero(/* Neg = */ false); 4933 return (opStatus)Status; 4934 } 4935 Status = opOK; 4936 auto AComparedToC = a.compareAbsoluteValue(c); 4937 z = cc; 4938 Status |= z.add(aa, RM); 4939 if (AComparedToC == APFloat::cmpGreaterThan) { 4940 // z = cc + aa + c + a; 4941 Status |= z.add(c, RM); 4942 Status |= z.add(a, RM); 4943 } else { 4944 // z = cc + aa + a + c; 4945 Status |= z.add(a, RM); 4946 Status |= z.add(c, RM); 4947 } 4948 if (!z.isFinite()) { 4949 Floats[0] = std::move(z); 4950 Floats[1].makeZero(/* Neg = */ false); 4951 return (opStatus)Status; 4952 } 4953 Floats[0] = z; 4954 APFloat zz = aa; 4955 Status |= zz.add(cc, RM); 4956 if (AComparedToC == APFloat::cmpGreaterThan) { 4957 // Floats[1] = a - z + c + zz; 4958 Floats[1] = a; 4959 Status |= Floats[1].subtract(z, RM); 4960 Status |= Floats[1].add(c, RM); 4961 Status |= Floats[1].add(zz, RM); 4962 } else { 4963 // Floats[1] = c - z + a + zz; 4964 Floats[1] = c; 4965 Status |= Floats[1].subtract(z, RM); 4966 Status |= Floats[1].add(a, RM); 4967 Status |= Floats[1].add(zz, RM); 4968 } 4969 } else { 4970 // q = a - z; 4971 APFloat q = a; 4972 Status |= q.subtract(z, RM); 4973 4974 // zz = q + c + (a - (q + z)) + aa + cc; 4975 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. 
4976 auto zz = q; 4977 Status |= zz.add(c, RM); 4978 Status |= q.add(z, RM); 4979 Status |= q.subtract(a, RM); 4980 q.changeSign(); 4981 Status |= zz.add(q, RM); 4982 Status |= zz.add(aa, RM); 4983 Status |= zz.add(cc, RM); 4984 if (zz.isZero() && !zz.isNegative()) { 4985 Floats[0] = std::move(z); 4986 Floats[1].makeZero(/* Neg = */ false); 4987 return opOK; 4988 } 4989 Floats[0] = z; 4990 Status |= Floats[0].add(zz, RM); 4991 if (!Floats[0].isFinite()) { 4992 Floats[1].makeZero(/* Neg = */ false); 4993 return (opStatus)Status; 4994 } 4995 Floats[1] = std::move(z); 4996 Status |= Floats[1].subtract(Floats[0], RM); 4997 Status |= Floats[1].add(zz, RM); 4998 } 4999 return (opStatus)Status; 5000 } 5001 5002 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS, 5003 const DoubleAPFloat &RHS, 5004 DoubleAPFloat &Out, 5005 roundingMode RM) { 5006 if (LHS.getCategory() == fcNaN) { 5007 Out = LHS; 5008 return opOK; 5009 } 5010 if (RHS.getCategory() == fcNaN) { 5011 Out = RHS; 5012 return opOK; 5013 } 5014 if (LHS.getCategory() == fcZero) { 5015 Out = RHS; 5016 return opOK; 5017 } 5018 if (RHS.getCategory() == fcZero) { 5019 Out = LHS; 5020 return opOK; 5021 } 5022 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity && 5023 LHS.isNegative() != RHS.isNegative()) { 5024 Out.makeNaN(false, Out.isNegative(), nullptr); 5025 return opInvalidOp; 5026 } 5027 if (LHS.getCategory() == fcInfinity) { 5028 Out = LHS; 5029 return opOK; 5030 } 5031 if (RHS.getCategory() == fcInfinity) { 5032 Out = RHS; 5033 return opOK; 5034 } 5035 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal); 5036 5037 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]), 5038 CC(RHS.Floats[1]); 5039 assert(&A.getSemantics() == &semIEEEdouble); 5040 assert(&AA.getSemantics() == &semIEEEdouble); 5041 assert(&C.getSemantics() == &semIEEEdouble); 5042 assert(&CC.getSemantics() == &semIEEEdouble); 5043 assert(&Out.Floats[0].getSemantics() == 
&semIEEEdouble); 5044 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble); 5045 return Out.addImpl(A, AA, C, CC, RM); 5046 } 5047 5048 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS, 5049 roundingMode RM) { 5050 return addWithSpecial(*this, RHS, *this, RM); 5051 } 5052 5053 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS, 5054 roundingMode RM) { 5055 changeSign(); 5056 auto Ret = add(RHS, RM); 5057 changeSign(); 5058 return Ret; 5059 } 5060 5061 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS, 5062 APFloat::roundingMode RM) { 5063 const auto &LHS = *this; 5064 auto &Out = *this; 5065 /* Interesting observation: For special categories, finding the lowest 5066 common ancestor of the following layered graph gives the correct 5067 return category: 5068 5069 NaN 5070 / \ 5071 Zero Inf 5072 \ / 5073 Normal 5074 5075 e.g. NaN * NaN = NaN 5076 Zero * Inf = NaN 5077 Normal * Zero = Zero 5078 Normal * Inf = Inf 5079 */ 5080 if (LHS.getCategory() == fcNaN) { 5081 Out = LHS; 5082 return opOK; 5083 } 5084 if (RHS.getCategory() == fcNaN) { 5085 Out = RHS; 5086 return opOK; 5087 } 5088 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) || 5089 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) { 5090 Out.makeNaN(false, false, nullptr); 5091 return opOK; 5092 } 5093 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) { 5094 Out = LHS; 5095 return opOK; 5096 } 5097 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) { 5098 Out = RHS; 5099 return opOK; 5100 } 5101 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal && 5102 "Special cases not handled exhaustively"); 5103 5104 int Status = opOK; 5105 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1]; 5106 // t = a * c 5107 APFloat T = A; 5108 Status |= T.multiply(C, RM); 5109 if (!T.isFiniteNonZero()) { 5110 Floats[0] = T; 5111 Floats[1].makeZero(/* Neg = */ false); 5112 
return (opStatus)Status; 5113 } 5114 5115 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). 5116 APFloat Tau = A; 5117 T.changeSign(); 5118 Status |= Tau.fusedMultiplyAdd(C, T, RM); 5119 T.changeSign(); 5120 { 5121 // v = a * d 5122 APFloat V = A; 5123 Status |= V.multiply(D, RM); 5124 // w = b * c 5125 APFloat W = B; 5126 Status |= W.multiply(C, RM); 5127 Status |= V.add(W, RM); 5128 // tau += v + w 5129 Status |= Tau.add(V, RM); 5130 } 5131 // u = t + tau 5132 APFloat U = T; 5133 Status |= U.add(Tau, RM); 5134 5135 Floats[0] = U; 5136 if (!U.isFinite()) { 5137 Floats[1].makeZero(/* Neg = */ false); 5138 } else { 5139 // Floats[1] = (t - u) + tau 5140 Status |= T.subtract(U, RM); 5141 Status |= T.add(Tau, RM); 5142 Floats[1] = T; 5143 } 5144 return (opStatus)Status; 5145 } 5146 5147 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS, 5148 APFloat::roundingMode RM) { 5149 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5150 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5151 auto Ret = 5152 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM); 5153 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5154 return Ret; 5155 } 5156 5157 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) { 5158 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5159 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5160 auto Ret = 5161 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5162 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5163 return Ret; 5164 } 5165 5166 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) { 5167 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5168 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5169 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt())); 5170 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5171 return Ret; 
5172 } 5173 5174 APFloat::opStatus 5175 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, 5176 const DoubleAPFloat &Addend, 5177 APFloat::roundingMode RM) { 5178 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5179 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5180 auto Ret = Tmp.fusedMultiplyAdd( 5181 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()), 5182 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM); 5183 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5184 return Ret; 5185 } 5186 5187 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { 5188 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5189 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5190 auto Ret = Tmp.roundToIntegral(RM); 5191 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5192 return Ret; 5193 } 5194 5195 void DoubleAPFloat::changeSign() { 5196 Floats[0].changeSign(); 5197 Floats[1].changeSign(); 5198 } 5199 5200 APFloat::cmpResult 5201 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const { 5202 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]); 5203 if (Result != cmpEqual) 5204 return Result; 5205 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]); 5206 if (Result == cmpLessThan || Result == cmpGreaterThan) { 5207 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative(); 5208 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative(); 5209 if (Against && !RHSAgainst) 5210 return cmpLessThan; 5211 if (!Against && RHSAgainst) 5212 return cmpGreaterThan; 5213 if (!Against && !RHSAgainst) 5214 return Result; 5215 if (Against && RHSAgainst) 5216 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result); 5217 } 5218 return Result; 5219 } 5220 5221 APFloat::fltCategory DoubleAPFloat::getCategory() const { 5222 return Floats[0].getCategory(); 5223 } 5224 5225 bool DoubleAPFloat::isNegative() 
const { return Floats[0].isNegative(); } 5226 5227 void DoubleAPFloat::makeInf(bool Neg) { 5228 Floats[0].makeInf(Neg); 5229 Floats[1].makeZero(/* Neg = */ false); 5230 } 5231 5232 void DoubleAPFloat::makeZero(bool Neg) { 5233 Floats[0].makeZero(Neg); 5234 Floats[1].makeZero(/* Neg = */ false); 5235 } 5236 5237 void DoubleAPFloat::makeLargest(bool Neg) { 5238 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5239 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull)); 5240 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull)); 5241 if (Neg) 5242 changeSign(); 5243 } 5244 5245 void DoubleAPFloat::makeSmallest(bool Neg) { 5246 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5247 Floats[0].makeSmallest(Neg); 5248 Floats[1].makeZero(/* Neg = */ false); 5249 } 5250 5251 void DoubleAPFloat::makeSmallestNormalized(bool Neg) { 5252 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5253 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull)); 5254 if (Neg) 5255 Floats[0].changeSign(); 5256 Floats[1].makeZero(/* Neg = */ false); 5257 } 5258 5259 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) { 5260 Floats[0].makeNaN(SNaN, Neg, fill); 5261 Floats[1].makeZero(/* Neg = */ false); 5262 } 5263 5264 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const { 5265 auto Result = Floats[0].compare(RHS.Floats[0]); 5266 // |Float[0]| > |Float[1]| 5267 if (Result == APFloat::cmpEqual) 5268 return Floats[1].compare(RHS.Floats[1]); 5269 return Result; 5270 } 5271 5272 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const { 5273 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) && 5274 Floats[1].bitwiseIsEqual(RHS.Floats[1]); 5275 } 5276 5277 hash_code hash_value(const DoubleAPFloat &Arg) { 5278 if (Arg.Floats) 5279 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1])); 5280 return hash_combine(Arg.Semantics); 5281 } 5282 5283 
APInt DoubleAPFloat::bitcastToAPInt() const { 5284 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5285 uint64_t Data[] = { 5286 Floats[0].bitcastToAPInt().getRawData()[0], 5287 Floats[1].bitcastToAPInt().getRawData()[0], 5288 }; 5289 return APInt(128, 2, Data); 5290 } 5291 5292 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, 5293 roundingMode RM) { 5294 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5295 APFloat Tmp(semPPCDoubleDoubleLegacy); 5296 auto Ret = Tmp.convertFromString(S, RM); 5297 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5298 return Ret; 5299 } 5300 5301 APFloat::opStatus DoubleAPFloat::next(bool nextDown) { 5302 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5303 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5304 auto Ret = Tmp.next(nextDown); 5305 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5306 return Ret; 5307 } 5308 5309 APFloat::opStatus 5310 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input, 5311 unsigned int Width, bool IsSigned, 5312 roundingMode RM, bool *IsExact) const { 5313 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5314 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5315 .convertToInteger(Input, Width, IsSigned, RM, IsExact); 5316 } 5317 5318 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input, 5319 bool IsSigned, 5320 roundingMode RM) { 5321 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5322 APFloat Tmp(semPPCDoubleDoubleLegacy); 5323 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM); 5324 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5325 return Ret; 5326 } 5327 5328 APFloat::opStatus 5329 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input, 5330 unsigned int InputSize, 5331 bool IsSigned, roundingMode RM) { 5332 assert(Semantics == &semPPCDoubleDouble && 
"Unexpected Semantics"); 5333 APFloat Tmp(semPPCDoubleDoubleLegacy); 5334 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM); 5335 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5336 return Ret; 5337 } 5338 5339 APFloat::opStatus 5340 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input, 5341 unsigned int InputSize, 5342 bool IsSigned, roundingMode RM) { 5343 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5344 APFloat Tmp(semPPCDoubleDoubleLegacy); 5345 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM); 5346 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); 5347 return Ret; 5348 } 5349 5350 unsigned int DoubleAPFloat::convertToHexString(char *DST, 5351 unsigned int HexDigits, 5352 bool UpperCase, 5353 roundingMode RM) const { 5354 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5355 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5356 .convertToHexString(DST, HexDigits, UpperCase, RM); 5357 } 5358 5359 bool DoubleAPFloat::isDenormal() const { 5360 return getCategory() == fcNormal && 5361 (Floats[0].isDenormal() || Floats[1].isDenormal() || 5362 // (double)(Hi + Lo) == Hi defines a normal number. 
5363 Floats[0] != Floats[0] + Floats[1]); 5364 } 5365 5366 bool DoubleAPFloat::isSmallest() const { 5367 if (getCategory() != fcNormal) 5368 return false; 5369 DoubleAPFloat Tmp(*this); 5370 Tmp.makeSmallest(this->isNegative()); 5371 return Tmp.compare(*this) == cmpEqual; 5372 } 5373 5374 bool DoubleAPFloat::isSmallestNormalized() const { 5375 if (getCategory() != fcNormal) 5376 return false; 5377 5378 DoubleAPFloat Tmp(*this); 5379 Tmp.makeSmallestNormalized(this->isNegative()); 5380 return Tmp.compare(*this) == cmpEqual; 5381 } 5382 5383 bool DoubleAPFloat::isLargest() const { 5384 if (getCategory() != fcNormal) 5385 return false; 5386 DoubleAPFloat Tmp(*this); 5387 Tmp.makeLargest(this->isNegative()); 5388 return Tmp.compare(*this) == cmpEqual; 5389 } 5390 5391 bool DoubleAPFloat::isInteger() const { 5392 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5393 return Floats[0].isInteger() && Floats[1].isInteger(); 5394 } 5395 5396 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str, 5397 unsigned FormatPrecision, 5398 unsigned FormatMaxPadding, 5399 bool TruncateZero) const { 5400 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5401 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) 5402 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); 5403 } 5404 5405 bool DoubleAPFloat::getExactInverse(APFloat *inv) const { 5406 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5407 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); 5408 if (!inv) 5409 return Tmp.getExactInverse(nullptr); 5410 APFloat Inv(semPPCDoubleDoubleLegacy); 5411 auto Ret = Tmp.getExactInverse(&Inv); 5412 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt()); 5413 return Ret; 5414 } 5415 5416 int DoubleAPFloat::getExactLog2() const { 5417 // TODO: Implement me 5418 return INT_MIN; 5419 } 5420 5421 int DoubleAPFloat::getExactLog2Abs() const { 5422 // TODO: Implement me 5423 return INT_MIN; 5424 } 5425 5426 DoubleAPFloat 
scalbn(const DoubleAPFloat &Arg, int Exp, 5427 APFloat::roundingMode RM) { 5428 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5429 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM), 5430 scalbn(Arg.Floats[1], Exp, RM)); 5431 } 5432 5433 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp, 5434 APFloat::roundingMode RM) { 5435 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); 5436 APFloat First = frexp(Arg.Floats[0], Exp, RM); 5437 APFloat Second = Arg.Floats[1]; 5438 if (Arg.getCategory() == APFloat::fcNormal) 5439 Second = scalbn(Second, -Exp, RM); 5440 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second)); 5441 } 5442 5443 } // namespace detail 5444 5445 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) { 5446 if (usesLayout<IEEEFloat>(Semantics)) { 5447 new (&IEEE) IEEEFloat(std::move(F)); 5448 return; 5449 } 5450 if (usesLayout<DoubleAPFloat>(Semantics)) { 5451 const fltSemantics& S = F.getSemantics(); 5452 new (&Double) 5453 DoubleAPFloat(Semantics, APFloat(std::move(F), S), 5454 APFloat(semIEEEdouble)); 5455 return; 5456 } 5457 llvm_unreachable("Unexpected semantics"); 5458 } 5459 5460 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str, 5461 roundingMode RM) { 5462 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); 5463 } 5464 5465 hash_code hash_value(const APFloat &Arg) { 5466 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics())) 5467 return hash_value(Arg.U.IEEE); 5468 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics())) 5469 return hash_value(Arg.U.Double); 5470 llvm_unreachable("Unexpected semantics"); 5471 } 5472 5473 APFloat::APFloat(const fltSemantics &Semantics, StringRef S) 5474 : APFloat(Semantics) { 5475 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven); 5476 assert(StatusOrErr && "Invalid floating point representation"); 5477 consumeError(StatusOrErr.takeError()); 5478 } 
5479 5480 FPClassTest APFloat::classify() const { 5481 if (isZero()) 5482 return isNegative() ? fcNegZero : fcPosZero; 5483 if (isNormal()) 5484 return isNegative() ? fcNegNormal : fcPosNormal; 5485 if (isDenormal()) 5486 return isNegative() ? fcNegSubnormal : fcPosSubnormal; 5487 if (isInfinity()) 5488 return isNegative() ? fcNegInf : fcPosInf; 5489 assert(isNaN() && "Other class of FP constant"); 5490 return isSignaling() ? fcSNan : fcQNan; 5491 } 5492 5493 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, 5494 roundingMode RM, bool *losesInfo) { 5495 if (&getSemantics() == &ToSemantics) { 5496 *losesInfo = false; 5497 return opOK; 5498 } 5499 if (usesLayout<IEEEFloat>(getSemantics()) && 5500 usesLayout<IEEEFloat>(ToSemantics)) 5501 return U.IEEE.convert(ToSemantics, RM, losesInfo); 5502 if (usesLayout<IEEEFloat>(getSemantics()) && 5503 usesLayout<DoubleAPFloat>(ToSemantics)) { 5504 assert(&ToSemantics == &semPPCDoubleDouble); 5505 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo); 5506 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt()); 5507 return Ret; 5508 } 5509 if (usesLayout<DoubleAPFloat>(getSemantics()) && 5510 usesLayout<IEEEFloat>(ToSemantics)) { 5511 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo); 5512 *this = APFloat(std::move(getIEEE()), ToSemantics); 5513 return Ret; 5514 } 5515 llvm_unreachable("Unexpected semantics"); 5516 } 5517 5518 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) { 5519 return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits)); 5520 } 5521 5522 void APFloat::print(raw_ostream &OS) const { 5523 SmallVector<char, 16> Buffer; 5524 toString(Buffer); 5525 OS << Buffer; 5526 } 5527 5528 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5529 LLVM_DUMP_METHOD void APFloat::dump() const { 5530 print(dbgs()); 5531 dbgs() << '\n'; 5532 } 5533 #endif 5534 5535 void APFloat::Profile(FoldingSetNodeID &NID) const { 5536 NID.Add(bitcastToAPInt()); 5537 } 5538 5539 /* Same 
as convertToInteger(integerPart*, ...), except the result is returned in 5540 an APSInt, whose initial bit-width and signed-ness are used to determine the 5541 precision of the conversion. 5542 */ 5543 APFloat::opStatus APFloat::convertToInteger(APSInt &result, 5544 roundingMode rounding_mode, 5545 bool *isExact) const { 5546 unsigned bitWidth = result.getBitWidth(); 5547 SmallVector<uint64_t, 4> parts(result.getNumWords()); 5548 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(), 5549 rounding_mode, isExact); 5550 // Keeps the original signed-ness. 5551 result = APInt(bitWidth, parts); 5552 return status; 5553 } 5554 5555 double APFloat::convertToDouble() const { 5556 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble) 5557 return getIEEE().convertToDouble(); 5558 assert(getSemantics().isRepresentableBy(semIEEEdouble) && 5559 "Float semantics is not representable by IEEEdouble"); 5560 APFloat Temp = *this; 5561 bool LosesInfo; 5562 opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo); 5563 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5564 (void)St; 5565 return Temp.getIEEE().convertToDouble(); 5566 } 5567 5568 #ifdef HAS_IEE754_FLOAT128 5569 float128 APFloat::convertToQuad() const { 5570 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad) 5571 return getIEEE().convertToQuad(); 5572 assert(getSemantics().isRepresentableBy(semIEEEquad) && 5573 "Float semantics is not representable by IEEEquad"); 5574 APFloat Temp = *this; 5575 bool LosesInfo; 5576 opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo); 5577 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5578 (void)St; 5579 return Temp.getIEEE().convertToQuad(); 5580 } 5581 #endif 5582 5583 float APFloat::convertToFloat() const { 5584 if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle) 5585 return getIEEE().convertToFloat(); 5586 
assert(getSemantics().isRepresentableBy(semIEEEsingle) && 5587 "Float semantics is not representable by IEEEsingle"); 5588 APFloat Temp = *this; 5589 bool LosesInfo; 5590 opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo); 5591 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision"); 5592 (void)St; 5593 return Temp.getIEEE().convertToFloat(); 5594 } 5595 5596 } // namespace llvm 5597 5598 #undef APFLOAT_DISPATCH_ON_SEMANTICS 5599