1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MathExtras.h" 23 #include <cstring> 24 #include <limits.h> 25 26 using namespace llvm; 27 28 /// A macro used to combine two fcCategory enums into one key which can be used 29 /// in a switch statement to classify how the interaction of two APFloat's 30 /// categories affects an operation. 31 /// 32 /// TODO: If clang source code is ever allowed to use constexpr in its own 33 /// codebase, change this into a static inline function. 34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 35 36 /* Assumed in hexadecimal significand parsing, and conversion to 37 hexadecimal strings. */ 38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 39 40 namespace llvm { 41 42 /* Represents floating point arithmetic semantics. */ 43 struct fltSemantics { 44 /* The largest E such that 2^E is representable; this matches the 45 definition of IEEE 754. */ 46 APFloat::ExponentType maxExponent; 47 48 /* The smallest E such that 2^E is a normalized number; this 49 matches the definition of IEEE 754. */ 50 APFloat::ExponentType minExponent; 51 52 /* Number of bits in the significand. This includes the integer 53 bit. */ 54 unsigned int precision; 55 }; 56 57 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 }; 58 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 }; 59 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 }; 60 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 }; 61 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 }; 62 const fltSemantics APFloat::Bogus = { 0, 0, 0 }; 63 64 /* The PowerPC format consists of two doubles. It does not map cleanly 65 onto the usual format above. It is approximated using twice the 66 mantissa bits. Note that for exponents near the double minimum, 67 we no longer can represent the full 106 mantissa bits, so those 68 will be treated as denormal numbers. 69 70 FIXME: While this approximation is equivalent to what GCC uses for 71 compile-time arithmetic on PPC double-double numbers, it is not able 72 to represent all possible values held by a PPC double-double number, 73 for example: (long double) 1.0 + (long double) 0x1p-106 74 Should this be replaced by a full emulation of PPC double-double? */ 75 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 }; 76 77 /* A tight upper bound on number of parts required to hold the value 78 pow(5, power) is 79 80 power * 815 / (351 * integerPartWidth) + 1 81 82 However, whilst the result may require only this many parts, 83 because we are multiplying two values to get it, the 84 multiplication may require an extra part with the excess part 85 being zero (consider the trivial case of 1 * 1, tcFullMultiply 86 requires two parts to hold the single-part result). So we add an 87 extra one to guarantee enough space whilst multiplying. */ 88 const unsigned int maxExponent = 16383; 89 const unsigned int maxPrecision = 113; 90 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 91 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 92 / (351 * integerPartWidth)); 93 } 94 95 /* A bunch of private, handy routines. */ 96 97 static inline unsigned int 98 partCountForBits(unsigned int bits) 99 { 100 return ((bits) + integerPartWidth - 1) / integerPartWidth; 101 } 102 103 /* Returns 0U-9U. Return values >= 10U are not digits. */ 104 static inline unsigned int 105 decDigitValue(unsigned int c) 106 { 107 return c - '0'; 108 } 109 110 /* Return the value of a decimal exponent of the form 111 [+-]ddddddd. 112 113 If the exponent overflows, returns a large exponent with the 114 appropriate sign. */ 115 static int 116 readExponent(StringRef::iterator begin, StringRef::iterator end) 117 { 118 bool isNegative; 119 unsigned int absExponent; 120 const unsigned int overlargeExponent = 24000; /* FIXME. */ 121 StringRef::iterator p = begin; 122 123 assert(p != end && "Exponent has no digits"); 124 125 isNegative = (*p == '-'); 126 if (*p == '-' || *p == '+') { 127 p++; 128 assert(p != end && "Exponent has no digits"); 129 } 130 131 absExponent = decDigitValue(*p++); 132 assert(absExponent < 10U && "Invalid character in exponent"); 133 134 for (; p != end; ++p) { 135 unsigned int value; 136 137 value = decDigitValue(*p); 138 assert(value < 10U && "Invalid character in exponent"); 139 140 value += absExponent * 10; 141 if (absExponent >= overlargeExponent) { 142 absExponent = overlargeExponent; 143 p = end; /* outwit assert below */ 144 break; 145 } 146 absExponent = value; 147 } 148 149 assert(p == end && "Invalid exponent in exponent"); 150 151 if (isNegative) 152 return -(int) absExponent; 153 else 154 return (int) absExponent; 155 } 156 157 /* This is ugly and needs cleaning up, but I don't immediately see 158 how whilst remaining safe. */ 159 static int 160 totalExponent(StringRef::iterator p, StringRef::iterator end, 161 int exponentAdjustment) 162 { 163 int unsignedExponent; 164 bool negative, overflow; 165 int exponent = 0; 166 167 assert(p != end && "Exponent has no digits"); 168 169 negative = *p == '-'; 170 if (*p == '-' || *p == '+') { 171 p++; 172 assert(p != end && "Exponent has no digits"); 173 } 174 175 unsignedExponent = 0; 176 overflow = false; 177 for (; p != end; ++p) { 178 unsigned int value; 179 180 value = decDigitValue(*p); 181 assert(value < 10U && "Invalid character in exponent"); 182 183 unsignedExponent = unsignedExponent * 10 + value; 184 if (unsignedExponent > 32767) { 185 overflow = true; 186 break; 187 } 188 } 189 190 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 191 overflow = true; 192 193 if (!overflow) { 194 exponent = unsignedExponent; 195 if (negative) 196 exponent = -exponent; 197 exponent += exponentAdjustment; 198 if (exponent > 32767 || exponent < -32768) 199 overflow = true; 200 } 201 202 if (overflow) 203 exponent = negative ? -32768: 32767; 204 205 return exponent; 206 } 207 208 static StringRef::iterator 209 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 210 StringRef::iterator *dot) 211 { 212 StringRef::iterator p = begin; 213 *dot = end; 214 while (p != end && *p == '0') 215 p++; 216 217 if (p != end && *p == '.') { 218 *dot = p++; 219 220 assert(end - begin != 1 && "Significand has no digits"); 221 222 while (p != end && *p == '0') 223 p++; 224 } 225 226 return p; 227 } 228 229 /* Given a normal decimal floating point number of the form 230 231 dddd.dddd[eE][+-]ddd 232 233 where the decimal point and exponent are optional, fill out the 234 structure D. Exponent is appropriate if the significand is 235 treated as an integer, and normalizedExponent if the significand 236 is taken to have the decimal point after a single leading 237 non-zero digit. 238 239 If the value is zero, V->firstSigDigit points to a non-digit, and 240 the return exponent is zero. 241 */ 242 struct decimalInfo { 243 const char *firstSigDigit; 244 const char *lastSigDigit; 245 int exponent; 246 int normalizedExponent; 247 }; 248 249 static void 250 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 251 decimalInfo *D) 252 { 253 StringRef::iterator dot = end; 254 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 255 256 D->firstSigDigit = p; 257 D->exponent = 0; 258 D->normalizedExponent = 0; 259 260 for (; p != end; ++p) { 261 if (*p == '.') { 262 assert(dot == end && "String contains multiple dots"); 263 dot = p++; 264 if (p == end) 265 break; 266 } 267 if (decDigitValue(*p) >= 10U) 268 break; 269 } 270 271 if (p != end) { 272 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 273 assert(p != begin && "Significand has no digits"); 274 assert((dot == end || p - begin != 1) && "Significand has no digits"); 275 276 /* p points to the first non-digit in the string */ 277 D->exponent = readExponent(p + 1, end); 278 279 /* Implied decimal point? */ 280 if (dot == end) 281 dot = p; 282 } 283 284 /* If number is all zeroes accept any exponent. */ 285 if (p != D->firstSigDigit) { 286 /* Drop insignificant trailing zeroes. */ 287 if (p != begin) { 288 do 289 do 290 p--; 291 while (p != begin && *p == '0'); 292 while (p != begin && *p == '.'); 293 } 294 295 /* Adjust the exponents for any decimal point. */ 296 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 297 D->normalizedExponent = (D->exponent + 298 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 299 - (dot > D->firstSigDigit && dot < p))); 300 } 301 302 D->lastSigDigit = p; 303 } 304 305 /* Return the trailing fraction of a hexadecimal number. 306 DIGITVALUE is the first hex digit of the fraction, P points to 307 the next digit. */ 308 static lostFraction 309 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 310 unsigned int digitValue) 311 { 312 unsigned int hexDigit; 313 314 /* If the first trailing digit isn't 0 or 8 we can work out the 315 fraction immediately. */ 316 if (digitValue > 8) 317 return lfMoreThanHalf; 318 else if (digitValue < 8 && digitValue > 0) 319 return lfLessThanHalf; 320 321 // Otherwise we need to find the first non-zero digit. 322 while (p != end && (*p == '0' || *p == '.')) 323 p++; 324 325 assert(p != end && "Invalid trailing hexadecimal fraction!"); 326 327 hexDigit = hexDigitValue(*p); 328 329 /* If we ran off the end it is exactly zero or one-half, otherwise 330 a little more. */ 331 if (hexDigit == -1U) 332 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 333 else 334 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 335 } 336 337 /* Return the fraction lost were a bignum truncated losing the least 338 significant BITS bits. */ 339 static lostFraction 340 lostFractionThroughTruncation(const integerPart *parts, 341 unsigned int partCount, 342 unsigned int bits) 343 { 344 unsigned int lsb; 345 346 lsb = APInt::tcLSB(parts, partCount); 347 348 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 349 if (bits <= lsb) 350 return lfExactlyZero; 351 if (bits == lsb + 1) 352 return lfExactlyHalf; 353 if (bits <= partCount * integerPartWidth && 354 APInt::tcExtractBit(parts, bits - 1)) 355 return lfMoreThanHalf; 356 357 return lfLessThanHalf; 358 } 359 360 /* Shift DST right BITS bits noting lost fraction. */ 361 static lostFraction 362 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 363 { 364 lostFraction lost_fraction; 365 366 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 367 368 APInt::tcShiftRight(dst, parts, bits); 369 370 return lost_fraction; 371 } 372 373 /* Combine the effect of two lost fractions. */ 374 static lostFraction 375 combineLostFractions(lostFraction moreSignificant, 376 lostFraction lessSignificant) 377 { 378 if (lessSignificant != lfExactlyZero) { 379 if (moreSignificant == lfExactlyZero) 380 moreSignificant = lfLessThanHalf; 381 else if (moreSignificant == lfExactlyHalf) 382 moreSignificant = lfMoreThanHalf; 383 } 384 385 return moreSignificant; 386 } 387 388 /* The error from the true value, in half-ulps, on multiplying two 389 floating point numbers, which differ from the value they 390 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 391 than the returned value. 392 393 See "How to Read Floating Point Numbers Accurately" by William D 394 Clinger. */ 395 static unsigned int 396 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 397 { 398 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 399 400 if (HUerr1 + HUerr2 == 0) 401 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 402 else 403 return inexactMultiply + 2 * (HUerr1 + HUerr2); 404 } 405 406 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 407 when the least significant BITS are truncated. BITS cannot be 408 zero. */ 409 static integerPart 410 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 411 { 412 unsigned int count, partBits; 413 integerPart part, boundary; 414 415 assert(bits != 0); 416 417 bits--; 418 count = bits / integerPartWidth; 419 partBits = bits % integerPartWidth + 1; 420 421 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 422 423 if (isNearest) 424 boundary = (integerPart) 1 << (partBits - 1); 425 else 426 boundary = 0; 427 428 if (count == 0) { 429 if (part - boundary <= boundary - part) 430 return part - boundary; 431 else 432 return boundary - part; 433 } 434 435 if (part == boundary) { 436 while (--count) 437 if (parts[count]) 438 return ~(integerPart) 0; /* A lot. */ 439 440 return parts[0]; 441 } else if (part == boundary - 1) { 442 while (--count) 443 if (~parts[count]) 444 return ~(integerPart) 0; /* A lot. */ 445 446 return -parts[0]; 447 } 448 449 return ~(integerPart) 0; /* A lot. */ 450 } 451 452 /* Place pow(5, power) in DST, and return the number of parts used. 453 DST must be at least one part larger than size of the answer. */ 454 static unsigned int 455 powerOf5(integerPart *dst, unsigned int power) 456 { 457 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 458 15625, 78125 }; 459 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 460 pow5s[0] = 78125 * 5; 461 462 unsigned int partsCount[16] = { 1 }; 463 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 464 unsigned int result; 465 assert(power <= maxExponent); 466 467 p1 = dst; 468 p2 = scratch; 469 470 *p1 = firstEightPowers[power & 7]; 471 power >>= 3; 472 473 result = 1; 474 pow5 = pow5s; 475 476 for (unsigned int n = 0; power; power >>= 1, n++) { 477 unsigned int pc; 478 479 pc = partsCount[n]; 480 481 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 482 if (pc == 0) { 483 pc = partsCount[n - 1]; 484 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 485 pc *= 2; 486 if (pow5[pc - 1] == 0) 487 pc--; 488 partsCount[n] = pc; 489 } 490 491 if (power & 1) { 492 integerPart *tmp; 493 494 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 495 result += pc; 496 if (p2[result - 1] == 0) 497 result--; 498 499 /* Now result is in p1 with partsCount parts and p2 is scratch 500 space. */ 501 tmp = p1, p1 = p2, p2 = tmp; 502 } 503 504 pow5 += pc; 505 } 506 507 if (p1 != dst) 508 APInt::tcAssign(dst, p1, result); 509 510 return result; 511 } 512 513 /* Zero at the end to avoid modular arithmetic when adding one; used 514 when rounding up during hexadecimal output. */ 515 static const char hexDigitsLower[] = "0123456789abcdef0"; 516 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 517 static const char infinityL[] = "infinity"; 518 static const char infinityU[] = "INFINITY"; 519 static const char NaNL[] = "nan"; 520 static const char NaNU[] = "NAN"; 521 522 /* Write out an integerPart in hexadecimal, starting with the most 523 significant nibble. Write out exactly COUNT hexdigits, return 524 COUNT. */ 525 static unsigned int 526 partAsHex (char *dst, integerPart part, unsigned int count, 527 const char *hexDigitChars) 528 { 529 unsigned int result = count; 530 531 assert(count != 0 && count <= integerPartWidth / 4); 532 533 part >>= (integerPartWidth - 4 * count); 534 while (count--) { 535 dst[count] = hexDigitChars[part & 0xf]; 536 part >>= 4; 537 } 538 539 return result; 540 } 541 542 /* Write out an unsigned decimal integer. */ 543 static char * 544 writeUnsignedDecimal (char *dst, unsigned int n) 545 { 546 char buff[40], *p; 547 548 p = buff; 549 do 550 *p++ = '0' + n % 10; 551 while (n /= 10); 552 553 do 554 *dst++ = *--p; 555 while (p != buff); 556 557 return dst; 558 } 559 560 /* Write out a signed decimal integer. */ 561 static char * 562 writeSignedDecimal (char *dst, int value) 563 { 564 if (value < 0) { 565 *dst++ = '-'; 566 dst = writeUnsignedDecimal(dst, -(unsigned) value); 567 } else 568 dst = writeUnsignedDecimal(dst, value); 569 570 return dst; 571 } 572 573 /* Constructors. */ 574 void 575 APFloat::initialize(const fltSemantics *ourSemantics) 576 { 577 unsigned int count; 578 579 semantics = ourSemantics; 580 count = partCount(); 581 if (count > 1) 582 significand.parts = new integerPart[count]; 583 } 584 585 void 586 APFloat::freeSignificand() 587 { 588 if (needsCleanup()) 589 delete [] significand.parts; 590 } 591 592 void 593 APFloat::assign(const APFloat &rhs) 594 { 595 assert(semantics == rhs.semantics); 596 597 sign = rhs.sign; 598 category = rhs.category; 599 exponent = rhs.exponent; 600 if (isFiniteNonZero() || category == fcNaN) 601 copySignificand(rhs); 602 } 603 604 void 605 APFloat::copySignificand(const APFloat &rhs) 606 { 607 assert(isFiniteNonZero() || category == fcNaN); 608 assert(rhs.partCount() >= partCount()); 609 610 APInt::tcAssign(significandParts(), rhs.significandParts(), 611 partCount()); 612 } 613 614 /* Make this number a NaN, with an arbitrary but deterministic value 615 for the significand. If double or longer, this is a signalling NaN, 616 which may not be ideal. If float, this is QNaN(0). */ 617 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 618 { 619 category = fcNaN; 620 sign = Negative; 621 622 integerPart *significand = significandParts(); 623 unsigned numParts = partCount(); 624 625 // Set the significand bits to the fill. 626 if (!fill || fill->getNumWords() < numParts) 627 APInt::tcSet(significand, 0, numParts); 628 if (fill) { 629 APInt::tcAssign(significand, fill->getRawData(), 630 std::min(fill->getNumWords(), numParts)); 631 632 // Zero out the excess bits of the significand. 633 unsigned bitsToPreserve = semantics->precision - 1; 634 unsigned part = bitsToPreserve / 64; 635 bitsToPreserve %= 64; 636 significand[part] &= ((1ULL << bitsToPreserve) - 1); 637 for (part++; part != numParts; ++part) 638 significand[part] = 0; 639 } 640 641 unsigned QNaNBit = semantics->precision - 2; 642 643 if (SNaN) { 644 // We always have to clear the QNaN bit to make it an SNaN. 645 APInt::tcClearBit(significand, QNaNBit); 646 647 // If there are no bits set in the payload, we have to set 648 // *something* to make it a NaN instead of an infinity; 649 // conventionally, this is the next bit down from the QNaN bit. 650 if (APInt::tcIsZero(significand, numParts)) 651 APInt::tcSetBit(significand, QNaNBit - 1); 652 } else { 653 // We always have to set the QNaN bit to make it a QNaN. 654 APInt::tcSetBit(significand, QNaNBit); 655 } 656 657 // For x87 extended precision, we want to make a NaN, not a 658 // pseudo-NaN. Maybe we should expose the ability to make 659 // pseudo-NaNs? 660 if (semantics == &APFloat::x87DoubleExtended) 661 APInt::tcSetBit(significand, QNaNBit + 1); 662 } 663 664 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 665 const APInt *fill) { 666 APFloat value(Sem, uninitialized); 667 value.makeNaN(SNaN, Negative, fill); 668 return value; 669 } 670 671 APFloat & 672 APFloat::operator=(const APFloat &rhs) 673 { 674 if (this != &rhs) { 675 if (semantics != rhs.semantics) { 676 freeSignificand(); 677 initialize(rhs.semantics); 678 } 679 assign(rhs); 680 } 681 682 return *this; 683 } 684 685 APFloat & 686 APFloat::operator=(APFloat &&rhs) { 687 freeSignificand(); 688 689 semantics = rhs.semantics; 690 significand = rhs.significand; 691 exponent = rhs.exponent; 692 category = rhs.category; 693 sign = rhs.sign; 694 695 rhs.semantics = &Bogus; 696 return *this; 697 } 698 699 bool 700 APFloat::isDenormal() const { 701 return isFiniteNonZero() && (exponent == semantics->minExponent) && 702 (APInt::tcExtractBit(significandParts(), 703 semantics->precision - 1) == 0); 704 } 705 706 bool 707 APFloat::isSmallest() const { 708 // The smallest number by magnitude in our format will be the smallest 709 // denormal, i.e. the floating point number with exponent being minimum 710 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 711 return isFiniteNonZero() && exponent == semantics->minExponent && 712 significandMSB() == 0; 713 } 714 715 bool APFloat::isSignificandAllOnes() const { 716 // Test if the significand excluding the integral bit is all ones. This allows 717 // us to test for binade boundaries. 718 const integerPart *Parts = significandParts(); 719 const unsigned PartCount = partCount(); 720 for (unsigned i = 0; i < PartCount - 1; i++) 721 if (~Parts[i]) 722 return false; 723 724 // Set the unused high bits to all ones when we compare. 725 const unsigned NumHighBits = 726 PartCount*integerPartWidth - semantics->precision + 1; 727 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 728 "fill than integerPartWidth"); 729 const integerPart HighBitFill = 730 ~integerPart(0) << (integerPartWidth - NumHighBits); 731 if (~(Parts[PartCount - 1] | HighBitFill)) 732 return false; 733 734 return true; 735 } 736 737 bool APFloat::isSignificandAllZeros() const { 738 // Test if the significand excluding the integral bit is all zeros. This 739 // allows us to test for binade boundaries. 740 const integerPart *Parts = significandParts(); 741 const unsigned PartCount = partCount(); 742 743 for (unsigned i = 0; i < PartCount - 1; i++) 744 if (Parts[i]) 745 return false; 746 747 const unsigned NumHighBits = 748 PartCount*integerPartWidth - semantics->precision + 1; 749 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 750 "clear than integerPartWidth"); 751 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 752 753 if (Parts[PartCount - 1] & HighBitMask) 754 return false; 755 756 return true; 757 } 758 759 bool 760 APFloat::isLargest() const { 761 // The largest number by magnitude in our format will be the floating point 762 // number with maximum exponent and with significand that is all ones. 763 return isFiniteNonZero() && exponent == semantics->maxExponent 764 && isSignificandAllOnes(); 765 } 766 767 bool 768 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 769 if (this == &rhs) 770 return true; 771 if (semantics != rhs.semantics || 772 category != rhs.category || 773 sign != rhs.sign) 774 return false; 775 if (category==fcZero || category==fcInfinity) 776 return true; 777 else if (isFiniteNonZero() && exponent!=rhs.exponent) 778 return false; 779 else { 780 int i= partCount(); 781 const integerPart* p=significandParts(); 782 const integerPart* q=rhs.significandParts(); 783 for (; i>0; i--, p++, q++) { 784 if (*p != *q) 785 return false; 786 } 787 return true; 788 } 789 } 790 791 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 792 initialize(&ourSemantics); 793 sign = 0; 794 category = fcNormal; 795 zeroSignificand(); 796 exponent = ourSemantics.precision - 1; 797 significandParts()[0] = value; 798 normalize(rmNearestTiesToEven, lfExactlyZero); 799 } 800 801 APFloat::APFloat(const fltSemantics &ourSemantics) { 802 initialize(&ourSemantics); 803 category = fcZero; 804 sign = false; 805 } 806 807 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 808 // Allocates storage if necessary but does not initialize it. 809 initialize(&ourSemantics); 810 } 811 812 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 813 initialize(&ourSemantics); 814 convertFromString(text, rmNearestTiesToEven); 815 } 816 817 APFloat::APFloat(const APFloat &rhs) { 818 initialize(rhs.semantics); 819 assign(rhs); 820 } 821 822 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) { 823 *this = std::move(rhs); 824 } 825 826 APFloat::~APFloat() 827 { 828 freeSignificand(); 829 } 830 831 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 832 void APFloat::Profile(FoldingSetNodeID& ID) const { 833 ID.Add(bitcastToAPInt()); 834 } 835 836 unsigned int 837 APFloat::partCount() const 838 { 839 return partCountForBits(semantics->precision + 1); 840 } 841 842 unsigned int 843 APFloat::semanticsPrecision(const fltSemantics &semantics) 844 { 845 return semantics.precision; 846 } 847 848 const integerPart * 849 APFloat::significandParts() const 850 { 851 return const_cast<APFloat *>(this)->significandParts(); 852 } 853 854 integerPart * 855 APFloat::significandParts() 856 { 857 if (partCount() > 1) 858 return significand.parts; 859 else 860 return &significand.part; 861 } 862 863 void 864 APFloat::zeroSignificand() 865 { 866 APInt::tcSet(significandParts(), 0, partCount()); 867 } 868 869 /* Increment an fcNormal floating point number's significand. */ 870 void 871 APFloat::incrementSignificand() 872 { 873 integerPart carry; 874 875 carry = APInt::tcIncrement(significandParts(), partCount()); 876 877 /* Our callers should never cause us to overflow. */ 878 assert(carry == 0); 879 (void)carry; 880 } 881 882 /* Add the significand of the RHS. Returns the carry flag. */ 883 integerPart 884 APFloat::addSignificand(const APFloat &rhs) 885 { 886 integerPart *parts; 887 888 parts = significandParts(); 889 890 assert(semantics == rhs.semantics); 891 assert(exponent == rhs.exponent); 892 893 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 894 } 895 896 /* Subtract the significand of the RHS with a borrow flag. Returns 897 the borrow flag. */ 898 integerPart 899 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 900 { 901 integerPart *parts; 902 903 parts = significandParts(); 904 905 assert(semantics == rhs.semantics); 906 assert(exponent == rhs.exponent); 907 908 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 909 partCount()); 910 } 911 912 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 913 on to the full-precision result of the multiplication. Returns the 914 lost fraction. */ 915 lostFraction 916 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 917 { 918 unsigned int omsb; // One, not zero, based MSB. 919 unsigned int partsCount, newPartsCount, precision; 920 integerPart *lhsSignificand; 921 integerPart scratch[4]; 922 integerPart *fullSignificand; 923 lostFraction lost_fraction; 924 bool ignored; 925 926 assert(semantics == rhs.semantics); 927 928 precision = semantics->precision; 929 930 // Allocate space for twice as many bits as the original significand, plus one 931 // extra bit for the addition to overflow into. 932 newPartsCount = partCountForBits(precision * 2 + 1); 933 934 if (newPartsCount > 4) 935 fullSignificand = new integerPart[newPartsCount]; 936 else 937 fullSignificand = scratch; 938 939 lhsSignificand = significandParts(); 940 partsCount = partCount(); 941 942 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 943 rhs.significandParts(), partsCount, partsCount); 944 945 lost_fraction = lfExactlyZero; 946 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 947 exponent += rhs.exponent; 948 949 // Assume the operands involved in the multiplication are single-precision 950 // FP, and the two multiplicants are: 951 // *this = a23 . a22 ... a0 * 2^e1 952 // rhs = b23 . b22 ... b0 * 2^e2 953 // the result of multiplication is: 954 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) 955 // Note that there are three significant bits at the left-hand side of the 956 // radix point: two for the multiplication, and an overflow bit for the 957 // addition (that will always be zero at this point). Move the radix point 958 // toward left by two bits, and adjust exponent accordingly. 959 exponent += 2; 960 961 if (addend && addend->isNonZero()) { 962 // The intermediate result of the multiplication has "2 * precision" 963 // signicant bit; adjust the addend to be consistent with mul result. 964 // 965 Significand savedSignificand = significand; 966 const fltSemantics *savedSemantics = semantics; 967 fltSemantics extendedSemantics; 968 opStatus status; 969 unsigned int extendedPrecision; 970 971 // Normalize our MSB to one below the top bit to allow for overflow. 972 extendedPrecision = 2 * precision + 1; 973 if (omsb != extendedPrecision - 1) { 974 assert(extendedPrecision > omsb); 975 APInt::tcShiftLeft(fullSignificand, newPartsCount, 976 (extendedPrecision - 1) - omsb); 977 exponent -= (extendedPrecision - 1) - omsb; 978 } 979 980 /* Create new semantics. */ 981 extendedSemantics = *semantics; 982 extendedSemantics.precision = extendedPrecision; 983 984 if (newPartsCount == 1) 985 significand.part = fullSignificand[0]; 986 else 987 significand.parts = fullSignificand; 988 semantics = &extendedSemantics; 989 990 APFloat extendedAddend(*addend); 991 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 992 assert(status == opOK); 993 (void)status; 994 995 // Shift the significand of the addend right by one bit. This guarantees 996 // that the high bit of the significand is zero (same as fullSignificand), 997 // so the addition will overflow (if it does overflow at all) into the top bit. 998 lost_fraction = extendedAddend.shiftSignificandRight(1); 999 assert(lost_fraction == lfExactlyZero && 1000 "Lost precision while shifting addend for fused-multiply-add."); 1001 1002 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 1003 1004 /* Restore our state. */ 1005 if (newPartsCount == 1) 1006 fullSignificand[0] = significand.part; 1007 significand = savedSignificand; 1008 semantics = savedSemantics; 1009 1010 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 1011 } 1012 1013 // Convert the result having "2 * precision" significant-bits back to the one 1014 // having "precision" significant-bits. First, move the radix point from 1015 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1016 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1017 exponent -= precision + 1; 1018 1019 // In case MSB resides at the left-hand side of radix point, shift the 1020 // mantissa right by some amount to make sure the MSB reside right before 1021 // the radix point (i.e. "MSB . rest-significant-bits"). 1022 // 1023 // Note that the result is not normalized when "omsb < precision". So, the 1024 // caller needs to call APFloat::normalize() if normalized value is expected. 1025 if (omsb > precision) { 1026 unsigned int bits, significantParts; 1027 lostFraction lf; 1028 1029 bits = omsb - precision; 1030 significantParts = partCountForBits(omsb); 1031 lf = shiftRight(fullSignificand, significantParts, bits); 1032 lost_fraction = combineLostFractions(lf, lost_fraction); 1033 exponent += bits; 1034 } 1035 1036 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1037 1038 if (newPartsCount > 4) 1039 delete [] fullSignificand; 1040 1041 return lost_fraction; 1042 } 1043 1044 /* Multiply the significands of LHS and RHS to DST. */ 1045 lostFraction 1046 APFloat::divideSignificand(const APFloat &rhs) 1047 { 1048 unsigned int bit, i, partsCount; 1049 const integerPart *rhsSignificand; 1050 integerPart *lhsSignificand, *dividend, *divisor; 1051 integerPart scratch[4]; 1052 lostFraction lost_fraction; 1053 1054 assert(semantics == rhs.semantics); 1055 1056 lhsSignificand = significandParts(); 1057 rhsSignificand = rhs.significandParts(); 1058 partsCount = partCount(); 1059 1060 if (partsCount > 2) 1061 dividend = new integerPart[partsCount * 2]; 1062 else 1063 dividend = scratch; 1064 1065 divisor = dividend + partsCount; 1066 1067 /* Copy the dividend and divisor as they will be modified in-place. */ 1068 for (i = 0; i < partsCount; i++) { 1069 dividend[i] = lhsSignificand[i]; 1070 divisor[i] = rhsSignificand[i]; 1071 lhsSignificand[i] = 0; 1072 } 1073 1074 exponent -= rhs.exponent; 1075 1076 unsigned int precision = semantics->precision; 1077 1078 /* Normalize the divisor. */ 1079 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1080 if (bit) { 1081 exponent += bit; 1082 APInt::tcShiftLeft(divisor, partsCount, bit); 1083 } 1084 1085 /* Normalize the dividend. */ 1086 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1087 if (bit) { 1088 exponent -= bit; 1089 APInt::tcShiftLeft(dividend, partsCount, bit); 1090 } 1091 1092 /* Ensure the dividend >= divisor initially for the loop below. 1093 Incidentally, this means that the division loop below is 1094 guaranteed to set the integer bit to one. */ 1095 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1096 exponent--; 1097 APInt::tcShiftLeft(dividend, partsCount, 1); 1098 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1099 } 1100 1101 /* Long division. */ 1102 for (bit = precision; bit; bit -= 1) { 1103 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1104 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1105 APInt::tcSetBit(lhsSignificand, bit - 1); 1106 } 1107 1108 APInt::tcShiftLeft(dividend, partsCount, 1); 1109 } 1110 1111 /* Figure out the lost fraction. */ 1112 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1113 1114 if (cmp > 0) 1115 lost_fraction = lfMoreThanHalf; 1116 else if (cmp == 0) 1117 lost_fraction = lfExactlyHalf; 1118 else if (APInt::tcIsZero(dividend, partsCount)) 1119 lost_fraction = lfExactlyZero; 1120 else 1121 lost_fraction = lfLessThanHalf; 1122 1123 if (partsCount > 2) 1124 delete [] dividend; 1125 1126 return lost_fraction; 1127 } 1128 1129 unsigned int 1130 APFloat::significandMSB() const 1131 { 1132 return APInt::tcMSB(significandParts(), partCount()); 1133 } 1134 1135 unsigned int 1136 APFloat::significandLSB() const 1137 { 1138 return APInt::tcLSB(significandParts(), partCount()); 1139 } 1140 1141 /* Note that a zero result is NOT normalized to fcZero. */ 1142 lostFraction 1143 APFloat::shiftSignificandRight(unsigned int bits) 1144 { 1145 /* Our exponent should not overflow. */ 1146 assert((ExponentType) (exponent + bits) >= exponent); 1147 1148 exponent += bits; 1149 1150 return shiftRight(significandParts(), partCount(), bits); 1151 } 1152 1153 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1154 void 1155 APFloat::shiftSignificandLeft(unsigned int bits) 1156 { 1157 assert(bits < semantics->precision); 1158 1159 if (bits) { 1160 unsigned int partsCount = partCount(); 1161 1162 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1163 exponent -= bits; 1164 1165 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1166 } 1167 } 1168 1169 APFloat::cmpResult 1170 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1171 { 1172 int compare; 1173 1174 assert(semantics == rhs.semantics); 1175 assert(isFiniteNonZero()); 1176 assert(rhs.isFiniteNonZero()); 1177 1178 compare = exponent - rhs.exponent; 1179 1180 /* If exponents are equal, do an unsigned bignum comparison of the 1181 significands. */ 1182 if (compare == 0) 1183 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1184 partCount()); 1185 1186 if (compare > 0) 1187 return cmpGreaterThan; 1188 else if (compare < 0) 1189 return cmpLessThan; 1190 else 1191 return cmpEqual; 1192 } 1193 1194 /* Handle overflow. Sign is preserved. We either become infinity or 1195 the largest finite number. */ 1196 APFloat::opStatus 1197 APFloat::handleOverflow(roundingMode rounding_mode) 1198 { 1199 /* Infinity? */ 1200 if (rounding_mode == rmNearestTiesToEven || 1201 rounding_mode == rmNearestTiesToAway || 1202 (rounding_mode == rmTowardPositive && !sign) || 1203 (rounding_mode == rmTowardNegative && sign)) { 1204 category = fcInfinity; 1205 return (opStatus) (opOverflow | opInexact); 1206 } 1207 1208 /* Otherwise we become the largest finite number. */ 1209 category = fcNormal; 1210 exponent = semantics->maxExponent; 1211 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1212 semantics->precision); 1213 1214 return opInexact; 1215 } 1216 1217 /* Returns TRUE if, when truncating the current number, with BIT the 1218 new LSB, with the given lost fraction and rounding mode, the result 1219 would need to be rounded away from zero (i.e., by increasing the 1220 signficand). This routine must work for fcZero of both signs, and 1221 fcNormal numbers. */ 1222 bool 1223 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1224 lostFraction lost_fraction, 1225 unsigned int bit) const 1226 { 1227 /* NaNs and infinities should not have lost fractions. */ 1228 assert(isFiniteNonZero() || category == fcZero); 1229 1230 /* Current callers never pass this so we don't handle it. */ 1231 assert(lost_fraction != lfExactlyZero); 1232 1233 switch (rounding_mode) { 1234 case rmNearestTiesToAway: 1235 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1236 1237 case rmNearestTiesToEven: 1238 if (lost_fraction == lfMoreThanHalf) 1239 return true; 1240 1241 /* Our zeroes don't have a significand to test. */ 1242 if (lost_fraction == lfExactlyHalf && category != fcZero) 1243 return APInt::tcExtractBit(significandParts(), bit); 1244 1245 return false; 1246 1247 case rmTowardZero: 1248 return false; 1249 1250 case rmTowardPositive: 1251 return sign == false; 1252 1253 case rmTowardNegative: 1254 return sign == true; 1255 } 1256 llvm_unreachable("Invalid rounding mode found"); 1257 } 1258 1259 APFloat::opStatus 1260 APFloat::normalize(roundingMode rounding_mode, 1261 lostFraction lost_fraction) 1262 { 1263 unsigned int omsb; /* One, not zero, based MSB. */ 1264 int exponentChange; 1265 1266 if (!isFiniteNonZero()) 1267 return opOK; 1268 1269 /* Before rounding normalize the exponent of fcNormal numbers. */ 1270 omsb = significandMSB() + 1; 1271 1272 if (omsb) { 1273 /* OMSB is numbered from 1. We want to place it in the integer 1274 bit numbered PRECISION if possible, with a compensating change in 1275 the exponent. */ 1276 exponentChange = omsb - semantics->precision; 1277 1278 /* If the resulting exponent is too high, overflow according to 1279 the rounding mode. */ 1280 if (exponent + exponentChange > semantics->maxExponent) 1281 return handleOverflow(rounding_mode); 1282 1283 /* Subnormal numbers have exponent minExponent, and their MSB 1284 is forced based on that. */ 1285 if (exponent + exponentChange < semantics->minExponent) 1286 exponentChange = semantics->minExponent - exponent; 1287 1288 /* Shifting left is easy as we don't lose precision. */ 1289 if (exponentChange < 0) { 1290 assert(lost_fraction == lfExactlyZero); 1291 1292 shiftSignificandLeft(-exponentChange); 1293 1294 return opOK; 1295 } 1296 1297 if (exponentChange > 0) { 1298 lostFraction lf; 1299 1300 /* Shift right and capture any new lost fraction. */ 1301 lf = shiftSignificandRight(exponentChange); 1302 1303 lost_fraction = combineLostFractions(lf, lost_fraction); 1304 1305 /* Keep OMSB up-to-date. */ 1306 if (omsb > (unsigned) exponentChange) 1307 omsb -= exponentChange; 1308 else 1309 omsb = 0; 1310 } 1311 } 1312 1313 /* Now round the number according to rounding_mode given the lost 1314 fraction. */ 1315 1316 /* As specified in IEEE 754, since we do not trap we do not report 1317 underflow for exact results. */ 1318 if (lost_fraction == lfExactlyZero) { 1319 /* Canonicalize zeroes. */ 1320 if (omsb == 0) 1321 category = fcZero; 1322 1323 return opOK; 1324 } 1325 1326 /* Increment the significand if we're rounding away from zero. */ 1327 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1328 if (omsb == 0) 1329 exponent = semantics->minExponent; 1330 1331 incrementSignificand(); 1332 omsb = significandMSB() + 1; 1333 1334 /* Did the significand increment overflow? */ 1335 if (omsb == (unsigned) semantics->precision + 1) { 1336 /* Renormalize by incrementing the exponent and shifting our 1337 significand right one. However if we already have the 1338 maximum exponent we overflow to infinity. */ 1339 if (exponent == semantics->maxExponent) { 1340 category = fcInfinity; 1341 1342 return (opStatus) (opOverflow | opInexact); 1343 } 1344 1345 shiftSignificandRight(1); 1346 1347 return opInexact; 1348 } 1349 } 1350 1351 /* The normal case - we were and are not denormal, and any 1352 significand increment above didn't overflow. */ 1353 if (omsb == semantics->precision) 1354 return opInexact; 1355 1356 /* We have a non-zero denormal. */ 1357 assert(omsb < semantics->precision); 1358 1359 /* Canonicalize zeroes. */ 1360 if (omsb == 0) 1361 category = fcZero; 1362 1363 /* The fcZero case is a denormal that underflowed to zero. */ 1364 return (opStatus) (opUnderflow | opInexact); 1365 } 1366 1367 APFloat::opStatus 1368 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1369 { 1370 switch (PackCategoriesIntoKey(category, rhs.category)) { 1371 default: 1372 llvm_unreachable(nullptr); 1373 1374 case PackCategoriesIntoKey(fcNaN, fcZero): 1375 case PackCategoriesIntoKey(fcNaN, fcNormal): 1376 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1377 case PackCategoriesIntoKey(fcNaN, fcNaN): 1378 case PackCategoriesIntoKey(fcNormal, fcZero): 1379 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1380 case PackCategoriesIntoKey(fcInfinity, fcZero): 1381 return opOK; 1382 1383 case PackCategoriesIntoKey(fcZero, fcNaN): 1384 case PackCategoriesIntoKey(fcNormal, fcNaN): 1385 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1386 // We need to be sure to flip the sign here for subtraction because we 1387 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1388 sign = rhs.sign ^ subtract; 1389 category = fcNaN; 1390 copySignificand(rhs); 1391 return opOK; 1392 1393 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1394 case PackCategoriesIntoKey(fcZero, fcInfinity): 1395 category = fcInfinity; 1396 sign = rhs.sign ^ subtract; 1397 return opOK; 1398 1399 case PackCategoriesIntoKey(fcZero, fcNormal): 1400 assign(rhs); 1401 sign = rhs.sign ^ subtract; 1402 return opOK; 1403 1404 case PackCategoriesIntoKey(fcZero, fcZero): 1405 /* Sign depends on rounding mode; handled by caller. */ 1406 return opOK; 1407 1408 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1409 /* Differently signed infinities can only be validly 1410 subtracted. */ 1411 if (((sign ^ rhs.sign)!=0) != subtract) { 1412 makeNaN(); 1413 return opInvalidOp; 1414 } 1415 1416 return opOK; 1417 1418 case PackCategoriesIntoKey(fcNormal, fcNormal): 1419 return opDivByZero; 1420 } 1421 } 1422 1423 /* Add or subtract two normal numbers. */ 1424 lostFraction 1425 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1426 { 1427 integerPart carry; 1428 lostFraction lost_fraction; 1429 int bits; 1430 1431 /* Determine if the operation on the absolute values is effectively 1432 an addition or subtraction. */ 1433 subtract ^= (sign ^ rhs.sign) ? true : false; 1434 1435 /* Are we bigger exponent-wise than the RHS? */ 1436 bits = exponent - rhs.exponent; 1437 1438 /* Subtraction is more subtle than one might naively expect. */ 1439 if (subtract) { 1440 APFloat temp_rhs(rhs); 1441 bool reverse; 1442 1443 if (bits == 0) { 1444 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1445 lost_fraction = lfExactlyZero; 1446 } else if (bits > 0) { 1447 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1448 shiftSignificandLeft(1); 1449 reverse = false; 1450 } else { 1451 lost_fraction = shiftSignificandRight(-bits - 1); 1452 temp_rhs.shiftSignificandLeft(1); 1453 reverse = true; 1454 } 1455 1456 if (reverse) { 1457 carry = temp_rhs.subtractSignificand 1458 (*this, lost_fraction != lfExactlyZero); 1459 copySignificand(temp_rhs); 1460 sign = !sign; 1461 } else { 1462 carry = subtractSignificand 1463 (temp_rhs, lost_fraction != lfExactlyZero); 1464 } 1465 1466 /* Invert the lost fraction - it was on the RHS and 1467 subtracted. */ 1468 if (lost_fraction == lfLessThanHalf) 1469 lost_fraction = lfMoreThanHalf; 1470 else if (lost_fraction == lfMoreThanHalf) 1471 lost_fraction = lfLessThanHalf; 1472 1473 /* The code above is intended to ensure that no borrow is 1474 necessary. */ 1475 assert(!carry); 1476 (void)carry; 1477 } else { 1478 if (bits > 0) { 1479 APFloat temp_rhs(rhs); 1480 1481 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1482 carry = addSignificand(temp_rhs); 1483 } else { 1484 lost_fraction = shiftSignificandRight(-bits); 1485 carry = addSignificand(rhs); 1486 } 1487 1488 /* We have a guard bit; generating a carry cannot happen. */ 1489 assert(!carry); 1490 (void)carry; 1491 } 1492 1493 return lost_fraction; 1494 } 1495 1496 APFloat::opStatus 1497 APFloat::multiplySpecials(const APFloat &rhs) 1498 { 1499 switch (PackCategoriesIntoKey(category, rhs.category)) { 1500 default: 1501 llvm_unreachable(nullptr); 1502 1503 case PackCategoriesIntoKey(fcNaN, fcZero): 1504 case PackCategoriesIntoKey(fcNaN, fcNormal): 1505 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1506 case PackCategoriesIntoKey(fcNaN, fcNaN): 1507 sign = false; 1508 return opOK; 1509 1510 case PackCategoriesIntoKey(fcZero, fcNaN): 1511 case PackCategoriesIntoKey(fcNormal, fcNaN): 1512 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1513 sign = false; 1514 category = fcNaN; 1515 copySignificand(rhs); 1516 return opOK; 1517 1518 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1519 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1520 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1521 category = fcInfinity; 1522 return opOK; 1523 1524 case PackCategoriesIntoKey(fcZero, fcNormal): 1525 case PackCategoriesIntoKey(fcNormal, fcZero): 1526 case PackCategoriesIntoKey(fcZero, fcZero): 1527 category = fcZero; 1528 return opOK; 1529 1530 case PackCategoriesIntoKey(fcZero, fcInfinity): 1531 case PackCategoriesIntoKey(fcInfinity, fcZero): 1532 makeNaN(); 1533 return opInvalidOp; 1534 1535 case PackCategoriesIntoKey(fcNormal, fcNormal): 1536 return opOK; 1537 } 1538 } 1539 1540 APFloat::opStatus 1541 APFloat::divideSpecials(const APFloat &rhs) 1542 { 1543 switch (PackCategoriesIntoKey(category, rhs.category)) { 1544 default: 1545 llvm_unreachable(nullptr); 1546 1547 case PackCategoriesIntoKey(fcZero, fcNaN): 1548 case PackCategoriesIntoKey(fcNormal, fcNaN): 1549 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1550 category = fcNaN; 1551 copySignificand(rhs); 1552 case PackCategoriesIntoKey(fcNaN, fcZero): 1553 case PackCategoriesIntoKey(fcNaN, fcNormal): 1554 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1555 case PackCategoriesIntoKey(fcNaN, fcNaN): 1556 sign = false; 1557 case PackCategoriesIntoKey(fcInfinity, fcZero): 1558 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1559 case PackCategoriesIntoKey(fcZero, fcInfinity): 1560 case PackCategoriesIntoKey(fcZero, fcNormal): 1561 return opOK; 1562 1563 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1564 category = fcZero; 1565 return opOK; 1566 1567 case PackCategoriesIntoKey(fcNormal, fcZero): 1568 category = fcInfinity; 1569 return opDivByZero; 1570 1571 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1572 case PackCategoriesIntoKey(fcZero, fcZero): 1573 makeNaN(); 1574 return opInvalidOp; 1575 1576 case PackCategoriesIntoKey(fcNormal, fcNormal): 1577 return opOK; 1578 } 1579 } 1580 1581 APFloat::opStatus 1582 APFloat::modSpecials(const APFloat &rhs) 1583 { 1584 switch (PackCategoriesIntoKey(category, rhs.category)) { 1585 default: 1586 llvm_unreachable(nullptr); 1587 1588 case PackCategoriesIntoKey(fcNaN, fcZero): 1589 case PackCategoriesIntoKey(fcNaN, fcNormal): 1590 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1591 case PackCategoriesIntoKey(fcNaN, fcNaN): 1592 case PackCategoriesIntoKey(fcZero, fcInfinity): 1593 case PackCategoriesIntoKey(fcZero, fcNormal): 1594 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1595 return opOK; 1596 1597 case PackCategoriesIntoKey(fcZero, fcNaN): 1598 case PackCategoriesIntoKey(fcNormal, fcNaN): 1599 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1600 sign = false; 1601 category = fcNaN; 1602 copySignificand(rhs); 1603 return opOK; 1604 1605 case PackCategoriesIntoKey(fcNormal, fcZero): 1606 case PackCategoriesIntoKey(fcInfinity, fcZero): 1607 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1608 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1609 case PackCategoriesIntoKey(fcZero, fcZero): 1610 makeNaN(); 1611 return opInvalidOp; 1612 1613 case PackCategoriesIntoKey(fcNormal, fcNormal): 1614 return opOK; 1615 } 1616 } 1617 1618 /* Change sign. */ 1619 void 1620 APFloat::changeSign() 1621 { 1622 /* Look mummy, this one's easy. */ 1623 sign = !sign; 1624 } 1625 1626 void 1627 APFloat::clearSign() 1628 { 1629 /* So is this one. */ 1630 sign = 0; 1631 } 1632 1633 void 1634 APFloat::copySign(const APFloat &rhs) 1635 { 1636 /* And this one. */ 1637 sign = rhs.sign; 1638 } 1639 1640 /* Normalized addition or subtraction. */ 1641 APFloat::opStatus 1642 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1643 bool subtract) 1644 { 1645 opStatus fs; 1646 1647 fs = addOrSubtractSpecials(rhs, subtract); 1648 1649 /* This return code means it was not a simple case. */ 1650 if (fs == opDivByZero) { 1651 lostFraction lost_fraction; 1652 1653 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1654 fs = normalize(rounding_mode, lost_fraction); 1655 1656 /* Can only be zero if we lost no fraction. */ 1657 assert(category != fcZero || lost_fraction == lfExactlyZero); 1658 } 1659 1660 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1661 positive zero unless rounding to minus infinity, except that 1662 adding two like-signed zeroes gives that zero. */ 1663 if (category == fcZero) { 1664 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1665 sign = (rounding_mode == rmTowardNegative); 1666 } 1667 1668 return fs; 1669 } 1670 1671 /* Normalized addition. */ 1672 APFloat::opStatus 1673 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1674 { 1675 return addOrSubtract(rhs, rounding_mode, false); 1676 } 1677 1678 /* Normalized subtraction. */ 1679 APFloat::opStatus 1680 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1681 { 1682 return addOrSubtract(rhs, rounding_mode, true); 1683 } 1684 1685 /* Normalized multiply. */ 1686 APFloat::opStatus 1687 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1688 { 1689 opStatus fs; 1690 1691 sign ^= rhs.sign; 1692 fs = multiplySpecials(rhs); 1693 1694 if (isFiniteNonZero()) { 1695 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1696 fs = normalize(rounding_mode, lost_fraction); 1697 if (lost_fraction != lfExactlyZero) 1698 fs = (opStatus) (fs | opInexact); 1699 } 1700 1701 return fs; 1702 } 1703 1704 /* Normalized divide. */ 1705 APFloat::opStatus 1706 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1707 { 1708 opStatus fs; 1709 1710 sign ^= rhs.sign; 1711 fs = divideSpecials(rhs); 1712 1713 if (isFiniteNonZero()) { 1714 lostFraction lost_fraction = divideSignificand(rhs); 1715 fs = normalize(rounding_mode, lost_fraction); 1716 if (lost_fraction != lfExactlyZero) 1717 fs = (opStatus) (fs | opInexact); 1718 } 1719 1720 return fs; 1721 } 1722 1723 /* Normalized remainder. This is not currently correct in all cases. */ 1724 APFloat::opStatus 1725 APFloat::remainder(const APFloat &rhs) 1726 { 1727 opStatus fs; 1728 APFloat V = *this; 1729 unsigned int origSign = sign; 1730 1731 fs = V.divide(rhs, rmNearestTiesToEven); 1732 if (fs == opDivByZero) 1733 return fs; 1734 1735 int parts = partCount(); 1736 integerPart *x = new integerPart[parts]; 1737 bool ignored; 1738 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1739 rmNearestTiesToEven, &ignored); 1740 if (fs==opInvalidOp) 1741 return fs; 1742 1743 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1744 rmNearestTiesToEven); 1745 assert(fs==opOK); // should always work 1746 1747 fs = V.multiply(rhs, rmNearestTiesToEven); 1748 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1749 1750 fs = subtract(V, rmNearestTiesToEven); 1751 assert(fs==opOK || fs==opInexact); // likewise 1752 1753 if (isZero()) 1754 sign = origSign; // IEEE754 requires this 1755 delete[] x; 1756 return fs; 1757 } 1758 1759 /* Normalized llvm frem (C fmod). 1760 This is not currently correct in all cases. */ 1761 APFloat::opStatus 1762 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1763 { 1764 opStatus fs; 1765 fs = modSpecials(rhs); 1766 1767 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1768 APFloat V = *this; 1769 unsigned int origSign = sign; 1770 1771 fs = V.divide(rhs, rmNearestTiesToEven); 1772 if (fs == opDivByZero) 1773 return fs; 1774 1775 int parts = partCount(); 1776 integerPart *x = new integerPart[parts]; 1777 bool ignored; 1778 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1779 rmTowardZero, &ignored); 1780 if (fs==opInvalidOp) 1781 return fs; 1782 1783 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1784 rmNearestTiesToEven); 1785 assert(fs==opOK); // should always work 1786 1787 fs = V.multiply(rhs, rounding_mode); 1788 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1789 1790 fs = subtract(V, rounding_mode); 1791 assert(fs==opOK || fs==opInexact); // likewise 1792 1793 if (isZero()) 1794 sign = origSign; // IEEE754 requires this 1795 delete[] x; 1796 } 1797 return fs; 1798 } 1799 1800 /* Normalized fused-multiply-add. */ 1801 APFloat::opStatus 1802 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1803 const APFloat &addend, 1804 roundingMode rounding_mode) 1805 { 1806 opStatus fs; 1807 1808 /* Post-multiplication sign, before addition. */ 1809 sign ^= multiplicand.sign; 1810 1811 /* If and only if all arguments are normal do we need to do an 1812 extended-precision calculation. */ 1813 if (isFiniteNonZero() && 1814 multiplicand.isFiniteNonZero() && 1815 addend.isFinite()) { 1816 lostFraction lost_fraction; 1817 1818 lost_fraction = multiplySignificand(multiplicand, &addend); 1819 fs = normalize(rounding_mode, lost_fraction); 1820 if (lost_fraction != lfExactlyZero) 1821 fs = (opStatus) (fs | opInexact); 1822 1823 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1824 positive zero unless rounding to minus infinity, except that 1825 adding two like-signed zeroes gives that zero. */ 1826 if (category == fcZero && sign != addend.sign) 1827 sign = (rounding_mode == rmTowardNegative); 1828 } else { 1829 fs = multiplySpecials(multiplicand); 1830 1831 /* FS can only be opOK or opInvalidOp. There is no more work 1832 to do in the latter case. The IEEE-754R standard says it is 1833 implementation-defined in this case whether, if ADDEND is a 1834 quiet NaN, we raise invalid op; this implementation does so. 1835 1836 If we need to do the addition we can do so with normal 1837 precision. */ 1838 if (fs == opOK) 1839 fs = addOrSubtract(addend, rounding_mode, false); 1840 } 1841 1842 return fs; 1843 } 1844 1845 /* Rounding-mode corrrect round to integral value. */ 1846 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1847 opStatus fs; 1848 1849 // If the exponent is large enough, we know that this value is already 1850 // integral, and the arithmetic below would potentially cause it to saturate 1851 // to +/-Inf. Bail out early instead. 1852 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1853 return opOK; 1854 1855 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1856 // precision of our format, and then subtract it back off again. The choice 1857 // of rounding modes for the addition/subtraction determines the rounding mode 1858 // for our integral rounding as well. 1859 // NOTE: When the input value is negative, we do subtraction followed by 1860 // addition instead. 1861 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1862 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1863 APFloat MagicConstant(*semantics); 1864 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1865 rmNearestTiesToEven); 1866 MagicConstant.copySign(*this); 1867 1868 if (fs != opOK) 1869 return fs; 1870 1871 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1872 bool inputSign = isNegative(); 1873 1874 fs = add(MagicConstant, rounding_mode); 1875 if (fs != opOK && fs != opInexact) 1876 return fs; 1877 1878 fs = subtract(MagicConstant, rounding_mode); 1879 1880 // Restore the input sign. 1881 if (inputSign != isNegative()) 1882 changeSign(); 1883 1884 return fs; 1885 } 1886 1887 1888 /* Comparison requires normalized numbers. */ 1889 APFloat::cmpResult 1890 APFloat::compare(const APFloat &rhs) const 1891 { 1892 cmpResult result; 1893 1894 assert(semantics == rhs.semantics); 1895 1896 switch (PackCategoriesIntoKey(category, rhs.category)) { 1897 default: 1898 llvm_unreachable(nullptr); 1899 1900 case PackCategoriesIntoKey(fcNaN, fcZero): 1901 case PackCategoriesIntoKey(fcNaN, fcNormal): 1902 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1903 case PackCategoriesIntoKey(fcNaN, fcNaN): 1904 case PackCategoriesIntoKey(fcZero, fcNaN): 1905 case PackCategoriesIntoKey(fcNormal, fcNaN): 1906 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1907 return cmpUnordered; 1908 1909 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1910 case PackCategoriesIntoKey(fcInfinity, fcZero): 1911 case PackCategoriesIntoKey(fcNormal, fcZero): 1912 if (sign) 1913 return cmpLessThan; 1914 else 1915 return cmpGreaterThan; 1916 1917 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1918 case PackCategoriesIntoKey(fcZero, fcInfinity): 1919 case PackCategoriesIntoKey(fcZero, fcNormal): 1920 if (rhs.sign) 1921 return cmpGreaterThan; 1922 else 1923 return cmpLessThan; 1924 1925 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1926 if (sign == rhs.sign) 1927 return cmpEqual; 1928 else if (sign) 1929 return cmpLessThan; 1930 else 1931 return cmpGreaterThan; 1932 1933 case PackCategoriesIntoKey(fcZero, fcZero): 1934 return cmpEqual; 1935 1936 case PackCategoriesIntoKey(fcNormal, fcNormal): 1937 break; 1938 } 1939 1940 /* Two normal numbers. Do they have the same sign? */ 1941 if (sign != rhs.sign) { 1942 if (sign) 1943 result = cmpLessThan; 1944 else 1945 result = cmpGreaterThan; 1946 } else { 1947 /* Compare absolute values; invert result if negative. */ 1948 result = compareAbsoluteValue(rhs); 1949 1950 if (sign) { 1951 if (result == cmpLessThan) 1952 result = cmpGreaterThan; 1953 else if (result == cmpGreaterThan) 1954 result = cmpLessThan; 1955 } 1956 } 1957 1958 return result; 1959 } 1960 1961 /// APFloat::convert - convert a value of one floating point type to another. 1962 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1963 /// records whether the transformation lost information, i.e. whether 1964 /// converting the result back to the original type will produce the 1965 /// original value (this is almost the same as return value==fsOK, but there 1966 /// are edge cases where this is not so). 1967 1968 APFloat::opStatus 1969 APFloat::convert(const fltSemantics &toSemantics, 1970 roundingMode rounding_mode, bool *losesInfo) 1971 { 1972 lostFraction lostFraction; 1973 unsigned int newPartCount, oldPartCount; 1974 opStatus fs; 1975 int shift; 1976 const fltSemantics &fromSemantics = *semantics; 1977 1978 lostFraction = lfExactlyZero; 1979 newPartCount = partCountForBits(toSemantics.precision + 1); 1980 oldPartCount = partCount(); 1981 shift = toSemantics.precision - fromSemantics.precision; 1982 1983 bool X86SpecialNan = false; 1984 if (&fromSemantics == &APFloat::x87DoubleExtended && 1985 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 1986 (!(*significandParts() & 0x8000000000000000ULL) || 1987 !(*significandParts() & 0x4000000000000000ULL))) { 1988 // x86 has some unusual NaNs which cannot be represented in any other 1989 // format; note them here. 1990 X86SpecialNan = true; 1991 } 1992 1993 // If this is a truncation of a denormal number, and the target semantics 1994 // has larger exponent range than the source semantics (this can happen 1995 // when truncating from PowerPC double-double to double format), the 1996 // right shift could lose result mantissa bits. Adjust exponent instead 1997 // of performing excessive shift. 1998 if (shift < 0 && isFiniteNonZero()) { 1999 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 2000 if (exponent + exponentChange < toSemantics.minExponent) 2001 exponentChange = toSemantics.minExponent - exponent; 2002 if (exponentChange < shift) 2003 exponentChange = shift; 2004 if (exponentChange < 0) { 2005 shift -= exponentChange; 2006 exponent += exponentChange; 2007 } 2008 } 2009 2010 // If this is a truncation, perform the shift before we narrow the storage. 2011 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2012 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2013 2014 // Fix the storage so it can hold to new value. 2015 if (newPartCount > oldPartCount) { 2016 // The new type requires more storage; make it available. 2017 integerPart *newParts; 2018 newParts = new integerPart[newPartCount]; 2019 APInt::tcSet(newParts, 0, newPartCount); 2020 if (isFiniteNonZero() || category==fcNaN) 2021 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2022 freeSignificand(); 2023 significand.parts = newParts; 2024 } else if (newPartCount == 1 && oldPartCount != 1) { 2025 // Switch to built-in storage for a single part. 2026 integerPart newPart = 0; 2027 if (isFiniteNonZero() || category==fcNaN) 2028 newPart = significandParts()[0]; 2029 freeSignificand(); 2030 significand.part = newPart; 2031 } 2032 2033 // Now that we have the right storage, switch the semantics. 2034 semantics = &toSemantics; 2035 2036 // If this is an extension, perform the shift now that the storage is 2037 // available. 2038 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2039 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2040 2041 if (isFiniteNonZero()) { 2042 fs = normalize(rounding_mode, lostFraction); 2043 *losesInfo = (fs != opOK); 2044 } else if (category == fcNaN) { 2045 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2046 2047 // For x87 extended precision, we want to make a NaN, not a special NaN if 2048 // the input wasn't special either. 2049 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) 2050 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2051 2052 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2053 // does not give you back the same bits. This is dubious, and we 2054 // don't currently do it. You're really supposed to get 2055 // an invalid operation signal at runtime, but nobody does that. 2056 fs = opOK; 2057 } else { 2058 *losesInfo = false; 2059 fs = opOK; 2060 } 2061 2062 return fs; 2063 } 2064 2065 /* Convert a floating point number to an integer according to the 2066 rounding mode. If the rounded integer value is out of range this 2067 returns an invalid operation exception and the contents of the 2068 destination parts are unspecified. If the rounded value is in 2069 range but the floating point number is not the exact integer, the C 2070 standard doesn't require an inexact exception to be raised. IEEE 2071 854 does require it so we do that. 2072 2073 Note that for conversions to integer type the C standard requires 2074 round-to-zero to always be used. */ 2075 APFloat::opStatus 2076 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 2077 bool isSigned, 2078 roundingMode rounding_mode, 2079 bool *isExact) const 2080 { 2081 lostFraction lost_fraction; 2082 const integerPart *src; 2083 unsigned int dstPartsCount, truncatedBits; 2084 2085 *isExact = false; 2086 2087 /* Handle the three special cases first. */ 2088 if (category == fcInfinity || category == fcNaN) 2089 return opInvalidOp; 2090 2091 dstPartsCount = partCountForBits(width); 2092 2093 if (category == fcZero) { 2094 APInt::tcSet(parts, 0, dstPartsCount); 2095 // Negative zero can't be represented as an int. 2096 *isExact = !sign; 2097 return opOK; 2098 } 2099 2100 src = significandParts(); 2101 2102 /* Step 1: place our absolute value, with any fraction truncated, in 2103 the destination. */ 2104 if (exponent < 0) { 2105 /* Our absolute value is less than one; truncate everything. */ 2106 APInt::tcSet(parts, 0, dstPartsCount); 2107 /* For exponent -1 the integer bit represents .5, look at that. 2108 For smaller exponents leftmost truncated bit is 0. */ 2109 truncatedBits = semantics->precision -1U - exponent; 2110 } else { 2111 /* We want the most significant (exponent + 1) bits; the rest are 2112 truncated. */ 2113 unsigned int bits = exponent + 1U; 2114 2115 /* Hopelessly large in magnitude? */ 2116 if (bits > width) 2117 return opInvalidOp; 2118 2119 if (bits < semantics->precision) { 2120 /* We truncate (semantics->precision - bits) bits. */ 2121 truncatedBits = semantics->precision - bits; 2122 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2123 } else { 2124 /* We want at least as many bits as are available. */ 2125 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2126 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2127 truncatedBits = 0; 2128 } 2129 } 2130 2131 /* Step 2: work out any lost fraction, and increment the absolute 2132 value if we would round away from zero. */ 2133 if (truncatedBits) { 2134 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2135 truncatedBits); 2136 if (lost_fraction != lfExactlyZero && 2137 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2138 if (APInt::tcIncrement(parts, dstPartsCount)) 2139 return opInvalidOp; /* Overflow. */ 2140 } 2141 } else { 2142 lost_fraction = lfExactlyZero; 2143 } 2144 2145 /* Step 3: check if we fit in the destination. */ 2146 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2147 2148 if (sign) { 2149 if (!isSigned) { 2150 /* Negative numbers cannot be represented as unsigned. */ 2151 if (omsb != 0) 2152 return opInvalidOp; 2153 } else { 2154 /* It takes omsb bits to represent the unsigned integer value. 2155 We lose a bit for the sign, but care is needed as the 2156 maximally negative integer is a special case. */ 2157 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2158 return opInvalidOp; 2159 2160 /* This case can happen because of rounding. */ 2161 if (omsb > width) 2162 return opInvalidOp; 2163 } 2164 2165 APInt::tcNegate (parts, dstPartsCount); 2166 } else { 2167 if (omsb >= width + !isSigned) 2168 return opInvalidOp; 2169 } 2170 2171 if (lost_fraction == lfExactlyZero) { 2172 *isExact = true; 2173 return opOK; 2174 } else 2175 return opInexact; 2176 } 2177 2178 /* Same as convertToSignExtendedInteger, except we provide 2179 deterministic values in case of an invalid operation exception, 2180 namely zero for NaNs and the minimal or maximal value respectively 2181 for underflow or overflow. 2182 The *isExact output tells whether the result is exact, in the sense 2183 that converting it back to the original floating point type produces 2184 the original value. This is almost equivalent to result==opOK, 2185 except for negative zeroes. 2186 */ 2187 APFloat::opStatus 2188 APFloat::convertToInteger(integerPart *parts, unsigned int width, 2189 bool isSigned, 2190 roundingMode rounding_mode, bool *isExact) const 2191 { 2192 opStatus fs; 2193 2194 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2195 isExact); 2196 2197 if (fs == opInvalidOp) { 2198 unsigned int bits, dstPartsCount; 2199 2200 dstPartsCount = partCountForBits(width); 2201 2202 if (category == fcNaN) 2203 bits = 0; 2204 else if (sign) 2205 bits = isSigned; 2206 else 2207 bits = width - isSigned; 2208 2209 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2210 if (sign && isSigned) 2211 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2212 } 2213 2214 return fs; 2215 } 2216 2217 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2218 an APSInt, whose initial bit-width and signed-ness are used to determine the 2219 precision of the conversion. 2220 */ 2221 APFloat::opStatus 2222 APFloat::convertToInteger(APSInt &result, 2223 roundingMode rounding_mode, bool *isExact) const 2224 { 2225 unsigned bitWidth = result.getBitWidth(); 2226 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2227 opStatus status = convertToInteger( 2228 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2229 // Keeps the original signed-ness. 2230 result = APInt(bitWidth, parts); 2231 return status; 2232 } 2233 2234 /* Convert an unsigned integer SRC to a floating point number, 2235 rounding according to ROUNDING_MODE. The sign of the floating 2236 point number is not modified. */ 2237 APFloat::opStatus 2238 APFloat::convertFromUnsignedParts(const integerPart *src, 2239 unsigned int srcCount, 2240 roundingMode rounding_mode) 2241 { 2242 unsigned int omsb, precision, dstCount; 2243 integerPart *dst; 2244 lostFraction lost_fraction; 2245 2246 category = fcNormal; 2247 omsb = APInt::tcMSB(src, srcCount) + 1; 2248 dst = significandParts(); 2249 dstCount = partCount(); 2250 precision = semantics->precision; 2251 2252 /* We want the most significant PRECISION bits of SRC. There may not 2253 be that many; extract what we can. */ 2254 if (precision <= omsb) { 2255 exponent = omsb - 1; 2256 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2257 omsb - precision); 2258 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2259 } else { 2260 exponent = precision - 1; 2261 lost_fraction = lfExactlyZero; 2262 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2263 } 2264 2265 return normalize(rounding_mode, lost_fraction); 2266 } 2267 2268 APFloat::opStatus 2269 APFloat::convertFromAPInt(const APInt &Val, 2270 bool isSigned, 2271 roundingMode rounding_mode) 2272 { 2273 unsigned int partCount = Val.getNumWords(); 2274 APInt api = Val; 2275 2276 sign = false; 2277 if (isSigned && api.isNegative()) { 2278 sign = true; 2279 api = -api; 2280 } 2281 2282 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2283 } 2284 2285 /* Convert a two's complement integer SRC to a floating point number, 2286 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2287 integer is signed, in which case it must be sign-extended. */ 2288 APFloat::opStatus 2289 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2290 unsigned int srcCount, 2291 bool isSigned, 2292 roundingMode rounding_mode) 2293 { 2294 opStatus status; 2295 2296 if (isSigned && 2297 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2298 integerPart *copy; 2299 2300 /* If we're signed and negative negate a copy. */ 2301 sign = true; 2302 copy = new integerPart[srcCount]; 2303 APInt::tcAssign(copy, src, srcCount); 2304 APInt::tcNegate(copy, srcCount); 2305 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2306 delete [] copy; 2307 } else { 2308 sign = false; 2309 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2310 } 2311 2312 return status; 2313 } 2314 2315 /* FIXME: should this just take a const APInt reference? */ 2316 APFloat::opStatus 2317 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2318 unsigned int width, bool isSigned, 2319 roundingMode rounding_mode) 2320 { 2321 unsigned int partCount = partCountForBits(width); 2322 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2323 2324 sign = false; 2325 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2326 sign = true; 2327 api = -api; 2328 } 2329 2330 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2331 } 2332 2333 APFloat::opStatus 2334 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2335 { 2336 lostFraction lost_fraction = lfExactlyZero; 2337 2338 category = fcNormal; 2339 zeroSignificand(); 2340 exponent = 0; 2341 2342 integerPart *significand = significandParts(); 2343 unsigned partsCount = partCount(); 2344 unsigned bitPos = partsCount * integerPartWidth; 2345 bool computedTrailingFraction = false; 2346 2347 // Skip leading zeroes and any (hexa)decimal point. 2348 StringRef::iterator begin = s.begin(); 2349 StringRef::iterator end = s.end(); 2350 StringRef::iterator dot; 2351 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2352 StringRef::iterator firstSignificantDigit = p; 2353 2354 while (p != end) { 2355 integerPart hex_value; 2356 2357 if (*p == '.') { 2358 assert(dot == end && "String contains multiple dots"); 2359 dot = p++; 2360 continue; 2361 } 2362 2363 hex_value = hexDigitValue(*p); 2364 if (hex_value == -1U) 2365 break; 2366 2367 p++; 2368 2369 // Store the number while we have space. 2370 if (bitPos) { 2371 bitPos -= 4; 2372 hex_value <<= bitPos % integerPartWidth; 2373 significand[bitPos / integerPartWidth] |= hex_value; 2374 } else if (!computedTrailingFraction) { 2375 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2376 computedTrailingFraction = true; 2377 } 2378 } 2379 2380 /* Hex floats require an exponent but not a hexadecimal point. */ 2381 assert(p != end && "Hex strings require an exponent"); 2382 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2383 assert(p != begin && "Significand has no digits"); 2384 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2385 2386 /* Ignore the exponent if we are zero. */ 2387 if (p != firstSignificantDigit) { 2388 int expAdjustment; 2389 2390 /* Implicit hexadecimal point? */ 2391 if (dot == end) 2392 dot = p; 2393 2394 /* Calculate the exponent adjustment implicit in the number of 2395 significant digits. */ 2396 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2397 if (expAdjustment < 0) 2398 expAdjustment++; 2399 expAdjustment = expAdjustment * 4 - 1; 2400 2401 /* Adjust for writing the significand starting at the most 2402 significant nibble. */ 2403 expAdjustment += semantics->precision; 2404 expAdjustment -= partsCount * integerPartWidth; 2405 2406 /* Adjust for the given exponent. */ 2407 exponent = totalExponent(p + 1, end, expAdjustment); 2408 } 2409 2410 return normalize(rounding_mode, lost_fraction); 2411 } 2412 2413 APFloat::opStatus 2414 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2415 unsigned sigPartCount, int exp, 2416 roundingMode rounding_mode) 2417 { 2418 unsigned int parts, pow5PartCount; 2419 fltSemantics calcSemantics = { 32767, -32767, 0 }; 2420 integerPart pow5Parts[maxPowerOfFiveParts]; 2421 bool isNearest; 2422 2423 isNearest = (rounding_mode == rmNearestTiesToEven || 2424 rounding_mode == rmNearestTiesToAway); 2425 2426 parts = partCountForBits(semantics->precision + 11); 2427 2428 /* Calculate pow(5, abs(exp)). */ 2429 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2430 2431 for (;; parts *= 2) { 2432 opStatus sigStatus, powStatus; 2433 unsigned int excessPrecision, truncatedBits; 2434 2435 calcSemantics.precision = parts * integerPartWidth - 1; 2436 excessPrecision = calcSemantics.precision - semantics->precision; 2437 truncatedBits = excessPrecision; 2438 2439 APFloat decSig = APFloat::getZero(calcSemantics, sign); 2440 APFloat pow5(calcSemantics); 2441 2442 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2443 rmNearestTiesToEven); 2444 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2445 rmNearestTiesToEven); 2446 /* Add exp, as 10^n = 5^n * 2^n. */ 2447 decSig.exponent += exp; 2448 2449 lostFraction calcLostFraction; 2450 integerPart HUerr, HUdistance; 2451 unsigned int powHUerr; 2452 2453 if (exp >= 0) { 2454 /* multiplySignificand leaves the precision-th bit set to 1. */ 2455 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2456 powHUerr = powStatus != opOK; 2457 } else { 2458 calcLostFraction = decSig.divideSignificand(pow5); 2459 /* Denormal numbers have less precision. */ 2460 if (decSig.exponent < semantics->minExponent) { 2461 excessPrecision += (semantics->minExponent - decSig.exponent); 2462 truncatedBits = excessPrecision; 2463 if (excessPrecision > calcSemantics.precision) 2464 excessPrecision = calcSemantics.precision; 2465 } 2466 /* Extra half-ulp lost in reciprocal of exponent. */ 2467 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2468 } 2469 2470 /* Both multiplySignificand and divideSignificand return the 2471 result with the integer bit set. */ 2472 assert(APInt::tcExtractBit 2473 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2474 2475 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2476 powHUerr); 2477 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2478 excessPrecision, isNearest); 2479 2480 /* Are we guaranteed to round correctly if we truncate? */ 2481 if (HUdistance >= HUerr) { 2482 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2483 calcSemantics.precision - excessPrecision, 2484 excessPrecision); 2485 /* Take the exponent of decSig. If we tcExtract-ed less bits 2486 above we must adjust our exponent to compensate for the 2487 implicit right shift. */ 2488 exponent = (decSig.exponent + semantics->precision 2489 - (calcSemantics.precision - excessPrecision)); 2490 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2491 decSig.partCount(), 2492 truncatedBits); 2493 return normalize(rounding_mode, calcLostFraction); 2494 } 2495 } 2496 } 2497 2498 APFloat::opStatus 2499 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2500 { 2501 decimalInfo D; 2502 opStatus fs; 2503 2504 /* Scan the text. */ 2505 StringRef::iterator p = str.begin(); 2506 interpretDecimal(p, str.end(), &D); 2507 2508 /* Handle the quick cases. First the case of no significant digits, 2509 i.e. zero, and then exponents that are obviously too large or too 2510 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2511 definitely overflows if 2512 2513 (exp - 1) * L >= maxExponent 2514 2515 and definitely underflows to zero where 2516 2517 (exp + 1) * L <= minExponent - precision 2518 2519 With integer arithmetic the tightest bounds for L are 2520 2521 93/28 < L < 196/59 [ numerator <= 256 ] 2522 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2523 */ 2524 2525 // Test if we have a zero number allowing for strings with no null terminators 2526 // and zero decimals with non-zero exponents. 2527 // 2528 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2529 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2530 // be at most one dot. On the other hand, if we have a zero with a non-zero 2531 // exponent, then we know that D.firstSigDigit will be non-numeric. 2532 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2533 category = fcZero; 2534 fs = opOK; 2535 2536 /* Check whether the normalized exponent is high enough to overflow 2537 max during the log-rebasing in the max-exponent check below. */ 2538 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2539 fs = handleOverflow(rounding_mode); 2540 2541 /* If it wasn't, then it also wasn't high enough to overflow max 2542 during the log-rebasing in the min-exponent check. Check that it 2543 won't overflow min in either check, then perform the min-exponent 2544 check. */ 2545 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2546 (D.normalizedExponent + 1) * 28738 <= 2547 8651 * (semantics->minExponent - (int) semantics->precision)) { 2548 /* Underflow to zero and round. */ 2549 category = fcNormal; 2550 zeroSignificand(); 2551 fs = normalize(rounding_mode, lfLessThanHalf); 2552 2553 /* We can finally safely perform the max-exponent check. */ 2554 } else if ((D.normalizedExponent - 1) * 42039 2555 >= 12655 * semantics->maxExponent) { 2556 /* Overflow and round. */ 2557 fs = handleOverflow(rounding_mode); 2558 } else { 2559 integerPart *decSignificand; 2560 unsigned int partCount; 2561 2562 /* A tight upper bound on number of bits required to hold an 2563 N-digit decimal integer is N * 196 / 59. Allocate enough space 2564 to hold the full significand, and an extra part required by 2565 tcMultiplyPart. */ 2566 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2567 partCount = partCountForBits(1 + 196 * partCount / 59); 2568 decSignificand = new integerPart[partCount + 1]; 2569 partCount = 0; 2570 2571 /* Convert to binary efficiently - we do almost all multiplication 2572 in an integerPart. When this would overflow do we do a single 2573 bignum multiplication, and then revert again to multiplication 2574 in an integerPart. */ 2575 do { 2576 integerPart decValue, val, multiplier; 2577 2578 val = 0; 2579 multiplier = 1; 2580 2581 do { 2582 if (*p == '.') { 2583 p++; 2584 if (p == str.end()) { 2585 break; 2586 } 2587 } 2588 decValue = decDigitValue(*p++); 2589 assert(decValue < 10U && "Invalid character in significand"); 2590 multiplier *= 10; 2591 val = val * 10 + decValue; 2592 /* The maximum number that can be multiplied by ten with any 2593 digit added without overflowing an integerPart. */ 2594 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2595 2596 /* Multiply out the current part. */ 2597 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2598 partCount, partCount + 1, false); 2599 2600 /* If we used another part (likely but not guaranteed), increase 2601 the count. */ 2602 if (decSignificand[partCount]) 2603 partCount++; 2604 } while (p <= D.lastSigDigit); 2605 2606 category = fcNormal; 2607 fs = roundSignificandWithExponent(decSignificand, partCount, 2608 D.exponent, rounding_mode); 2609 2610 delete [] decSignificand; 2611 } 2612 2613 return fs; 2614 } 2615 2616 bool 2617 APFloat::convertFromStringSpecials(StringRef str) { 2618 if (str.equals("inf") || str.equals("INFINITY")) { 2619 makeInf(false); 2620 return true; 2621 } 2622 2623 if (str.equals("-inf") || str.equals("-INFINITY")) { 2624 makeInf(true); 2625 return true; 2626 } 2627 2628 if (str.equals("nan") || str.equals("NaN")) { 2629 makeNaN(false, false); 2630 return true; 2631 } 2632 2633 if (str.equals("-nan") || str.equals("-NaN")) { 2634 makeNaN(false, true); 2635 return true; 2636 } 2637 2638 return false; 2639 } 2640 2641 APFloat::opStatus 2642 APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2643 { 2644 assert(!str.empty() && "Invalid string length"); 2645 2646 // Handle special cases. 2647 if (convertFromStringSpecials(str)) 2648 return opOK; 2649 2650 /* Handle a leading minus sign. */ 2651 StringRef::iterator p = str.begin(); 2652 size_t slen = str.size(); 2653 sign = *p == '-' ? 1 : 0; 2654 if (*p == '-' || *p == '+') { 2655 p++; 2656 slen--; 2657 assert(slen && "String has no digits"); 2658 } 2659 2660 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2661 assert(slen - 2 && "Invalid string"); 2662 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2663 rounding_mode); 2664 } 2665 2666 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2667 } 2668 2669 /* Write out a hexadecimal representation of the floating point value 2670 to DST, which must be of sufficient size, in the C99 form 2671 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2672 excluding the terminating NUL. 2673 2674 If UPPERCASE, the output is in upper case, otherwise in lower case. 2675 2676 HEXDIGITS digits appear altogether, rounding the value if 2677 necessary. If HEXDIGITS is 0, the minimal precision to display the 2678 number precisely is used instead. If nothing would appear after 2679 the decimal point it is suppressed. 2680 2681 The decimal exponent is always printed and has at least one digit. 2682 Zero values display an exponent of zero. Infinities and NaNs 2683 appear as "infinity" or "nan" respectively. 2684 2685 The above rules are as specified by C99. There is ambiguity about 2686 what the leading hexadecimal digit should be. This implementation 2687 uses whatever is necessary so that the exponent is displayed as 2688 stored. This implies the exponent will fall within the IEEE format 2689 range, and the leading hexadecimal digit will be 0 (for denormals), 2690 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2691 any other digits zero). 2692 */ 2693 unsigned int 2694 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2695 bool upperCase, roundingMode rounding_mode) const 2696 { 2697 char *p; 2698 2699 p = dst; 2700 if (sign) 2701 *dst++ = '-'; 2702 2703 switch (category) { 2704 case fcInfinity: 2705 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2706 dst += sizeof infinityL - 1; 2707 break; 2708 2709 case fcNaN: 2710 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2711 dst += sizeof NaNU - 1; 2712 break; 2713 2714 case fcZero: 2715 *dst++ = '0'; 2716 *dst++ = upperCase ? 'X': 'x'; 2717 *dst++ = '0'; 2718 if (hexDigits > 1) { 2719 *dst++ = '.'; 2720 memset (dst, '0', hexDigits - 1); 2721 dst += hexDigits - 1; 2722 } 2723 *dst++ = upperCase ? 'P': 'p'; 2724 *dst++ = '0'; 2725 break; 2726 2727 case fcNormal: 2728 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2729 break; 2730 } 2731 2732 *dst = 0; 2733 2734 return static_cast<unsigned int>(dst - p); 2735 } 2736 2737 /* Does the hard work of outputting the correctly rounded hexadecimal 2738 form of a normal floating point number with the specified number of 2739 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2740 digits necessary to print the value precisely is output. */ 2741 char * 2742 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2743 bool upperCase, 2744 roundingMode rounding_mode) const 2745 { 2746 unsigned int count, valueBits, shift, partsCount, outputDigits; 2747 const char *hexDigitChars; 2748 const integerPart *significand; 2749 char *p; 2750 bool roundUp; 2751 2752 *dst++ = '0'; 2753 *dst++ = upperCase ? 'X': 'x'; 2754 2755 roundUp = false; 2756 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2757 2758 significand = significandParts(); 2759 partsCount = partCount(); 2760 2761 /* +3 because the first digit only uses the single integer bit, so 2762 we have 3 virtual zero most-significant-bits. */ 2763 valueBits = semantics->precision + 3; 2764 shift = integerPartWidth - valueBits % integerPartWidth; 2765 2766 /* The natural number of digits required ignoring trailing 2767 insignificant zeroes. */ 2768 outputDigits = (valueBits - significandLSB () + 3) / 4; 2769 2770 /* hexDigits of zero means use the required number for the 2771 precision. Otherwise, see if we are truncating. If we are, 2772 find out if we need to round away from zero. */ 2773 if (hexDigits) { 2774 if (hexDigits < outputDigits) { 2775 /* We are dropping non-zero bits, so need to check how to round. 2776 "bits" is the number of dropped bits. */ 2777 unsigned int bits; 2778 lostFraction fraction; 2779 2780 bits = valueBits - hexDigits * 4; 2781 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2782 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2783 } 2784 outputDigits = hexDigits; 2785 } 2786 2787 /* Write the digits consecutively, and start writing in the location 2788 of the hexadecimal point. We move the most significant digit 2789 left and add the hexadecimal point later. */ 2790 p = ++dst; 2791 2792 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2793 2794 while (outputDigits && count) { 2795 integerPart part; 2796 2797 /* Put the most significant integerPartWidth bits in "part". */ 2798 if (--count == partsCount) 2799 part = 0; /* An imaginary higher zero part. */ 2800 else 2801 part = significand[count] << shift; 2802 2803 if (count && shift) 2804 part |= significand[count - 1] >> (integerPartWidth - shift); 2805 2806 /* Convert as much of "part" to hexdigits as we can. */ 2807 unsigned int curDigits = integerPartWidth / 4; 2808 2809 if (curDigits > outputDigits) 2810 curDigits = outputDigits; 2811 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2812 outputDigits -= curDigits; 2813 } 2814 2815 if (roundUp) { 2816 char *q = dst; 2817 2818 /* Note that hexDigitChars has a trailing '0'. */ 2819 do { 2820 q--; 2821 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2822 } while (*q == '0'); 2823 assert(q >= p); 2824 } else { 2825 /* Add trailing zeroes. */ 2826 memset (dst, '0', outputDigits); 2827 dst += outputDigits; 2828 } 2829 2830 /* Move the most significant digit to before the point, and if there 2831 is something after the decimal point add it. This must come 2832 after rounding above. */ 2833 p[-1] = p[0]; 2834 if (dst -1 == p) 2835 dst--; 2836 else 2837 p[0] = '.'; 2838 2839 /* Finally output the exponent. */ 2840 *dst++ = upperCase ? 'P': 'p'; 2841 2842 return writeSignedDecimal (dst, exponent); 2843 } 2844 2845 hash_code llvm::hash_value(const APFloat &Arg) { 2846 if (!Arg.isFiniteNonZero()) 2847 return hash_combine((uint8_t)Arg.category, 2848 // NaN has no sign, fix it at zero. 2849 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2850 Arg.semantics->precision); 2851 2852 // Normal floats need their exponent and significand hashed. 2853 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2854 Arg.semantics->precision, Arg.exponent, 2855 hash_combine_range( 2856 Arg.significandParts(), 2857 Arg.significandParts() + Arg.partCount())); 2858 } 2859 2860 // Conversion from APFloat to/from host float/double. It may eventually be 2861 // possible to eliminate these and have everybody deal with APFloats, but that 2862 // will take a while. This approach will not easily extend to long double. 2863 // Current implementation requires integerPartWidth==64, which is correct at 2864 // the moment but could be made more general. 2865 2866 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2867 // the actual IEEE respresentations. We compensate for that here. 2868 2869 APInt 2870 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2871 { 2872 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2873 assert(partCount()==2); 2874 2875 uint64_t myexponent, mysignificand; 2876 2877 if (isFiniteNonZero()) { 2878 myexponent = exponent+16383; //bias 2879 mysignificand = significandParts()[0]; 2880 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2881 myexponent = 0; // denormal 2882 } else if (category==fcZero) { 2883 myexponent = 0; 2884 mysignificand = 0; 2885 } else if (category==fcInfinity) { 2886 myexponent = 0x7fff; 2887 mysignificand = 0x8000000000000000ULL; 2888 } else { 2889 assert(category == fcNaN && "Unknown category"); 2890 myexponent = 0x7fff; 2891 mysignificand = significandParts()[0]; 2892 } 2893 2894 uint64_t words[2]; 2895 words[0] = mysignificand; 2896 words[1] = ((uint64_t)(sign & 1) << 15) | 2897 (myexponent & 0x7fffLL); 2898 return APInt(80, words); 2899 } 2900 2901 APInt 2902 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2903 { 2904 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2905 assert(partCount()==2); 2906 2907 uint64_t words[2]; 2908 opStatus fs; 2909 bool losesInfo; 2910 2911 // Convert number to double. To avoid spurious underflows, we re- 2912 // normalize against the "double" minExponent first, and only *then* 2913 // truncate the mantissa. The result of that second conversion 2914 // may be inexact, but should never underflow. 2915 // Declare fltSemantics before APFloat that uses it (and 2916 // saves pointer to it) to ensure correct destruction order. 2917 fltSemantics extendedSemantics = *semantics; 2918 extendedSemantics.minExponent = IEEEdouble.minExponent; 2919 APFloat extended(*this); 2920 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2921 assert(fs == opOK && !losesInfo); 2922 (void)fs; 2923 2924 APFloat u(extended); 2925 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2926 assert(fs == opOK || fs == opInexact); 2927 (void)fs; 2928 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2929 2930 // If conversion was exact or resulted in a special case, we're done; 2931 // just set the second double to zero. Otherwise, re-convert back to 2932 // the extended format and compute the difference. This now should 2933 // convert exactly to double. 2934 if (u.isFiniteNonZero() && losesInfo) { 2935 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2936 assert(fs == opOK && !losesInfo); 2937 (void)fs; 2938 2939 APFloat v(extended); 2940 v.subtract(u, rmNearestTiesToEven); 2941 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2942 assert(fs == opOK && !losesInfo); 2943 (void)fs; 2944 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2945 } else { 2946 words[1] = 0; 2947 } 2948 2949 return APInt(128, words); 2950 } 2951 2952 APInt 2953 APFloat::convertQuadrupleAPFloatToAPInt() const 2954 { 2955 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2956 assert(partCount()==2); 2957 2958 uint64_t myexponent, mysignificand, mysignificand2; 2959 2960 if (isFiniteNonZero()) { 2961 myexponent = exponent+16383; //bias 2962 mysignificand = significandParts()[0]; 2963 mysignificand2 = significandParts()[1]; 2964 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2965 myexponent = 0; // denormal 2966 } else if (category==fcZero) { 2967 myexponent = 0; 2968 mysignificand = mysignificand2 = 0; 2969 } else if (category==fcInfinity) { 2970 myexponent = 0x7fff; 2971 mysignificand = mysignificand2 = 0; 2972 } else { 2973 assert(category == fcNaN && "Unknown category!"); 2974 myexponent = 0x7fff; 2975 mysignificand = significandParts()[0]; 2976 mysignificand2 = significandParts()[1]; 2977 } 2978 2979 uint64_t words[2]; 2980 words[0] = mysignificand; 2981 words[1] = ((uint64_t)(sign & 1) << 63) | 2982 ((myexponent & 0x7fff) << 48) | 2983 (mysignificand2 & 0xffffffffffffLL); 2984 2985 return APInt(128, words); 2986 } 2987 2988 APInt 2989 APFloat::convertDoubleAPFloatToAPInt() const 2990 { 2991 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2992 assert(partCount()==1); 2993 2994 uint64_t myexponent, mysignificand; 2995 2996 if (isFiniteNonZero()) { 2997 myexponent = exponent+1023; //bias 2998 mysignificand = *significandParts(); 2999 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 3000 myexponent = 0; // denormal 3001 } else if (category==fcZero) { 3002 myexponent = 0; 3003 mysignificand = 0; 3004 } else if (category==fcInfinity) { 3005 myexponent = 0x7ff; 3006 mysignificand = 0; 3007 } else { 3008 assert(category == fcNaN && "Unknown category!"); 3009 myexponent = 0x7ff; 3010 mysignificand = *significandParts(); 3011 } 3012 3013 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3014 ((myexponent & 0x7ff) << 52) | 3015 (mysignificand & 0xfffffffffffffLL)))); 3016 } 3017 3018 APInt 3019 APFloat::convertFloatAPFloatToAPInt() const 3020 { 3021 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 3022 assert(partCount()==1); 3023 3024 uint32_t myexponent, mysignificand; 3025 3026 if (isFiniteNonZero()) { 3027 myexponent = exponent+127; //bias 3028 mysignificand = (uint32_t)*significandParts(); 3029 if (myexponent == 1 && !(mysignificand & 0x800000)) 3030 myexponent = 0; // denormal 3031 } else if (category==fcZero) { 3032 myexponent = 0; 3033 mysignificand = 0; 3034 } else if (category==fcInfinity) { 3035 myexponent = 0xff; 3036 mysignificand = 0; 3037 } else { 3038 assert(category == fcNaN && "Unknown category!"); 3039 myexponent = 0xff; 3040 mysignificand = (uint32_t)*significandParts(); 3041 } 3042 3043 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3044 (mysignificand & 0x7fffff))); 3045 } 3046 3047 APInt 3048 APFloat::convertHalfAPFloatToAPInt() const 3049 { 3050 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 3051 assert(partCount()==1); 3052 3053 uint32_t myexponent, mysignificand; 3054 3055 if (isFiniteNonZero()) { 3056 myexponent = exponent+15; //bias 3057 mysignificand = (uint32_t)*significandParts(); 3058 if (myexponent == 1 && !(mysignificand & 0x400)) 3059 myexponent = 0; // denormal 3060 } else if (category==fcZero) { 3061 myexponent = 0; 3062 mysignificand = 0; 3063 } else if (category==fcInfinity) { 3064 myexponent = 0x1f; 3065 mysignificand = 0; 3066 } else { 3067 assert(category == fcNaN && "Unknown category!"); 3068 myexponent = 0x1f; 3069 mysignificand = (uint32_t)*significandParts(); 3070 } 3071 3072 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3073 (mysignificand & 0x3ff))); 3074 } 3075 3076 // This function creates an APInt that is just a bit map of the floating 3077 // point constant as it would appear in memory. It is not a conversion, 3078 // and treating the result as a normal integer is unlikely to be useful. 3079 3080 APInt 3081 APFloat::bitcastToAPInt() const 3082 { 3083 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 3084 return convertHalfAPFloatToAPInt(); 3085 3086 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3087 return convertFloatAPFloatToAPInt(); 3088 3089 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3090 return convertDoubleAPFloatToAPInt(); 3091 3092 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3093 return convertQuadrupleAPFloatToAPInt(); 3094 3095 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3096 return convertPPCDoubleDoubleAPFloatToAPInt(); 3097 3098 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3099 "unknown format!"); 3100 return convertF80LongDoubleAPFloatToAPInt(); 3101 } 3102 3103 float 3104 APFloat::convertToFloat() const 3105 { 3106 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3107 "Float semantics are not IEEEsingle"); 3108 APInt api = bitcastToAPInt(); 3109 return api.bitsToFloat(); 3110 } 3111 3112 double 3113 APFloat::convertToDouble() const 3114 { 3115 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3116 "Float semantics are not IEEEdouble"); 3117 APInt api = bitcastToAPInt(); 3118 return api.bitsToDouble(); 3119 } 3120 3121 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3122 /// does not support these bit patterns: 3123 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3124 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3125 /// exponent = 0, integer bit 1 ("pseudodenormal") 3126 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3127 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3128 void 3129 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 3130 { 3131 assert(api.getBitWidth()==80); 3132 uint64_t i1 = api.getRawData()[0]; 3133 uint64_t i2 = api.getRawData()[1]; 3134 uint64_t myexponent = (i2 & 0x7fff); 3135 uint64_t mysignificand = i1; 3136 3137 initialize(&APFloat::x87DoubleExtended); 3138 assert(partCount()==2); 3139 3140 sign = static_cast<unsigned int>(i2>>15); 3141 if (myexponent==0 && mysignificand==0) { 3142 // exponent, significand meaningless 3143 category = fcZero; 3144 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3145 // exponent, significand meaningless 3146 category = fcInfinity; 3147 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3148 // exponent meaningless 3149 category = fcNaN; 3150 significandParts()[0] = mysignificand; 3151 significandParts()[1] = 0; 3152 } else { 3153 category = fcNormal; 3154 exponent = myexponent - 16383; 3155 significandParts()[0] = mysignificand; 3156 significandParts()[1] = 0; 3157 if (myexponent==0) // denormal 3158 exponent = -16382; 3159 } 3160 } 3161 3162 void 3163 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3164 { 3165 assert(api.getBitWidth()==128); 3166 uint64_t i1 = api.getRawData()[0]; 3167 uint64_t i2 = api.getRawData()[1]; 3168 opStatus fs; 3169 bool losesInfo; 3170 3171 // Get the first double and convert to our format. 3172 initFromDoubleAPInt(APInt(64, i1)); 3173 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3174 assert(fs == opOK && !losesInfo); 3175 (void)fs; 3176 3177 // Unless we have a special case, add in second double. 3178 if (isFiniteNonZero()) { 3179 APFloat v(IEEEdouble, APInt(64, i2)); 3180 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3181 assert(fs == opOK && !losesInfo); 3182 (void)fs; 3183 3184 add(v, rmNearestTiesToEven); 3185 } 3186 } 3187 3188 void 3189 APFloat::initFromQuadrupleAPInt(const APInt &api) 3190 { 3191 assert(api.getBitWidth()==128); 3192 uint64_t i1 = api.getRawData()[0]; 3193 uint64_t i2 = api.getRawData()[1]; 3194 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3195 uint64_t mysignificand = i1; 3196 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3197 3198 initialize(&APFloat::IEEEquad); 3199 assert(partCount()==2); 3200 3201 sign = static_cast<unsigned int>(i2>>63); 3202 if (myexponent==0 && 3203 (mysignificand==0 && mysignificand2==0)) { 3204 // exponent, significand meaningless 3205 category = fcZero; 3206 } else if (myexponent==0x7fff && 3207 (mysignificand==0 && mysignificand2==0)) { 3208 // exponent, significand meaningless 3209 category = fcInfinity; 3210 } else if (myexponent==0x7fff && 3211 (mysignificand!=0 || mysignificand2 !=0)) { 3212 // exponent meaningless 3213 category = fcNaN; 3214 significandParts()[0] = mysignificand; 3215 significandParts()[1] = mysignificand2; 3216 } else { 3217 category = fcNormal; 3218 exponent = myexponent - 16383; 3219 significandParts()[0] = mysignificand; 3220 significandParts()[1] = mysignificand2; 3221 if (myexponent==0) // denormal 3222 exponent = -16382; 3223 else 3224 significandParts()[1] |= 0x1000000000000LL; // integer bit 3225 } 3226 } 3227 3228 void 3229 APFloat::initFromDoubleAPInt(const APInt &api) 3230 { 3231 assert(api.getBitWidth()==64); 3232 uint64_t i = *api.getRawData(); 3233 uint64_t myexponent = (i >> 52) & 0x7ff; 3234 uint64_t mysignificand = i & 0xfffffffffffffLL; 3235 3236 initialize(&APFloat::IEEEdouble); 3237 assert(partCount()==1); 3238 3239 sign = static_cast<unsigned int>(i>>63); 3240 if (myexponent==0 && mysignificand==0) { 3241 // exponent, significand meaningless 3242 category = fcZero; 3243 } else if (myexponent==0x7ff && mysignificand==0) { 3244 // exponent, significand meaningless 3245 category = fcInfinity; 3246 } else if (myexponent==0x7ff && mysignificand!=0) { 3247 // exponent meaningless 3248 category = fcNaN; 3249 *significandParts() = mysignificand; 3250 } else { 3251 category = fcNormal; 3252 exponent = myexponent - 1023; 3253 *significandParts() = mysignificand; 3254 if (myexponent==0) // denormal 3255 exponent = -1022; 3256 else 3257 *significandParts() |= 0x10000000000000LL; // integer bit 3258 } 3259 } 3260 3261 void 3262 APFloat::initFromFloatAPInt(const APInt & api) 3263 { 3264 assert(api.getBitWidth()==32); 3265 uint32_t i = (uint32_t)*api.getRawData(); 3266 uint32_t myexponent = (i >> 23) & 0xff; 3267 uint32_t mysignificand = i & 0x7fffff; 3268 3269 initialize(&APFloat::IEEEsingle); 3270 assert(partCount()==1); 3271 3272 sign = i >> 31; 3273 if (myexponent==0 && mysignificand==0) { 3274 // exponent, significand meaningless 3275 category = fcZero; 3276 } else if (myexponent==0xff && mysignificand==0) { 3277 // exponent, significand meaningless 3278 category = fcInfinity; 3279 } else if (myexponent==0xff && mysignificand!=0) { 3280 // sign, exponent, significand meaningless 3281 category = fcNaN; 3282 *significandParts() = mysignificand; 3283 } else { 3284 category = fcNormal; 3285 exponent = myexponent - 127; //bias 3286 *significandParts() = mysignificand; 3287 if (myexponent==0) // denormal 3288 exponent = -126; 3289 else 3290 *significandParts() |= 0x800000; // integer bit 3291 } 3292 } 3293 3294 void 3295 APFloat::initFromHalfAPInt(const APInt & api) 3296 { 3297 assert(api.getBitWidth()==16); 3298 uint32_t i = (uint32_t)*api.getRawData(); 3299 uint32_t myexponent = (i >> 10) & 0x1f; 3300 uint32_t mysignificand = i & 0x3ff; 3301 3302 initialize(&APFloat::IEEEhalf); 3303 assert(partCount()==1); 3304 3305 sign = i >> 15; 3306 if (myexponent==0 && mysignificand==0) { 3307 // exponent, significand meaningless 3308 category = fcZero; 3309 } else if (myexponent==0x1f && mysignificand==0) { 3310 // exponent, significand meaningless 3311 category = fcInfinity; 3312 } else if (myexponent==0x1f && mysignificand!=0) { 3313 // sign, exponent, significand meaningless 3314 category = fcNaN; 3315 *significandParts() = mysignificand; 3316 } else { 3317 category = fcNormal; 3318 exponent = myexponent - 15; //bias 3319 *significandParts() = mysignificand; 3320 if (myexponent==0) // denormal 3321 exponent = -14; 3322 else 3323 *significandParts() |= 0x400; // integer bit 3324 } 3325 } 3326 3327 /// Treat api as containing the bits of a floating point number. Currently 3328 /// we infer the floating point type from the size of the APInt. The 3329 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3330 /// when the size is anything else). 3331 void 3332 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3333 { 3334 if (Sem == &IEEEhalf) 3335 return initFromHalfAPInt(api); 3336 if (Sem == &IEEEsingle) 3337 return initFromFloatAPInt(api); 3338 if (Sem == &IEEEdouble) 3339 return initFromDoubleAPInt(api); 3340 if (Sem == &x87DoubleExtended) 3341 return initFromF80LongDoubleAPInt(api); 3342 if (Sem == &IEEEquad) 3343 return initFromQuadrupleAPInt(api); 3344 if (Sem == &PPCDoubleDouble) 3345 return initFromPPCDoubleDoubleAPInt(api); 3346 3347 llvm_unreachable(nullptr); 3348 } 3349 3350 APFloat 3351 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3352 { 3353 switch (BitWidth) { 3354 case 16: 3355 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3356 case 32: 3357 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3358 case 64: 3359 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3360 case 80: 3361 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3362 case 128: 3363 if (isIEEE) 3364 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3365 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3366 default: 3367 llvm_unreachable("Unknown floating bit width"); 3368 } 3369 } 3370 3371 /// Make this number the largest magnitude normal number in the given 3372 /// semantics. 3373 void APFloat::makeLargest(bool Negative) { 3374 // We want (in interchange format): 3375 // sign = {Negative} 3376 // exponent = 1..10 3377 // significand = 1..1 3378 category = fcNormal; 3379 sign = Negative; 3380 exponent = semantics->maxExponent; 3381 3382 // Use memset to set all but the highest integerPart to all ones. 3383 integerPart *significand = significandParts(); 3384 unsigned PartCount = partCount(); 3385 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3386 3387 // Set the high integerPart especially setting all unused top bits for 3388 // internal consistency. 3389 const unsigned NumUnusedHighBits = 3390 PartCount*integerPartWidth - semantics->precision; 3391 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3392 ? (~integerPart(0) >> NumUnusedHighBits) 3393 : 0; 3394 } 3395 3396 /// Make this number the smallest magnitude denormal number in the given 3397 /// semantics. 3398 void APFloat::makeSmallest(bool Negative) { 3399 // We want (in interchange format): 3400 // sign = {Negative} 3401 // exponent = 0..0 3402 // significand = 0..01 3403 category = fcNormal; 3404 sign = Negative; 3405 exponent = semantics->minExponent; 3406 APInt::tcSet(significandParts(), 1, partCount()); 3407 } 3408 3409 3410 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3411 // We want (in interchange format): 3412 // sign = {Negative} 3413 // exponent = 1..10 3414 // significand = 1..1 3415 APFloat Val(Sem, uninitialized); 3416 Val.makeLargest(Negative); 3417 return Val; 3418 } 3419 3420 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3421 // We want (in interchange format): 3422 // sign = {Negative} 3423 // exponent = 0..0 3424 // significand = 0..01 3425 APFloat Val(Sem, uninitialized); 3426 Val.makeSmallest(Negative); 3427 return Val; 3428 } 3429 3430 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3431 APFloat Val(Sem, uninitialized); 3432 3433 // We want (in interchange format): 3434 // sign = {Negative} 3435 // exponent = 0..0 3436 // significand = 10..0 3437 3438 Val.category = fcNormal; 3439 Val.zeroSignificand(); 3440 Val.sign = Negative; 3441 Val.exponent = Sem.minExponent; 3442 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3443 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3444 3445 return Val; 3446 } 3447 3448 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3449 initFromAPInt(&Sem, API); 3450 } 3451 3452 APFloat::APFloat(float f) { 3453 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3454 } 3455 3456 APFloat::APFloat(double d) { 3457 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3458 } 3459 3460 namespace { 3461 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3462 Buffer.append(Str.begin(), Str.end()); 3463 } 3464 3465 /// Removes data from the given significand until it is no more 3466 /// precise than is required for the desired precision. 3467 void AdjustToPrecision(APInt &significand, 3468 int &exp, unsigned FormatPrecision) { 3469 unsigned bits = significand.getActiveBits(); 3470 3471 // 196/59 is a very slight overestimate of lg_2(10). 3472 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3473 3474 if (bits <= bitsRequired) return; 3475 3476 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3477 if (!tensRemovable) return; 3478 3479 exp += tensRemovable; 3480 3481 APInt divisor(significand.getBitWidth(), 1); 3482 APInt powten(significand.getBitWidth(), 10); 3483 while (true) { 3484 if (tensRemovable & 1) 3485 divisor *= powten; 3486 tensRemovable >>= 1; 3487 if (!tensRemovable) break; 3488 powten *= powten; 3489 } 3490 3491 significand = significand.udiv(divisor); 3492 3493 // Truncate the significand down to its active bit count. 3494 significand = significand.trunc(significand.getActiveBits()); 3495 } 3496 3497 3498 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3499 int &exp, unsigned FormatPrecision) { 3500 unsigned N = buffer.size(); 3501 if (N <= FormatPrecision) return; 3502 3503 // The most significant figures are the last ones in the buffer. 3504 unsigned FirstSignificant = N - FormatPrecision; 3505 3506 // Round. 3507 // FIXME: this probably shouldn't use 'round half up'. 3508 3509 // Rounding down is just a truncation, except we also want to drop 3510 // trailing zeros from the new result. 3511 if (buffer[FirstSignificant - 1] < '5') { 3512 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3513 FirstSignificant++; 3514 3515 exp += FirstSignificant; 3516 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3517 return; 3518 } 3519 3520 // Rounding up requires a decimal add-with-carry. If we continue 3521 // the carry, the newly-introduced zeros will just be truncated. 3522 for (unsigned I = FirstSignificant; I != N; ++I) { 3523 if (buffer[I] == '9') { 3524 FirstSignificant++; 3525 } else { 3526 buffer[I]++; 3527 break; 3528 } 3529 } 3530 3531 // If we carried through, we have exactly one digit of precision. 3532 if (FirstSignificant == N) { 3533 exp += FirstSignificant; 3534 buffer.clear(); 3535 buffer.push_back('1'); 3536 return; 3537 } 3538 3539 exp += FirstSignificant; 3540 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3541 } 3542 } 3543 3544 void APFloat::toString(SmallVectorImpl<char> &Str, 3545 unsigned FormatPrecision, 3546 unsigned FormatMaxPadding) const { 3547 switch (category) { 3548 case fcInfinity: 3549 if (isNegative()) 3550 return append(Str, "-Inf"); 3551 else 3552 return append(Str, "+Inf"); 3553 3554 case fcNaN: return append(Str, "NaN"); 3555 3556 case fcZero: 3557 if (isNegative()) 3558 Str.push_back('-'); 3559 3560 if (!FormatMaxPadding) 3561 append(Str, "0.0E+0"); 3562 else 3563 Str.push_back('0'); 3564 return; 3565 3566 case fcNormal: 3567 break; 3568 } 3569 3570 if (isNegative()) 3571 Str.push_back('-'); 3572 3573 // Decompose the number into an APInt and an exponent. 3574 int exp = exponent - ((int) semantics->precision - 1); 3575 APInt significand(semantics->precision, 3576 makeArrayRef(significandParts(), 3577 partCountForBits(semantics->precision))); 3578 3579 // Set FormatPrecision if zero. We want to do this before we 3580 // truncate trailing zeros, as those are part of the precision. 3581 if (!FormatPrecision) { 3582 // We use enough digits so the number can be round-tripped back to an 3583 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3584 // Accurately" by Steele and White. 3585 // FIXME: Using a formula based purely on the precision is conservative; 3586 // we can print fewer digits depending on the actual value being printed. 3587 3588 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3589 FormatPrecision = 2 + semantics->precision * 59 / 196; 3590 } 3591 3592 // Ignore trailing binary zeros. 3593 int trailingZeros = significand.countTrailingZeros(); 3594 exp += trailingZeros; 3595 significand = significand.lshr(trailingZeros); 3596 3597 // Change the exponent from 2^e to 10^e. 3598 if (exp == 0) { 3599 // Nothing to do. 3600 } else if (exp > 0) { 3601 // Just shift left. 3602 significand = significand.zext(semantics->precision + exp); 3603 significand <<= exp; 3604 exp = 0; 3605 } else { /* exp < 0 */ 3606 int texp = -exp; 3607 3608 // We transform this using the identity: 3609 // (N)(2^-e) == (N)(5^e)(10^-e) 3610 // This means we have to multiply N (the significand) by 5^e. 3611 // To avoid overflow, we have to operate on numbers large 3612 // enough to store N * 5^e: 3613 // log2(N * 5^e) == log2(N) + e * log2(5) 3614 // <= semantics->precision + e * 137 / 59 3615 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3616 3617 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3618 3619 // Multiply significand by 5^e. 3620 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3621 significand = significand.zext(precision); 3622 APInt five_to_the_i(precision, 5); 3623 while (true) { 3624 if (texp & 1) significand *= five_to_the_i; 3625 3626 texp >>= 1; 3627 if (!texp) break; 3628 five_to_the_i *= five_to_the_i; 3629 } 3630 } 3631 3632 AdjustToPrecision(significand, exp, FormatPrecision); 3633 3634 SmallVector<char, 256> buffer; 3635 3636 // Fill the buffer. 3637 unsigned precision = significand.getBitWidth(); 3638 APInt ten(precision, 10); 3639 APInt digit(precision, 0); 3640 3641 bool inTrail = true; 3642 while (significand != 0) { 3643 // digit <- significand % 10 3644 // significand <- significand / 10 3645 APInt::udivrem(significand, ten, significand, digit); 3646 3647 unsigned d = digit.getZExtValue(); 3648 3649 // Drop trailing zeros. 3650 if (inTrail && !d) exp++; 3651 else { 3652 buffer.push_back((char) ('0' + d)); 3653 inTrail = false; 3654 } 3655 } 3656 3657 assert(!buffer.empty() && "no characters in buffer!"); 3658 3659 // Drop down to FormatPrecision. 3660 // TODO: don't do more precise calculations above than are required. 3661 AdjustToPrecision(buffer, exp, FormatPrecision); 3662 3663 unsigned NDigits = buffer.size(); 3664 3665 // Check whether we should use scientific notation. 3666 bool FormatScientific; 3667 if (!FormatMaxPadding) 3668 FormatScientific = true; 3669 else { 3670 if (exp >= 0) { 3671 // 765e3 --> 765000 3672 // ^^^ 3673 // But we shouldn't make the number look more precise than it is. 3674 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3675 NDigits + (unsigned) exp > FormatPrecision); 3676 } else { 3677 // Power of the most significant digit. 3678 int MSD = exp + (int) (NDigits - 1); 3679 if (MSD >= 0) { 3680 // 765e-2 == 7.65 3681 FormatScientific = false; 3682 } else { 3683 // 765e-5 == 0.00765 3684 // ^ ^^ 3685 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3686 } 3687 } 3688 } 3689 3690 // Scientific formatting is pretty straightforward. 3691 if (FormatScientific) { 3692 exp += (NDigits - 1); 3693 3694 Str.push_back(buffer[NDigits-1]); 3695 Str.push_back('.'); 3696 if (NDigits == 1) 3697 Str.push_back('0'); 3698 else 3699 for (unsigned I = 1; I != NDigits; ++I) 3700 Str.push_back(buffer[NDigits-1-I]); 3701 Str.push_back('E'); 3702 3703 Str.push_back(exp >= 0 ? '+' : '-'); 3704 if (exp < 0) exp = -exp; 3705 SmallVector<char, 6> expbuf; 3706 do { 3707 expbuf.push_back((char) ('0' + (exp % 10))); 3708 exp /= 10; 3709 } while (exp); 3710 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3711 Str.push_back(expbuf[E-1-I]); 3712 return; 3713 } 3714 3715 // Non-scientific, positive exponents. 3716 if (exp >= 0) { 3717 for (unsigned I = 0; I != NDigits; ++I) 3718 Str.push_back(buffer[NDigits-1-I]); 3719 for (unsigned I = 0; I != (unsigned) exp; ++I) 3720 Str.push_back('0'); 3721 return; 3722 } 3723 3724 // Non-scientific, negative exponents. 3725 3726 // The number of digits to the left of the decimal point. 3727 int NWholeDigits = exp + (int) NDigits; 3728 3729 unsigned I = 0; 3730 if (NWholeDigits > 0) { 3731 for (; I != (unsigned) NWholeDigits; ++I) 3732 Str.push_back(buffer[NDigits-I-1]); 3733 Str.push_back('.'); 3734 } else { 3735 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3736 3737 Str.push_back('0'); 3738 Str.push_back('.'); 3739 for (unsigned Z = 1; Z != NZeros; ++Z) 3740 Str.push_back('0'); 3741 } 3742 3743 for (; I != NDigits; ++I) 3744 Str.push_back(buffer[NDigits-I-1]); 3745 } 3746 3747 bool APFloat::getExactInverse(APFloat *inv) const { 3748 // Special floats and denormals have no exact inverse. 3749 if (!isFiniteNonZero()) 3750 return false; 3751 3752 // Check that the number is a power of two by making sure that only the 3753 // integer bit is set in the significand. 3754 if (significandLSB() != semantics->precision - 1) 3755 return false; 3756 3757 // Get the inverse. 3758 APFloat reciprocal(*semantics, 1ULL); 3759 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3760 return false; 3761 3762 // Avoid multiplication with a denormal, it is not safe on all platforms and 3763 // may be slower than a normal division. 3764 if (reciprocal.isDenormal()) 3765 return false; 3766 3767 assert(reciprocal.isFiniteNonZero() && 3768 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3769 3770 if (inv) 3771 *inv = reciprocal; 3772 3773 return true; 3774 } 3775 3776 bool APFloat::isSignaling() const { 3777 if (!isNaN()) 3778 return false; 3779 3780 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3781 // first bit of the trailing significand being 0. 3782 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3783 } 3784 3785 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3786 /// 3787 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3788 /// appropriate sign switching before/after the computation. 3789 APFloat::opStatus APFloat::next(bool nextDown) { 3790 // If we are performing nextDown, swap sign so we have -x. 3791 if (nextDown) 3792 changeSign(); 3793 3794 // Compute nextUp(x) 3795 opStatus result = opOK; 3796 3797 // Handle each float category separately. 3798 switch (category) { 3799 case fcInfinity: 3800 // nextUp(+inf) = +inf 3801 if (!isNegative()) 3802 break; 3803 // nextUp(-inf) = -getLargest() 3804 makeLargest(true); 3805 break; 3806 case fcNaN: 3807 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3808 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3809 // change the payload. 3810 if (isSignaling()) { 3811 result = opInvalidOp; 3812 // For consistency, propagate the sign of the sNaN to the qNaN. 3813 makeNaN(false, isNegative(), nullptr); 3814 } 3815 break; 3816 case fcZero: 3817 // nextUp(pm 0) = +getSmallest() 3818 makeSmallest(false); 3819 break; 3820 case fcNormal: 3821 // nextUp(-getSmallest()) = -0 3822 if (isSmallest() && isNegative()) { 3823 APInt::tcSet(significandParts(), 0, partCount()); 3824 category = fcZero; 3825 exponent = 0; 3826 break; 3827 } 3828 3829 // nextUp(getLargest()) == INFINITY 3830 if (isLargest() && !isNegative()) { 3831 APInt::tcSet(significandParts(), 0, partCount()); 3832 category = fcInfinity; 3833 exponent = semantics->maxExponent + 1; 3834 break; 3835 } 3836 3837 // nextUp(normal) == normal + inc. 3838 if (isNegative()) { 3839 // If we are negative, we need to decrement the significand. 3840 3841 // We only cross a binade boundary that requires adjusting the exponent 3842 // if: 3843 // 1. exponent != semantics->minExponent. This implies we are not in the 3844 // smallest binade or are dealing with denormals. 3845 // 2. Our significand excluding the integral bit is all zeros. 3846 bool WillCrossBinadeBoundary = 3847 exponent != semantics->minExponent && isSignificandAllZeros(); 3848 3849 // Decrement the significand. 3850 // 3851 // We always do this since: 3852 // 1. If we are dealing with a non-binade decrement, by definition we 3853 // just decrement the significand. 3854 // 2. If we are dealing with a normal -> normal binade decrement, since 3855 // we have an explicit integral bit the fact that all bits but the 3856 // integral bit are zero implies that subtracting one will yield a 3857 // significand with 0 integral bit and 1 in all other spots. Thus we 3858 // must just adjust the exponent and set the integral bit to 1. 3859 // 3. If we are dealing with a normal -> denormal binade decrement, 3860 // since we set the integral bit to 0 when we represent denormals, we 3861 // just decrement the significand. 3862 integerPart *Parts = significandParts(); 3863 APInt::tcDecrement(Parts, partCount()); 3864 3865 if (WillCrossBinadeBoundary) { 3866 // Our result is a normal number. Do the following: 3867 // 1. Set the integral bit to 1. 3868 // 2. Decrement the exponent. 3869 APInt::tcSetBit(Parts, semantics->precision - 1); 3870 exponent--; 3871 } 3872 } else { 3873 // If we are positive, we need to increment the significand. 3874 3875 // We only cross a binade boundary that requires adjusting the exponent if 3876 // the input is not a denormal and all of said input's significand bits 3877 // are set. If all of said conditions are true: clear the significand, set 3878 // the integral bit to 1, and increment the exponent. If we have a 3879 // denormal always increment since moving denormals and the numbers in the 3880 // smallest normal binade have the same exponent in our representation. 3881 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3882 3883 if (WillCrossBinadeBoundary) { 3884 integerPart *Parts = significandParts(); 3885 APInt::tcSet(Parts, 0, partCount()); 3886 APInt::tcSetBit(Parts, semantics->precision - 1); 3887 assert(exponent != semantics->maxExponent && 3888 "We can not increment an exponent beyond the maxExponent allowed" 3889 " by the given floating point semantics."); 3890 exponent++; 3891 } else { 3892 incrementSignificand(); 3893 } 3894 } 3895 break; 3896 } 3897 3898 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3899 if (nextDown) 3900 changeSign(); 3901 3902 return result; 3903 } 3904 3905 void 3906 APFloat::makeInf(bool Negative) { 3907 category = fcInfinity; 3908 sign = Negative; 3909 exponent = semantics->maxExponent + 1; 3910 APInt::tcSet(significandParts(), 0, partCount()); 3911 } 3912 3913 void 3914 APFloat::makeZero(bool Negative) { 3915 category = fcZero; 3916 sign = Negative; 3917 exponent = semantics->minExponent-1; 3918 APInt::tcSet(significandParts(), 0, partCount()); 3919 } 3920 3921 APFloat llvm::scalbn(APFloat X, int Exp) { 3922 if (X.isInfinity() || X.isZero() || X.isNaN()) 3923 return std::move(X); 3924 3925 auto MaxExp = X.getSemantics().maxExponent; 3926 auto MinExp = X.getSemantics().minExponent; 3927 if (Exp > (MaxExp - X.exponent)) 3928 // Overflow saturates to infinity. 3929 return APFloat::getInf(X.getSemantics(), X.isNegative()); 3930 if (Exp < (MinExp - X.exponent)) 3931 // Underflow saturates to zero. 3932 return APFloat::getZero(X.getSemantics(), X.isNegative()); 3933 3934 X.exponent += Exp; 3935 return std::move(X); 3936 } 3937