1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MathExtras.h" 23 #include <cstring> 24 #include <limits.h> 25 26 using namespace llvm; 27 28 /// A macro used to combine two fcCategory enums into one key which can be used 29 /// in a switch statement to classify how the interaction of two APFloat's 30 /// categories affects an operation. 31 /// 32 /// TODO: If clang source code is ever allowed to use constexpr in its own 33 /// codebase, change this into a static inline function. 34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) 35 36 /* Assumed in hexadecimal significand parsing, and conversion to 37 hexadecimal strings. */ 38 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!"); 39 40 namespace llvm { 41 42 /* Represents floating point arithmetic semantics. */ 43 struct fltSemantics { 44 /* The largest E such that 2^E is representable; this matches the 45 definition of IEEE 754. */ 46 APFloat::ExponentType maxExponent; 47 48 /* The smallest E such that 2^E is a normalized number; this 49 matches the definition of IEEE 754. */ 50 APFloat::ExponentType minExponent; 51 52 /* Number of bits in the significand. This includes the integer 53 bit. */ 54 unsigned int precision; 55 }; 56 57 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 }; 58 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 }; 59 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 }; 60 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 }; 61 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 }; 62 const fltSemantics APFloat::Bogus = { 0, 0, 0 }; 63 64 /* The PowerPC format consists of two doubles. It does not map cleanly 65 onto the usual format above. It is approximated using twice the 66 mantissa bits. Note that for exponents near the double minimum, 67 we no longer can represent the full 106 mantissa bits, so those 68 will be treated as denormal numbers. 69 70 FIXME: While this approximation is equivalent to what GCC uses for 71 compile-time arithmetic on PPC double-double numbers, it is not able 72 to represent all possible values held by a PPC double-double number, 73 for example: (long double) 1.0 + (long double) 0x1p-106 74 Should this be replaced by a full emulation of PPC double-double? */ 75 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 }; 76 77 /* A tight upper bound on number of parts required to hold the value 78 pow(5, power) is 79 80 power * 815 / (351 * integerPartWidth) + 1 81 82 However, whilst the result may require only this many parts, 83 because we are multiplying two values to get it, the 84 multiplication may require an extra part with the excess part 85 being zero (consider the trivial case of 1 * 1, tcFullMultiply 86 requires two parts to hold the single-part result). So we add an 87 extra one to guarantee enough space whilst multiplying. */ 88 const unsigned int maxExponent = 16383; 89 const unsigned int maxPrecision = 113; 90 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 91 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 92 / (351 * integerPartWidth)); 93 } 94 95 /* A bunch of private, handy routines. */ 96 97 static inline unsigned int 98 partCountForBits(unsigned int bits) 99 { 100 return ((bits) + integerPartWidth - 1) / integerPartWidth; 101 } 102 103 /* Returns 0U-9U. Return values >= 10U are not digits. */ 104 static inline unsigned int 105 decDigitValue(unsigned int c) 106 { 107 return c - '0'; 108 } 109 110 /* Return the value of a decimal exponent of the form 111 [+-]ddddddd. 112 113 If the exponent overflows, returns a large exponent with the 114 appropriate sign. */ 115 static int 116 readExponent(StringRef::iterator begin, StringRef::iterator end) 117 { 118 bool isNegative; 119 unsigned int absExponent; 120 const unsigned int overlargeExponent = 24000; /* FIXME. */ 121 StringRef::iterator p = begin; 122 123 assert(p != end && "Exponent has no digits"); 124 125 isNegative = (*p == '-'); 126 if (*p == '-' || *p == '+') { 127 p++; 128 assert(p != end && "Exponent has no digits"); 129 } 130 131 absExponent = decDigitValue(*p++); 132 assert(absExponent < 10U && "Invalid character in exponent"); 133 134 for (; p != end; ++p) { 135 unsigned int value; 136 137 value = decDigitValue(*p); 138 assert(value < 10U && "Invalid character in exponent"); 139 140 value += absExponent * 10; 141 if (absExponent >= overlargeExponent) { 142 absExponent = overlargeExponent; 143 p = end; /* outwit assert below */ 144 break; 145 } 146 absExponent = value; 147 } 148 149 assert(p == end && "Invalid exponent in exponent"); 150 151 if (isNegative) 152 return -(int) absExponent; 153 else 154 return (int) absExponent; 155 } 156 157 /* This is ugly and needs cleaning up, but I don't immediately see 158 how whilst remaining safe. */ 159 static int 160 totalExponent(StringRef::iterator p, StringRef::iterator end, 161 int exponentAdjustment) 162 { 163 int unsignedExponent; 164 bool negative, overflow; 165 int exponent = 0; 166 167 assert(p != end && "Exponent has no digits"); 168 169 negative = *p == '-'; 170 if (*p == '-' || *p == '+') { 171 p++; 172 assert(p != end && "Exponent has no digits"); 173 } 174 175 unsignedExponent = 0; 176 overflow = false; 177 for (; p != end; ++p) { 178 unsigned int value; 179 180 value = decDigitValue(*p); 181 assert(value < 10U && "Invalid character in exponent"); 182 183 unsignedExponent = unsignedExponent * 10 + value; 184 if (unsignedExponent > 32767) { 185 overflow = true; 186 break; 187 } 188 } 189 190 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 191 overflow = true; 192 193 if (!overflow) { 194 exponent = unsignedExponent; 195 if (negative) 196 exponent = -exponent; 197 exponent += exponentAdjustment; 198 if (exponent > 32767 || exponent < -32768) 199 overflow = true; 200 } 201 202 if (overflow) 203 exponent = negative ? -32768: 32767; 204 205 return exponent; 206 } 207 208 static StringRef::iterator 209 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 210 StringRef::iterator *dot) 211 { 212 StringRef::iterator p = begin; 213 *dot = end; 214 while (p != end && *p == '0') 215 p++; 216 217 if (p != end && *p == '.') { 218 *dot = p++; 219 220 assert(end - begin != 1 && "Significand has no digits"); 221 222 while (p != end && *p == '0') 223 p++; 224 } 225 226 return p; 227 } 228 229 /* Given a normal decimal floating point number of the form 230 231 dddd.dddd[eE][+-]ddd 232 233 where the decimal point and exponent are optional, fill out the 234 structure D. Exponent is appropriate if the significand is 235 treated as an integer, and normalizedExponent if the significand 236 is taken to have the decimal point after a single leading 237 non-zero digit. 238 239 If the value is zero, V->firstSigDigit points to a non-digit, and 240 the return exponent is zero. 241 */ 242 struct decimalInfo { 243 const char *firstSigDigit; 244 const char *lastSigDigit; 245 int exponent; 246 int normalizedExponent; 247 }; 248 249 static void 250 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 251 decimalInfo *D) 252 { 253 StringRef::iterator dot = end; 254 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 255 256 D->firstSigDigit = p; 257 D->exponent = 0; 258 D->normalizedExponent = 0; 259 260 for (; p != end; ++p) { 261 if (*p == '.') { 262 assert(dot == end && "String contains multiple dots"); 263 dot = p++; 264 if (p == end) 265 break; 266 } 267 if (decDigitValue(*p) >= 10U) 268 break; 269 } 270 271 if (p != end) { 272 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 273 assert(p != begin && "Significand has no digits"); 274 assert((dot == end || p - begin != 1) && "Significand has no digits"); 275 276 /* p points to the first non-digit in the string */ 277 D->exponent = readExponent(p + 1, end); 278 279 /* Implied decimal point? */ 280 if (dot == end) 281 dot = p; 282 } 283 284 /* If number is all zeroes accept any exponent. */ 285 if (p != D->firstSigDigit) { 286 /* Drop insignificant trailing zeroes. */ 287 if (p != begin) { 288 do 289 do 290 p--; 291 while (p != begin && *p == '0'); 292 while (p != begin && *p == '.'); 293 } 294 295 /* Adjust the exponents for any decimal point. */ 296 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p)); 297 D->normalizedExponent = (D->exponent + 298 static_cast<APFloat::ExponentType>((p - D->firstSigDigit) 299 - (dot > D->firstSigDigit && dot < p))); 300 } 301 302 D->lastSigDigit = p; 303 } 304 305 /* Return the trailing fraction of a hexadecimal number. 306 DIGITVALUE is the first hex digit of the fraction, P points to 307 the next digit. */ 308 static lostFraction 309 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 310 unsigned int digitValue) 311 { 312 unsigned int hexDigit; 313 314 /* If the first trailing digit isn't 0 or 8 we can work out the 315 fraction immediately. */ 316 if (digitValue > 8) 317 return lfMoreThanHalf; 318 else if (digitValue < 8 && digitValue > 0) 319 return lfLessThanHalf; 320 321 // Otherwise we need to find the first non-zero digit. 322 while (p != end && (*p == '0' || *p == '.')) 323 p++; 324 325 assert(p != end && "Invalid trailing hexadecimal fraction!"); 326 327 hexDigit = hexDigitValue(*p); 328 329 /* If we ran off the end it is exactly zero or one-half, otherwise 330 a little more. */ 331 if (hexDigit == -1U) 332 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 333 else 334 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 335 } 336 337 /* Return the fraction lost were a bignum truncated losing the least 338 significant BITS bits. */ 339 static lostFraction 340 lostFractionThroughTruncation(const integerPart *parts, 341 unsigned int partCount, 342 unsigned int bits) 343 { 344 unsigned int lsb; 345 346 lsb = APInt::tcLSB(parts, partCount); 347 348 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 349 if (bits <= lsb) 350 return lfExactlyZero; 351 if (bits == lsb + 1) 352 return lfExactlyHalf; 353 if (bits <= partCount * integerPartWidth && 354 APInt::tcExtractBit(parts, bits - 1)) 355 return lfMoreThanHalf; 356 357 return lfLessThanHalf; 358 } 359 360 /* Shift DST right BITS bits noting lost fraction. */ 361 static lostFraction 362 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 363 { 364 lostFraction lost_fraction; 365 366 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 367 368 APInt::tcShiftRight(dst, parts, bits); 369 370 return lost_fraction; 371 } 372 373 /* Combine the effect of two lost fractions. */ 374 static lostFraction 375 combineLostFractions(lostFraction moreSignificant, 376 lostFraction lessSignificant) 377 { 378 if (lessSignificant != lfExactlyZero) { 379 if (moreSignificant == lfExactlyZero) 380 moreSignificant = lfLessThanHalf; 381 else if (moreSignificant == lfExactlyHalf) 382 moreSignificant = lfMoreThanHalf; 383 } 384 385 return moreSignificant; 386 } 387 388 /* The error from the true value, in half-ulps, on multiplying two 389 floating point numbers, which differ from the value they 390 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 391 than the returned value. 392 393 See "How to Read Floating Point Numbers Accurately" by William D 394 Clinger. */ 395 static unsigned int 396 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 397 { 398 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 399 400 if (HUerr1 + HUerr2 == 0) 401 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 402 else 403 return inexactMultiply + 2 * (HUerr1 + HUerr2); 404 } 405 406 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 407 when the least significant BITS are truncated. BITS cannot be 408 zero. */ 409 static integerPart 410 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 411 { 412 unsigned int count, partBits; 413 integerPart part, boundary; 414 415 assert(bits != 0); 416 417 bits--; 418 count = bits / integerPartWidth; 419 partBits = bits % integerPartWidth + 1; 420 421 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 422 423 if (isNearest) 424 boundary = (integerPart) 1 << (partBits - 1); 425 else 426 boundary = 0; 427 428 if (count == 0) { 429 if (part - boundary <= boundary - part) 430 return part - boundary; 431 else 432 return boundary - part; 433 } 434 435 if (part == boundary) { 436 while (--count) 437 if (parts[count]) 438 return ~(integerPart) 0; /* A lot. */ 439 440 return parts[0]; 441 } else if (part == boundary - 1) { 442 while (--count) 443 if (~parts[count]) 444 return ~(integerPart) 0; /* A lot. */ 445 446 return -parts[0]; 447 } 448 449 return ~(integerPart) 0; /* A lot. */ 450 } 451 452 /* Place pow(5, power) in DST, and return the number of parts used. 453 DST must be at least one part larger than size of the answer. */ 454 static unsigned int 455 powerOf5(integerPart *dst, unsigned int power) 456 { 457 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 458 15625, 78125 }; 459 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 460 pow5s[0] = 78125 * 5; 461 462 unsigned int partsCount[16] = { 1 }; 463 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 464 unsigned int result; 465 assert(power <= maxExponent); 466 467 p1 = dst; 468 p2 = scratch; 469 470 *p1 = firstEightPowers[power & 7]; 471 power >>= 3; 472 473 result = 1; 474 pow5 = pow5s; 475 476 for (unsigned int n = 0; power; power >>= 1, n++) { 477 unsigned int pc; 478 479 pc = partsCount[n]; 480 481 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 482 if (pc == 0) { 483 pc = partsCount[n - 1]; 484 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 485 pc *= 2; 486 if (pow5[pc - 1] == 0) 487 pc--; 488 partsCount[n] = pc; 489 } 490 491 if (power & 1) { 492 integerPart *tmp; 493 494 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 495 result += pc; 496 if (p2[result - 1] == 0) 497 result--; 498 499 /* Now result is in p1 with partsCount parts and p2 is scratch 500 space. */ 501 tmp = p1, p1 = p2, p2 = tmp; 502 } 503 504 pow5 += pc; 505 } 506 507 if (p1 != dst) 508 APInt::tcAssign(dst, p1, result); 509 510 return result; 511 } 512 513 /* Zero at the end to avoid modular arithmetic when adding one; used 514 when rounding up during hexadecimal output. */ 515 static const char hexDigitsLower[] = "0123456789abcdef0"; 516 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 517 static const char infinityL[] = "infinity"; 518 static const char infinityU[] = "INFINITY"; 519 static const char NaNL[] = "nan"; 520 static const char NaNU[] = "NAN"; 521 522 /* Write out an integerPart in hexadecimal, starting with the most 523 significant nibble. Write out exactly COUNT hexdigits, return 524 COUNT. */ 525 static unsigned int 526 partAsHex (char *dst, integerPart part, unsigned int count, 527 const char *hexDigitChars) 528 { 529 unsigned int result = count; 530 531 assert(count != 0 && count <= integerPartWidth / 4); 532 533 part >>= (integerPartWidth - 4 * count); 534 while (count--) { 535 dst[count] = hexDigitChars[part & 0xf]; 536 part >>= 4; 537 } 538 539 return result; 540 } 541 542 /* Write out an unsigned decimal integer. */ 543 static char * 544 writeUnsignedDecimal (char *dst, unsigned int n) 545 { 546 char buff[40], *p; 547 548 p = buff; 549 do 550 *p++ = '0' + n % 10; 551 while (n /= 10); 552 553 do 554 *dst++ = *--p; 555 while (p != buff); 556 557 return dst; 558 } 559 560 /* Write out a signed decimal integer. */ 561 static char * 562 writeSignedDecimal (char *dst, int value) 563 { 564 if (value < 0) { 565 *dst++ = '-'; 566 dst = writeUnsignedDecimal(dst, -(unsigned) value); 567 } else 568 dst = writeUnsignedDecimal(dst, value); 569 570 return dst; 571 } 572 573 /* Constructors. */ 574 void 575 APFloat::initialize(const fltSemantics *ourSemantics) 576 { 577 unsigned int count; 578 579 semantics = ourSemantics; 580 count = partCount(); 581 if (count > 1) 582 significand.parts = new integerPart[count]; 583 } 584 585 void 586 APFloat::freeSignificand() 587 { 588 if (needsCleanup()) 589 delete [] significand.parts; 590 } 591 592 void 593 APFloat::assign(const APFloat &rhs) 594 { 595 assert(semantics == rhs.semantics); 596 597 sign = rhs.sign; 598 category = rhs.category; 599 exponent = rhs.exponent; 600 if (isFiniteNonZero() || category == fcNaN) 601 copySignificand(rhs); 602 } 603 604 void 605 APFloat::copySignificand(const APFloat &rhs) 606 { 607 assert(isFiniteNonZero() || category == fcNaN); 608 assert(rhs.partCount() >= partCount()); 609 610 APInt::tcAssign(significandParts(), rhs.significandParts(), 611 partCount()); 612 } 613 614 /* Make this number a NaN, with an arbitrary but deterministic value 615 for the significand. If double or longer, this is a signalling NaN, 616 which may not be ideal. If float, this is QNaN(0). */ 617 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 618 { 619 category = fcNaN; 620 sign = Negative; 621 622 integerPart *significand = significandParts(); 623 unsigned numParts = partCount(); 624 625 // Set the significand bits to the fill. 626 if (!fill || fill->getNumWords() < numParts) 627 APInt::tcSet(significand, 0, numParts); 628 if (fill) { 629 APInt::tcAssign(significand, fill->getRawData(), 630 std::min(fill->getNumWords(), numParts)); 631 632 // Zero out the excess bits of the significand. 633 unsigned bitsToPreserve = semantics->precision - 1; 634 unsigned part = bitsToPreserve / 64; 635 bitsToPreserve %= 64; 636 significand[part] &= ((1ULL << bitsToPreserve) - 1); 637 for (part++; part != numParts; ++part) 638 significand[part] = 0; 639 } 640 641 unsigned QNaNBit = semantics->precision - 2; 642 643 if (SNaN) { 644 // We always have to clear the QNaN bit to make it an SNaN. 645 APInt::tcClearBit(significand, QNaNBit); 646 647 // If there are no bits set in the payload, we have to set 648 // *something* to make it a NaN instead of an infinity; 649 // conventionally, this is the next bit down from the QNaN bit. 650 if (APInt::tcIsZero(significand, numParts)) 651 APInt::tcSetBit(significand, QNaNBit - 1); 652 } else { 653 // We always have to set the QNaN bit to make it a QNaN. 654 APInt::tcSetBit(significand, QNaNBit); 655 } 656 657 // For x87 extended precision, we want to make a NaN, not a 658 // pseudo-NaN. Maybe we should expose the ability to make 659 // pseudo-NaNs? 660 if (semantics == &APFloat::x87DoubleExtended) 661 APInt::tcSetBit(significand, QNaNBit + 1); 662 } 663 664 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 665 const APInt *fill) { 666 APFloat value(Sem, uninitialized); 667 value.makeNaN(SNaN, Negative, fill); 668 return value; 669 } 670 671 APFloat & 672 APFloat::operator=(const APFloat &rhs) 673 { 674 if (this != &rhs) { 675 if (semantics != rhs.semantics) { 676 freeSignificand(); 677 initialize(rhs.semantics); 678 } 679 assign(rhs); 680 } 681 682 return *this; 683 } 684 685 APFloat & 686 APFloat::operator=(APFloat &&rhs) { 687 freeSignificand(); 688 689 semantics = rhs.semantics; 690 significand = rhs.significand; 691 exponent = rhs.exponent; 692 category = rhs.category; 693 sign = rhs.sign; 694 695 rhs.semantics = &Bogus; 696 return *this; 697 } 698 699 bool 700 APFloat::isDenormal() const { 701 return isFiniteNonZero() && (exponent == semantics->minExponent) && 702 (APInt::tcExtractBit(significandParts(), 703 semantics->precision - 1) == 0); 704 } 705 706 bool 707 APFloat::isSmallest() const { 708 // The smallest number by magnitude in our format will be the smallest 709 // denormal, i.e. the floating point number with exponent being minimum 710 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 711 return isFiniteNonZero() && exponent == semantics->minExponent && 712 significandMSB() == 0; 713 } 714 715 bool APFloat::isSignificandAllOnes() const { 716 // Test if the significand excluding the integral bit is all ones. This allows 717 // us to test for binade boundaries. 718 const integerPart *Parts = significandParts(); 719 const unsigned PartCount = partCount(); 720 for (unsigned i = 0; i < PartCount - 1; i++) 721 if (~Parts[i]) 722 return false; 723 724 // Set the unused high bits to all ones when we compare. 725 const unsigned NumHighBits = 726 PartCount*integerPartWidth - semantics->precision + 1; 727 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 728 "fill than integerPartWidth"); 729 const integerPart HighBitFill = 730 ~integerPart(0) << (integerPartWidth - NumHighBits); 731 if (~(Parts[PartCount - 1] | HighBitFill)) 732 return false; 733 734 return true; 735 } 736 737 bool APFloat::isSignificandAllZeros() const { 738 // Test if the significand excluding the integral bit is all zeros. This 739 // allows us to test for binade boundaries. 740 const integerPart *Parts = significandParts(); 741 const unsigned PartCount = partCount(); 742 743 for (unsigned i = 0; i < PartCount - 1; i++) 744 if (Parts[i]) 745 return false; 746 747 const unsigned NumHighBits = 748 PartCount*integerPartWidth - semantics->precision + 1; 749 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 750 "clear than integerPartWidth"); 751 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 752 753 if (Parts[PartCount - 1] & HighBitMask) 754 return false; 755 756 return true; 757 } 758 759 bool 760 APFloat::isLargest() const { 761 // The largest number by magnitude in our format will be the floating point 762 // number with maximum exponent and with significand that is all ones. 763 return isFiniteNonZero() && exponent == semantics->maxExponent 764 && isSignificandAllOnes(); 765 } 766 767 bool 768 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 769 if (this == &rhs) 770 return true; 771 if (semantics != rhs.semantics || 772 category != rhs.category || 773 sign != rhs.sign) 774 return false; 775 if (category==fcZero || category==fcInfinity) 776 return true; 777 else if (isFiniteNonZero() && exponent!=rhs.exponent) 778 return false; 779 else { 780 int i= partCount(); 781 const integerPart* p=significandParts(); 782 const integerPart* q=rhs.significandParts(); 783 for (; i>0; i--, p++, q++) { 784 if (*p != *q) 785 return false; 786 } 787 return true; 788 } 789 } 790 791 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 792 initialize(&ourSemantics); 793 sign = 0; 794 category = fcNormal; 795 zeroSignificand(); 796 exponent = ourSemantics.precision - 1; 797 significandParts()[0] = value; 798 normalize(rmNearestTiesToEven, lfExactlyZero); 799 } 800 801 APFloat::APFloat(const fltSemantics &ourSemantics) { 802 initialize(&ourSemantics); 803 category = fcZero; 804 sign = false; 805 } 806 807 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 808 // Allocates storage if necessary but does not initialize it. 809 initialize(&ourSemantics); 810 } 811 812 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 813 initialize(&ourSemantics); 814 convertFromString(text, rmNearestTiesToEven); 815 } 816 817 APFloat::APFloat(const APFloat &rhs) { 818 initialize(rhs.semantics); 819 assign(rhs); 820 } 821 822 APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) { 823 *this = std::move(rhs); 824 } 825 826 APFloat::~APFloat() 827 { 828 freeSignificand(); 829 } 830 831 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 832 void APFloat::Profile(FoldingSetNodeID& ID) const { 833 ID.Add(bitcastToAPInt()); 834 } 835 836 unsigned int 837 APFloat::partCount() const 838 { 839 return partCountForBits(semantics->precision + 1); 840 } 841 842 unsigned int 843 APFloat::semanticsPrecision(const fltSemantics &semantics) 844 { 845 return semantics.precision; 846 } 847 848 const integerPart * 849 APFloat::significandParts() const 850 { 851 return const_cast<APFloat *>(this)->significandParts(); 852 } 853 854 integerPart * 855 APFloat::significandParts() 856 { 857 if (partCount() > 1) 858 return significand.parts; 859 else 860 return &significand.part; 861 } 862 863 void 864 APFloat::zeroSignificand() 865 { 866 APInt::tcSet(significandParts(), 0, partCount()); 867 } 868 869 /* Increment an fcNormal floating point number's significand. */ 870 void 871 APFloat::incrementSignificand() 872 { 873 integerPart carry; 874 875 carry = APInt::tcIncrement(significandParts(), partCount()); 876 877 /* Our callers should never cause us to overflow. */ 878 assert(carry == 0); 879 (void)carry; 880 } 881 882 /* Add the significand of the RHS. Returns the carry flag. */ 883 integerPart 884 APFloat::addSignificand(const APFloat &rhs) 885 { 886 integerPart *parts; 887 888 parts = significandParts(); 889 890 assert(semantics == rhs.semantics); 891 assert(exponent == rhs.exponent); 892 893 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 894 } 895 896 /* Subtract the significand of the RHS with a borrow flag. Returns 897 the borrow flag. */ 898 integerPart 899 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 900 { 901 integerPart *parts; 902 903 parts = significandParts(); 904 905 assert(semantics == rhs.semantics); 906 assert(exponent == rhs.exponent); 907 908 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 909 partCount()); 910 } 911 912 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 913 on to the full-precision result of the multiplication. Returns the 914 lost fraction. */ 915 lostFraction 916 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 917 { 918 unsigned int omsb; // One, not zero, based MSB. 919 unsigned int partsCount, newPartsCount, precision; 920 integerPart *lhsSignificand; 921 integerPart scratch[4]; 922 integerPart *fullSignificand; 923 lostFraction lost_fraction; 924 bool ignored; 925 926 assert(semantics == rhs.semantics); 927 928 precision = semantics->precision; 929 newPartsCount = partCountForBits(precision * 2); 930 931 if (newPartsCount > 4) 932 fullSignificand = new integerPart[newPartsCount]; 933 else 934 fullSignificand = scratch; 935 936 lhsSignificand = significandParts(); 937 partsCount = partCount(); 938 939 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 940 rhs.significandParts(), partsCount, partsCount); 941 942 lost_fraction = lfExactlyZero; 943 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 944 exponent += rhs.exponent; 945 946 // Assume the operands involved in the multiplication are single-precision 947 // FP, and the two multiplicants are: 948 // *this = a23 . a22 ... a0 * 2^e1 949 // rhs = b23 . b22 ... b0 * 2^e2 950 // the result of multiplication is: 951 // *this = c47 c46 . c45 ... c0 * 2^(e1+e2) 952 // Note that there are two significant bits at the left-hand side of the 953 // radix point. Move the radix point toward left by one bit, and adjust 954 // exponent accordingly. 955 exponent += 1; 956 957 if (addend && addend->isNonZero()) { 958 // The intermediate result of the multiplication has "2 * precision" 959 // signicant bit; adjust the addend to be consistent with mul result. 960 // 961 Significand savedSignificand = significand; 962 const fltSemantics *savedSemantics = semantics; 963 fltSemantics extendedSemantics; 964 opStatus status; 965 unsigned int extendedPrecision; 966 967 /* Normalize our MSB. */ 968 extendedPrecision = 2 * precision; 969 if (omsb != extendedPrecision) { 970 assert(extendedPrecision > omsb); 971 APInt::tcShiftLeft(fullSignificand, newPartsCount, 972 extendedPrecision - omsb); 973 exponent -= extendedPrecision - omsb; 974 } 975 976 /* Create new semantics. */ 977 extendedSemantics = *semantics; 978 extendedSemantics.precision = extendedPrecision; 979 980 if (newPartsCount == 1) 981 significand.part = fullSignificand[0]; 982 else 983 significand.parts = fullSignificand; 984 semantics = &extendedSemantics; 985 986 APFloat extendedAddend(*addend); 987 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 988 assert(status == opOK); 989 (void)status; 990 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 991 992 /* Restore our state. */ 993 if (newPartsCount == 1) 994 fullSignificand[0] = significand.part; 995 significand = savedSignificand; 996 semantics = savedSemantics; 997 998 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 999 } 1000 1001 // Convert the result having "2 * precision" significant-bits back to the one 1002 // having "precision" significant-bits. First, move the radix point from 1003 // poision "2*precision - 1" to "precision - 1". The exponent need to be 1004 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 1005 exponent -= precision; 1006 1007 // In case MSB resides at the left-hand side of radix point, shift the 1008 // mantissa right by some amount to make sure the MSB reside right before 1009 // the radix point (i.e. "MSB . rest-significant-bits"). 1010 // 1011 // Note that the result is not normalized when "omsb < precision". So, the 1012 // caller needs to call APFloat::normalize() if normalized value is expected. 1013 if (omsb > precision) { 1014 unsigned int bits, significantParts; 1015 lostFraction lf; 1016 1017 bits = omsb - precision; 1018 significantParts = partCountForBits(omsb); 1019 lf = shiftRight(fullSignificand, significantParts, bits); 1020 lost_fraction = combineLostFractions(lf, lost_fraction); 1021 exponent += bits; 1022 } 1023 1024 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1025 1026 if (newPartsCount > 4) 1027 delete [] fullSignificand; 1028 1029 return lost_fraction; 1030 } 1031 1032 /* Multiply the significands of LHS and RHS to DST. */ 1033 lostFraction 1034 APFloat::divideSignificand(const APFloat &rhs) 1035 { 1036 unsigned int bit, i, partsCount; 1037 const integerPart *rhsSignificand; 1038 integerPart *lhsSignificand, *dividend, *divisor; 1039 integerPart scratch[4]; 1040 lostFraction lost_fraction; 1041 1042 assert(semantics == rhs.semantics); 1043 1044 lhsSignificand = significandParts(); 1045 rhsSignificand = rhs.significandParts(); 1046 partsCount = partCount(); 1047 1048 if (partsCount > 2) 1049 dividend = new integerPart[partsCount * 2]; 1050 else 1051 dividend = scratch; 1052 1053 divisor = dividend + partsCount; 1054 1055 /* Copy the dividend and divisor as they will be modified in-place. */ 1056 for (i = 0; i < partsCount; i++) { 1057 dividend[i] = lhsSignificand[i]; 1058 divisor[i] = rhsSignificand[i]; 1059 lhsSignificand[i] = 0; 1060 } 1061 1062 exponent -= rhs.exponent; 1063 1064 unsigned int precision = semantics->precision; 1065 1066 /* Normalize the divisor. */ 1067 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1068 if (bit) { 1069 exponent += bit; 1070 APInt::tcShiftLeft(divisor, partsCount, bit); 1071 } 1072 1073 /* Normalize the dividend. */ 1074 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1075 if (bit) { 1076 exponent -= bit; 1077 APInt::tcShiftLeft(dividend, partsCount, bit); 1078 } 1079 1080 /* Ensure the dividend >= divisor initially for the loop below. 1081 Incidentally, this means that the division loop below is 1082 guaranteed to set the integer bit to one. */ 1083 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1084 exponent--; 1085 APInt::tcShiftLeft(dividend, partsCount, 1); 1086 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1087 } 1088 1089 /* Long division. */ 1090 for (bit = precision; bit; bit -= 1) { 1091 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1092 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1093 APInt::tcSetBit(lhsSignificand, bit - 1); 1094 } 1095 1096 APInt::tcShiftLeft(dividend, partsCount, 1); 1097 } 1098 1099 /* Figure out the lost fraction. */ 1100 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1101 1102 if (cmp > 0) 1103 lost_fraction = lfMoreThanHalf; 1104 else if (cmp == 0) 1105 lost_fraction = lfExactlyHalf; 1106 else if (APInt::tcIsZero(dividend, partsCount)) 1107 lost_fraction = lfExactlyZero; 1108 else 1109 lost_fraction = lfLessThanHalf; 1110 1111 if (partsCount > 2) 1112 delete [] dividend; 1113 1114 return lost_fraction; 1115 } 1116 1117 unsigned int 1118 APFloat::significandMSB() const 1119 { 1120 return APInt::tcMSB(significandParts(), partCount()); 1121 } 1122 1123 unsigned int 1124 APFloat::significandLSB() const 1125 { 1126 return APInt::tcLSB(significandParts(), partCount()); 1127 } 1128 1129 /* Note that a zero result is NOT normalized to fcZero. */ 1130 lostFraction 1131 APFloat::shiftSignificandRight(unsigned int bits) 1132 { 1133 /* Our exponent should not overflow. */ 1134 assert((ExponentType) (exponent + bits) >= exponent); 1135 1136 exponent += bits; 1137 1138 return shiftRight(significandParts(), partCount(), bits); 1139 } 1140 1141 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1142 void 1143 APFloat::shiftSignificandLeft(unsigned int bits) 1144 { 1145 assert(bits < semantics->precision); 1146 1147 if (bits) { 1148 unsigned int partsCount = partCount(); 1149 1150 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1151 exponent -= bits; 1152 1153 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1154 } 1155 } 1156 1157 APFloat::cmpResult 1158 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1159 { 1160 int compare; 1161 1162 assert(semantics == rhs.semantics); 1163 assert(isFiniteNonZero()); 1164 assert(rhs.isFiniteNonZero()); 1165 1166 compare = exponent - rhs.exponent; 1167 1168 /* If exponents are equal, do an unsigned bignum comparison of the 1169 significands. */ 1170 if (compare == 0) 1171 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1172 partCount()); 1173 1174 if (compare > 0) 1175 return cmpGreaterThan; 1176 else if (compare < 0) 1177 return cmpLessThan; 1178 else 1179 return cmpEqual; 1180 } 1181 1182 /* Handle overflow. Sign is preserved. We either become infinity or 1183 the largest finite number. */ 1184 APFloat::opStatus 1185 APFloat::handleOverflow(roundingMode rounding_mode) 1186 { 1187 /* Infinity? */ 1188 if (rounding_mode == rmNearestTiesToEven || 1189 rounding_mode == rmNearestTiesToAway || 1190 (rounding_mode == rmTowardPositive && !sign) || 1191 (rounding_mode == rmTowardNegative && sign)) { 1192 category = fcInfinity; 1193 return (opStatus) (opOverflow | opInexact); 1194 } 1195 1196 /* Otherwise we become the largest finite number. */ 1197 category = fcNormal; 1198 exponent = semantics->maxExponent; 1199 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1200 semantics->precision); 1201 1202 return opInexact; 1203 } 1204 1205 /* Returns TRUE if, when truncating the current number, with BIT the 1206 new LSB, with the given lost fraction and rounding mode, the result 1207 would need to be rounded away from zero (i.e., by increasing the 1208 signficand). This routine must work for fcZero of both signs, and 1209 fcNormal numbers. */ 1210 bool 1211 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1212 lostFraction lost_fraction, 1213 unsigned int bit) const 1214 { 1215 /* NaNs and infinities should not have lost fractions. */ 1216 assert(isFiniteNonZero() || category == fcZero); 1217 1218 /* Current callers never pass this so we don't handle it. */ 1219 assert(lost_fraction != lfExactlyZero); 1220 1221 switch (rounding_mode) { 1222 case rmNearestTiesToAway: 1223 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1224 1225 case rmNearestTiesToEven: 1226 if (lost_fraction == lfMoreThanHalf) 1227 return true; 1228 1229 /* Our zeroes don't have a significand to test. */ 1230 if (lost_fraction == lfExactlyHalf && category != fcZero) 1231 return APInt::tcExtractBit(significandParts(), bit); 1232 1233 return false; 1234 1235 case rmTowardZero: 1236 return false; 1237 1238 case rmTowardPositive: 1239 return sign == false; 1240 1241 case rmTowardNegative: 1242 return sign == true; 1243 } 1244 llvm_unreachable("Invalid rounding mode found"); 1245 } 1246 1247 APFloat::opStatus 1248 APFloat::normalize(roundingMode rounding_mode, 1249 lostFraction lost_fraction) 1250 { 1251 unsigned int omsb; /* One, not zero, based MSB. */ 1252 int exponentChange; 1253 1254 if (!isFiniteNonZero()) 1255 return opOK; 1256 1257 /* Before rounding normalize the exponent of fcNormal numbers. */ 1258 omsb = significandMSB() + 1; 1259 1260 if (omsb) { 1261 /* OMSB is numbered from 1. We want to place it in the integer 1262 bit numbered PRECISION if possible, with a compensating change in 1263 the exponent. */ 1264 exponentChange = omsb - semantics->precision; 1265 1266 /* If the resulting exponent is too high, overflow according to 1267 the rounding mode. */ 1268 if (exponent + exponentChange > semantics->maxExponent) 1269 return handleOverflow(rounding_mode); 1270 1271 /* Subnormal numbers have exponent minExponent, and their MSB 1272 is forced based on that. */ 1273 if (exponent + exponentChange < semantics->minExponent) 1274 exponentChange = semantics->minExponent - exponent; 1275 1276 /* Shifting left is easy as we don't lose precision. */ 1277 if (exponentChange < 0) { 1278 assert(lost_fraction == lfExactlyZero); 1279 1280 shiftSignificandLeft(-exponentChange); 1281 1282 return opOK; 1283 } 1284 1285 if (exponentChange > 0) { 1286 lostFraction lf; 1287 1288 /* Shift right and capture any new lost fraction. */ 1289 lf = shiftSignificandRight(exponentChange); 1290 1291 lost_fraction = combineLostFractions(lf, lost_fraction); 1292 1293 /* Keep OMSB up-to-date. */ 1294 if (omsb > (unsigned) exponentChange) 1295 omsb -= exponentChange; 1296 else 1297 omsb = 0; 1298 } 1299 } 1300 1301 /* Now round the number according to rounding_mode given the lost 1302 fraction. */ 1303 1304 /* As specified in IEEE 754, since we do not trap we do not report 1305 underflow for exact results. */ 1306 if (lost_fraction == lfExactlyZero) { 1307 /* Canonicalize zeroes. */ 1308 if (omsb == 0) 1309 category = fcZero; 1310 1311 return opOK; 1312 } 1313 1314 /* Increment the significand if we're rounding away from zero. */ 1315 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1316 if (omsb == 0) 1317 exponent = semantics->minExponent; 1318 1319 incrementSignificand(); 1320 omsb = significandMSB() + 1; 1321 1322 /* Did the significand increment overflow? */ 1323 if (omsb == (unsigned) semantics->precision + 1) { 1324 /* Renormalize by incrementing the exponent and shifting our 1325 significand right one. However if we already have the 1326 maximum exponent we overflow to infinity. */ 1327 if (exponent == semantics->maxExponent) { 1328 category = fcInfinity; 1329 1330 return (opStatus) (opOverflow | opInexact); 1331 } 1332 1333 shiftSignificandRight(1); 1334 1335 return opInexact; 1336 } 1337 } 1338 1339 /* The normal case - we were and are not denormal, and any 1340 significand increment above didn't overflow. */ 1341 if (omsb == semantics->precision) 1342 return opInexact; 1343 1344 /* We have a non-zero denormal. */ 1345 assert(omsb < semantics->precision); 1346 1347 /* Canonicalize zeroes. */ 1348 if (omsb == 0) 1349 category = fcZero; 1350 1351 /* The fcZero case is a denormal that underflowed to zero. */ 1352 return (opStatus) (opUnderflow | opInexact); 1353 } 1354 1355 APFloat::opStatus 1356 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1357 { 1358 switch (PackCategoriesIntoKey(category, rhs.category)) { 1359 default: 1360 llvm_unreachable(nullptr); 1361 1362 case PackCategoriesIntoKey(fcNaN, fcZero): 1363 case PackCategoriesIntoKey(fcNaN, fcNormal): 1364 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1365 case PackCategoriesIntoKey(fcNaN, fcNaN): 1366 case PackCategoriesIntoKey(fcNormal, fcZero): 1367 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1368 case PackCategoriesIntoKey(fcInfinity, fcZero): 1369 return opOK; 1370 1371 case PackCategoriesIntoKey(fcZero, fcNaN): 1372 case PackCategoriesIntoKey(fcNormal, fcNaN): 1373 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1374 // We need to be sure to flip the sign here for subtraction because we 1375 // don't have a separate negate operation so -NaN becomes 0 - NaN here. 1376 sign = rhs.sign ^ subtract; 1377 category = fcNaN; 1378 copySignificand(rhs); 1379 return opOK; 1380 1381 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1382 case PackCategoriesIntoKey(fcZero, fcInfinity): 1383 category = fcInfinity; 1384 sign = rhs.sign ^ subtract; 1385 return opOK; 1386 1387 case PackCategoriesIntoKey(fcZero, fcNormal): 1388 assign(rhs); 1389 sign = rhs.sign ^ subtract; 1390 return opOK; 1391 1392 case PackCategoriesIntoKey(fcZero, fcZero): 1393 /* Sign depends on rounding mode; handled by caller. */ 1394 return opOK; 1395 1396 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1397 /* Differently signed infinities can only be validly 1398 subtracted. */ 1399 if (((sign ^ rhs.sign)!=0) != subtract) { 1400 makeNaN(); 1401 return opInvalidOp; 1402 } 1403 1404 return opOK; 1405 1406 case PackCategoriesIntoKey(fcNormal, fcNormal): 1407 return opDivByZero; 1408 } 1409 } 1410 1411 /* Add or subtract two normal numbers. */ 1412 lostFraction 1413 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1414 { 1415 integerPart carry; 1416 lostFraction lost_fraction; 1417 int bits; 1418 1419 /* Determine if the operation on the absolute values is effectively 1420 an addition or subtraction. */ 1421 subtract ^= (sign ^ rhs.sign) ? true : false; 1422 1423 /* Are we bigger exponent-wise than the RHS? */ 1424 bits = exponent - rhs.exponent; 1425 1426 /* Subtraction is more subtle than one might naively expect. */ 1427 if (subtract) { 1428 APFloat temp_rhs(rhs); 1429 bool reverse; 1430 1431 if (bits == 0) { 1432 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1433 lost_fraction = lfExactlyZero; 1434 } else if (bits > 0) { 1435 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1436 shiftSignificandLeft(1); 1437 reverse = false; 1438 } else { 1439 lost_fraction = shiftSignificandRight(-bits - 1); 1440 temp_rhs.shiftSignificandLeft(1); 1441 reverse = true; 1442 } 1443 1444 if (reverse) { 1445 carry = temp_rhs.subtractSignificand 1446 (*this, lost_fraction != lfExactlyZero); 1447 copySignificand(temp_rhs); 1448 sign = !sign; 1449 } else { 1450 carry = subtractSignificand 1451 (temp_rhs, lost_fraction != lfExactlyZero); 1452 } 1453 1454 /* Invert the lost fraction - it was on the RHS and 1455 subtracted. */ 1456 if (lost_fraction == lfLessThanHalf) 1457 lost_fraction = lfMoreThanHalf; 1458 else if (lost_fraction == lfMoreThanHalf) 1459 lost_fraction = lfLessThanHalf; 1460 1461 /* The code above is intended to ensure that no borrow is 1462 necessary. */ 1463 assert(!carry); 1464 (void)carry; 1465 } else { 1466 if (bits > 0) { 1467 APFloat temp_rhs(rhs); 1468 1469 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1470 carry = addSignificand(temp_rhs); 1471 } else { 1472 lost_fraction = shiftSignificandRight(-bits); 1473 carry = addSignificand(rhs); 1474 } 1475 1476 /* We have a guard bit; generating a carry cannot happen. */ 1477 assert(!carry); 1478 (void)carry; 1479 } 1480 1481 return lost_fraction; 1482 } 1483 1484 APFloat::opStatus 1485 APFloat::multiplySpecials(const APFloat &rhs) 1486 { 1487 switch (PackCategoriesIntoKey(category, rhs.category)) { 1488 default: 1489 llvm_unreachable(nullptr); 1490 1491 case PackCategoriesIntoKey(fcNaN, fcZero): 1492 case PackCategoriesIntoKey(fcNaN, fcNormal): 1493 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1494 case PackCategoriesIntoKey(fcNaN, fcNaN): 1495 sign = false; 1496 return opOK; 1497 1498 case PackCategoriesIntoKey(fcZero, fcNaN): 1499 case PackCategoriesIntoKey(fcNormal, fcNaN): 1500 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1501 sign = false; 1502 category = fcNaN; 1503 copySignificand(rhs); 1504 return opOK; 1505 1506 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1507 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1508 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1509 category = fcInfinity; 1510 return opOK; 1511 1512 case PackCategoriesIntoKey(fcZero, fcNormal): 1513 case PackCategoriesIntoKey(fcNormal, fcZero): 1514 case PackCategoriesIntoKey(fcZero, fcZero): 1515 category = fcZero; 1516 return opOK; 1517 1518 case PackCategoriesIntoKey(fcZero, fcInfinity): 1519 case PackCategoriesIntoKey(fcInfinity, fcZero): 1520 makeNaN(); 1521 return opInvalidOp; 1522 1523 case PackCategoriesIntoKey(fcNormal, fcNormal): 1524 return opOK; 1525 } 1526 } 1527 1528 APFloat::opStatus 1529 APFloat::divideSpecials(const APFloat &rhs) 1530 { 1531 switch (PackCategoriesIntoKey(category, rhs.category)) { 1532 default: 1533 llvm_unreachable(nullptr); 1534 1535 case PackCategoriesIntoKey(fcZero, fcNaN): 1536 case PackCategoriesIntoKey(fcNormal, fcNaN): 1537 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1538 category = fcNaN; 1539 copySignificand(rhs); 1540 case PackCategoriesIntoKey(fcNaN, fcZero): 1541 case PackCategoriesIntoKey(fcNaN, fcNormal): 1542 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1543 case PackCategoriesIntoKey(fcNaN, fcNaN): 1544 sign = false; 1545 case PackCategoriesIntoKey(fcInfinity, fcZero): 1546 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1547 case PackCategoriesIntoKey(fcZero, fcInfinity): 1548 case PackCategoriesIntoKey(fcZero, fcNormal): 1549 return opOK; 1550 1551 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1552 category = fcZero; 1553 return opOK; 1554 1555 case PackCategoriesIntoKey(fcNormal, fcZero): 1556 category = fcInfinity; 1557 return opDivByZero; 1558 1559 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1560 case PackCategoriesIntoKey(fcZero, fcZero): 1561 makeNaN(); 1562 return opInvalidOp; 1563 1564 case PackCategoriesIntoKey(fcNormal, fcNormal): 1565 return opOK; 1566 } 1567 } 1568 1569 APFloat::opStatus 1570 APFloat::modSpecials(const APFloat &rhs) 1571 { 1572 switch (PackCategoriesIntoKey(category, rhs.category)) { 1573 default: 1574 llvm_unreachable(nullptr); 1575 1576 case PackCategoriesIntoKey(fcNaN, fcZero): 1577 case PackCategoriesIntoKey(fcNaN, fcNormal): 1578 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1579 case PackCategoriesIntoKey(fcNaN, fcNaN): 1580 case PackCategoriesIntoKey(fcZero, fcInfinity): 1581 case PackCategoriesIntoKey(fcZero, fcNormal): 1582 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1583 return opOK; 1584 1585 case PackCategoriesIntoKey(fcZero, fcNaN): 1586 case PackCategoriesIntoKey(fcNormal, fcNaN): 1587 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1588 sign = false; 1589 category = fcNaN; 1590 copySignificand(rhs); 1591 return opOK; 1592 1593 case PackCategoriesIntoKey(fcNormal, fcZero): 1594 case PackCategoriesIntoKey(fcInfinity, fcZero): 1595 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1596 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1597 case PackCategoriesIntoKey(fcZero, fcZero): 1598 makeNaN(); 1599 return opInvalidOp; 1600 1601 case PackCategoriesIntoKey(fcNormal, fcNormal): 1602 return opOK; 1603 } 1604 } 1605 1606 /* Change sign. */ 1607 void 1608 APFloat::changeSign() 1609 { 1610 /* Look mummy, this one's easy. */ 1611 sign = !sign; 1612 } 1613 1614 void 1615 APFloat::clearSign() 1616 { 1617 /* So is this one. */ 1618 sign = 0; 1619 } 1620 1621 void 1622 APFloat::copySign(const APFloat &rhs) 1623 { 1624 /* And this one. */ 1625 sign = rhs.sign; 1626 } 1627 1628 /* Normalized addition or subtraction. */ 1629 APFloat::opStatus 1630 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1631 bool subtract) 1632 { 1633 opStatus fs; 1634 1635 fs = addOrSubtractSpecials(rhs, subtract); 1636 1637 /* This return code means it was not a simple case. */ 1638 if (fs == opDivByZero) { 1639 lostFraction lost_fraction; 1640 1641 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1642 fs = normalize(rounding_mode, lost_fraction); 1643 1644 /* Can only be zero if we lost no fraction. */ 1645 assert(category != fcZero || lost_fraction == lfExactlyZero); 1646 } 1647 1648 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1649 positive zero unless rounding to minus infinity, except that 1650 adding two like-signed zeroes gives that zero. */ 1651 if (category == fcZero) { 1652 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1653 sign = (rounding_mode == rmTowardNegative); 1654 } 1655 1656 return fs; 1657 } 1658 1659 /* Normalized addition. */ 1660 APFloat::opStatus 1661 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1662 { 1663 return addOrSubtract(rhs, rounding_mode, false); 1664 } 1665 1666 /* Normalized subtraction. */ 1667 APFloat::opStatus 1668 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1669 { 1670 return addOrSubtract(rhs, rounding_mode, true); 1671 } 1672 1673 /* Normalized multiply. */ 1674 APFloat::opStatus 1675 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1676 { 1677 opStatus fs; 1678 1679 sign ^= rhs.sign; 1680 fs = multiplySpecials(rhs); 1681 1682 if (isFiniteNonZero()) { 1683 lostFraction lost_fraction = multiplySignificand(rhs, nullptr); 1684 fs = normalize(rounding_mode, lost_fraction); 1685 if (lost_fraction != lfExactlyZero) 1686 fs = (opStatus) (fs | opInexact); 1687 } 1688 1689 return fs; 1690 } 1691 1692 /* Normalized divide. */ 1693 APFloat::opStatus 1694 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1695 { 1696 opStatus fs; 1697 1698 sign ^= rhs.sign; 1699 fs = divideSpecials(rhs); 1700 1701 if (isFiniteNonZero()) { 1702 lostFraction lost_fraction = divideSignificand(rhs); 1703 fs = normalize(rounding_mode, lost_fraction); 1704 if (lost_fraction != lfExactlyZero) 1705 fs = (opStatus) (fs | opInexact); 1706 } 1707 1708 return fs; 1709 } 1710 1711 /* Normalized remainder. This is not currently correct in all cases. */ 1712 APFloat::opStatus 1713 APFloat::remainder(const APFloat &rhs) 1714 { 1715 opStatus fs; 1716 APFloat V = *this; 1717 unsigned int origSign = sign; 1718 1719 fs = V.divide(rhs, rmNearestTiesToEven); 1720 if (fs == opDivByZero) 1721 return fs; 1722 1723 int parts = partCount(); 1724 integerPart *x = new integerPart[parts]; 1725 bool ignored; 1726 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1727 rmNearestTiesToEven, &ignored); 1728 if (fs==opInvalidOp) 1729 return fs; 1730 1731 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1732 rmNearestTiesToEven); 1733 assert(fs==opOK); // should always work 1734 1735 fs = V.multiply(rhs, rmNearestTiesToEven); 1736 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1737 1738 fs = subtract(V, rmNearestTiesToEven); 1739 assert(fs==opOK || fs==opInexact); // likewise 1740 1741 if (isZero()) 1742 sign = origSign; // IEEE754 requires this 1743 delete[] x; 1744 return fs; 1745 } 1746 1747 /* Normalized llvm frem (C fmod). 1748 This is not currently correct in all cases. */ 1749 APFloat::opStatus 1750 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1751 { 1752 opStatus fs; 1753 fs = modSpecials(rhs); 1754 1755 if (isFiniteNonZero() && rhs.isFiniteNonZero()) { 1756 APFloat V = *this; 1757 unsigned int origSign = sign; 1758 1759 fs = V.divide(rhs, rmNearestTiesToEven); 1760 if (fs == opDivByZero) 1761 return fs; 1762 1763 int parts = partCount(); 1764 integerPart *x = new integerPart[parts]; 1765 bool ignored; 1766 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1767 rmTowardZero, &ignored); 1768 if (fs==opInvalidOp) 1769 return fs; 1770 1771 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1772 rmNearestTiesToEven); 1773 assert(fs==opOK); // should always work 1774 1775 fs = V.multiply(rhs, rounding_mode); 1776 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1777 1778 fs = subtract(V, rounding_mode); 1779 assert(fs==opOK || fs==opInexact); // likewise 1780 1781 if (isZero()) 1782 sign = origSign; // IEEE754 requires this 1783 delete[] x; 1784 } 1785 return fs; 1786 } 1787 1788 /* Normalized fused-multiply-add. */ 1789 APFloat::opStatus 1790 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1791 const APFloat &addend, 1792 roundingMode rounding_mode) 1793 { 1794 opStatus fs; 1795 1796 /* Post-multiplication sign, before addition. */ 1797 sign ^= multiplicand.sign; 1798 1799 /* If and only if all arguments are normal do we need to do an 1800 extended-precision calculation. */ 1801 if (isFiniteNonZero() && 1802 multiplicand.isFiniteNonZero() && 1803 addend.isFinite()) { 1804 lostFraction lost_fraction; 1805 1806 lost_fraction = multiplySignificand(multiplicand, &addend); 1807 fs = normalize(rounding_mode, lost_fraction); 1808 if (lost_fraction != lfExactlyZero) 1809 fs = (opStatus) (fs | opInexact); 1810 1811 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1812 positive zero unless rounding to minus infinity, except that 1813 adding two like-signed zeroes gives that zero. */ 1814 if (category == fcZero && sign != addend.sign) 1815 sign = (rounding_mode == rmTowardNegative); 1816 } else { 1817 fs = multiplySpecials(multiplicand); 1818 1819 /* FS can only be opOK or opInvalidOp. There is no more work 1820 to do in the latter case. The IEEE-754R standard says it is 1821 implementation-defined in this case whether, if ADDEND is a 1822 quiet NaN, we raise invalid op; this implementation does so. 1823 1824 If we need to do the addition we can do so with normal 1825 precision. */ 1826 if (fs == opOK) 1827 fs = addOrSubtract(addend, rounding_mode, false); 1828 } 1829 1830 return fs; 1831 } 1832 1833 /* Rounding-mode corrrect round to integral value. */ 1834 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1835 opStatus fs; 1836 1837 // If the exponent is large enough, we know that this value is already 1838 // integral, and the arithmetic below would potentially cause it to saturate 1839 // to +/-Inf. Bail out early instead. 1840 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) 1841 return opOK; 1842 1843 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1844 // precision of our format, and then subtract it back off again. The choice 1845 // of rounding modes for the addition/subtraction determines the rounding mode 1846 // for our integral rounding as well. 1847 // NOTE: When the input value is negative, we do subtraction followed by 1848 // addition instead. 1849 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1850 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1851 APFloat MagicConstant(*semantics); 1852 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1853 rmNearestTiesToEven); 1854 MagicConstant.copySign(*this); 1855 1856 if (fs != opOK) 1857 return fs; 1858 1859 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1860 bool inputSign = isNegative(); 1861 1862 fs = add(MagicConstant, rounding_mode); 1863 if (fs != opOK && fs != opInexact) 1864 return fs; 1865 1866 fs = subtract(MagicConstant, rounding_mode); 1867 1868 // Restore the input sign. 1869 if (inputSign != isNegative()) 1870 changeSign(); 1871 1872 return fs; 1873 } 1874 1875 1876 /* Comparison requires normalized numbers. */ 1877 APFloat::cmpResult 1878 APFloat::compare(const APFloat &rhs) const 1879 { 1880 cmpResult result; 1881 1882 assert(semantics == rhs.semantics); 1883 1884 switch (PackCategoriesIntoKey(category, rhs.category)) { 1885 default: 1886 llvm_unreachable(nullptr); 1887 1888 case PackCategoriesIntoKey(fcNaN, fcZero): 1889 case PackCategoriesIntoKey(fcNaN, fcNormal): 1890 case PackCategoriesIntoKey(fcNaN, fcInfinity): 1891 case PackCategoriesIntoKey(fcNaN, fcNaN): 1892 case PackCategoriesIntoKey(fcZero, fcNaN): 1893 case PackCategoriesIntoKey(fcNormal, fcNaN): 1894 case PackCategoriesIntoKey(fcInfinity, fcNaN): 1895 return cmpUnordered; 1896 1897 case PackCategoriesIntoKey(fcInfinity, fcNormal): 1898 case PackCategoriesIntoKey(fcInfinity, fcZero): 1899 case PackCategoriesIntoKey(fcNormal, fcZero): 1900 if (sign) 1901 return cmpLessThan; 1902 else 1903 return cmpGreaterThan; 1904 1905 case PackCategoriesIntoKey(fcNormal, fcInfinity): 1906 case PackCategoriesIntoKey(fcZero, fcInfinity): 1907 case PackCategoriesIntoKey(fcZero, fcNormal): 1908 if (rhs.sign) 1909 return cmpGreaterThan; 1910 else 1911 return cmpLessThan; 1912 1913 case PackCategoriesIntoKey(fcInfinity, fcInfinity): 1914 if (sign == rhs.sign) 1915 return cmpEqual; 1916 else if (sign) 1917 return cmpLessThan; 1918 else 1919 return cmpGreaterThan; 1920 1921 case PackCategoriesIntoKey(fcZero, fcZero): 1922 return cmpEqual; 1923 1924 case PackCategoriesIntoKey(fcNormal, fcNormal): 1925 break; 1926 } 1927 1928 /* Two normal numbers. Do they have the same sign? */ 1929 if (sign != rhs.sign) { 1930 if (sign) 1931 result = cmpLessThan; 1932 else 1933 result = cmpGreaterThan; 1934 } else { 1935 /* Compare absolute values; invert result if negative. */ 1936 result = compareAbsoluteValue(rhs); 1937 1938 if (sign) { 1939 if (result == cmpLessThan) 1940 result = cmpGreaterThan; 1941 else if (result == cmpGreaterThan) 1942 result = cmpLessThan; 1943 } 1944 } 1945 1946 return result; 1947 } 1948 1949 /// APFloat::convert - convert a value of one floating point type to another. 1950 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1951 /// records whether the transformation lost information, i.e. whether 1952 /// converting the result back to the original type will produce the 1953 /// original value (this is almost the same as return value==fsOK, but there 1954 /// are edge cases where this is not so). 1955 1956 APFloat::opStatus 1957 APFloat::convert(const fltSemantics &toSemantics, 1958 roundingMode rounding_mode, bool *losesInfo) 1959 { 1960 lostFraction lostFraction; 1961 unsigned int newPartCount, oldPartCount; 1962 opStatus fs; 1963 int shift; 1964 const fltSemantics &fromSemantics = *semantics; 1965 1966 lostFraction = lfExactlyZero; 1967 newPartCount = partCountForBits(toSemantics.precision + 1); 1968 oldPartCount = partCount(); 1969 shift = toSemantics.precision - fromSemantics.precision; 1970 1971 bool X86SpecialNan = false; 1972 if (&fromSemantics == &APFloat::x87DoubleExtended && 1973 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 1974 (!(*significandParts() & 0x8000000000000000ULL) || 1975 !(*significandParts() & 0x4000000000000000ULL))) { 1976 // x86 has some unusual NaNs which cannot be represented in any other 1977 // format; note them here. 1978 X86SpecialNan = true; 1979 } 1980 1981 // If this is a truncation of a denormal number, and the target semantics 1982 // has larger exponent range than the source semantics (this can happen 1983 // when truncating from PowerPC double-double to double format), the 1984 // right shift could lose result mantissa bits. Adjust exponent instead 1985 // of performing excessive shift. 1986 if (shift < 0 && isFiniteNonZero()) { 1987 int exponentChange = significandMSB() + 1 - fromSemantics.precision; 1988 if (exponent + exponentChange < toSemantics.minExponent) 1989 exponentChange = toSemantics.minExponent - exponent; 1990 if (exponentChange < shift) 1991 exponentChange = shift; 1992 if (exponentChange < 0) { 1993 shift -= exponentChange; 1994 exponent += exponentChange; 1995 } 1996 } 1997 1998 // If this is a truncation, perform the shift before we narrow the storage. 1999 if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) 2000 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 2001 2002 // Fix the storage so it can hold to new value. 2003 if (newPartCount > oldPartCount) { 2004 // The new type requires more storage; make it available. 2005 integerPart *newParts; 2006 newParts = new integerPart[newPartCount]; 2007 APInt::tcSet(newParts, 0, newPartCount); 2008 if (isFiniteNonZero() || category==fcNaN) 2009 APInt::tcAssign(newParts, significandParts(), oldPartCount); 2010 freeSignificand(); 2011 significand.parts = newParts; 2012 } else if (newPartCount == 1 && oldPartCount != 1) { 2013 // Switch to built-in storage for a single part. 2014 integerPart newPart = 0; 2015 if (isFiniteNonZero() || category==fcNaN) 2016 newPart = significandParts()[0]; 2017 freeSignificand(); 2018 significand.part = newPart; 2019 } 2020 2021 // Now that we have the right storage, switch the semantics. 2022 semantics = &toSemantics; 2023 2024 // If this is an extension, perform the shift now that the storage is 2025 // available. 2026 if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) 2027 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 2028 2029 if (isFiniteNonZero()) { 2030 fs = normalize(rounding_mode, lostFraction); 2031 *losesInfo = (fs != opOK); 2032 } else if (category == fcNaN) { 2033 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2034 2035 // For x87 extended precision, we want to make a NaN, not a special NaN if 2036 // the input wasn't special either. 2037 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) 2038 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2039 2040 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2041 // does not give you back the same bits. This is dubious, and we 2042 // don't currently do it. You're really supposed to get 2043 // an invalid operation signal at runtime, but nobody does that. 2044 fs = opOK; 2045 } else { 2046 *losesInfo = false; 2047 fs = opOK; 2048 } 2049 2050 return fs; 2051 } 2052 2053 /* Convert a floating point number to an integer according to the 2054 rounding mode. If the rounded integer value is out of range this 2055 returns an invalid operation exception and the contents of the 2056 destination parts are unspecified. If the rounded value is in 2057 range but the floating point number is not the exact integer, the C 2058 standard doesn't require an inexact exception to be raised. IEEE 2059 854 does require it so we do that. 2060 2061 Note that for conversions to integer type the C standard requires 2062 round-to-zero to always be used. */ 2063 APFloat::opStatus 2064 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 2065 bool isSigned, 2066 roundingMode rounding_mode, 2067 bool *isExact) const 2068 { 2069 lostFraction lost_fraction; 2070 const integerPart *src; 2071 unsigned int dstPartsCount, truncatedBits; 2072 2073 *isExact = false; 2074 2075 /* Handle the three special cases first. */ 2076 if (category == fcInfinity || category == fcNaN) 2077 return opInvalidOp; 2078 2079 dstPartsCount = partCountForBits(width); 2080 2081 if (category == fcZero) { 2082 APInt::tcSet(parts, 0, dstPartsCount); 2083 // Negative zero can't be represented as an int. 2084 *isExact = !sign; 2085 return opOK; 2086 } 2087 2088 src = significandParts(); 2089 2090 /* Step 1: place our absolute value, with any fraction truncated, in 2091 the destination. */ 2092 if (exponent < 0) { 2093 /* Our absolute value is less than one; truncate everything. */ 2094 APInt::tcSet(parts, 0, dstPartsCount); 2095 /* For exponent -1 the integer bit represents .5, look at that. 2096 For smaller exponents leftmost truncated bit is 0. */ 2097 truncatedBits = semantics->precision -1U - exponent; 2098 } else { 2099 /* We want the most significant (exponent + 1) bits; the rest are 2100 truncated. */ 2101 unsigned int bits = exponent + 1U; 2102 2103 /* Hopelessly large in magnitude? */ 2104 if (bits > width) 2105 return opInvalidOp; 2106 2107 if (bits < semantics->precision) { 2108 /* We truncate (semantics->precision - bits) bits. */ 2109 truncatedBits = semantics->precision - bits; 2110 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2111 } else { 2112 /* We want at least as many bits as are available. */ 2113 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2114 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2115 truncatedBits = 0; 2116 } 2117 } 2118 2119 /* Step 2: work out any lost fraction, and increment the absolute 2120 value if we would round away from zero. */ 2121 if (truncatedBits) { 2122 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2123 truncatedBits); 2124 if (lost_fraction != lfExactlyZero && 2125 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2126 if (APInt::tcIncrement(parts, dstPartsCount)) 2127 return opInvalidOp; /* Overflow. */ 2128 } 2129 } else { 2130 lost_fraction = lfExactlyZero; 2131 } 2132 2133 /* Step 3: check if we fit in the destination. */ 2134 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2135 2136 if (sign) { 2137 if (!isSigned) { 2138 /* Negative numbers cannot be represented as unsigned. */ 2139 if (omsb != 0) 2140 return opInvalidOp; 2141 } else { 2142 /* It takes omsb bits to represent the unsigned integer value. 2143 We lose a bit for the sign, but care is needed as the 2144 maximally negative integer is a special case. */ 2145 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2146 return opInvalidOp; 2147 2148 /* This case can happen because of rounding. */ 2149 if (omsb > width) 2150 return opInvalidOp; 2151 } 2152 2153 APInt::tcNegate (parts, dstPartsCount); 2154 } else { 2155 if (omsb >= width + !isSigned) 2156 return opInvalidOp; 2157 } 2158 2159 if (lost_fraction == lfExactlyZero) { 2160 *isExact = true; 2161 return opOK; 2162 } else 2163 return opInexact; 2164 } 2165 2166 /* Same as convertToSignExtendedInteger, except we provide 2167 deterministic values in case of an invalid operation exception, 2168 namely zero for NaNs and the minimal or maximal value respectively 2169 for underflow or overflow. 2170 The *isExact output tells whether the result is exact, in the sense 2171 that converting it back to the original floating point type produces 2172 the original value. This is almost equivalent to result==opOK, 2173 except for negative zeroes. 2174 */ 2175 APFloat::opStatus 2176 APFloat::convertToInteger(integerPart *parts, unsigned int width, 2177 bool isSigned, 2178 roundingMode rounding_mode, bool *isExact) const 2179 { 2180 opStatus fs; 2181 2182 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2183 isExact); 2184 2185 if (fs == opInvalidOp) { 2186 unsigned int bits, dstPartsCount; 2187 2188 dstPartsCount = partCountForBits(width); 2189 2190 if (category == fcNaN) 2191 bits = 0; 2192 else if (sign) 2193 bits = isSigned; 2194 else 2195 bits = width - isSigned; 2196 2197 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2198 if (sign && isSigned) 2199 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2200 } 2201 2202 return fs; 2203 } 2204 2205 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2206 an APSInt, whose initial bit-width and signed-ness are used to determine the 2207 precision of the conversion. 2208 */ 2209 APFloat::opStatus 2210 APFloat::convertToInteger(APSInt &result, 2211 roundingMode rounding_mode, bool *isExact) const 2212 { 2213 unsigned bitWidth = result.getBitWidth(); 2214 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2215 opStatus status = convertToInteger( 2216 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2217 // Keeps the original signed-ness. 2218 result = APInt(bitWidth, parts); 2219 return status; 2220 } 2221 2222 /* Convert an unsigned integer SRC to a floating point number, 2223 rounding according to ROUNDING_MODE. The sign of the floating 2224 point number is not modified. */ 2225 APFloat::opStatus 2226 APFloat::convertFromUnsignedParts(const integerPart *src, 2227 unsigned int srcCount, 2228 roundingMode rounding_mode) 2229 { 2230 unsigned int omsb, precision, dstCount; 2231 integerPart *dst; 2232 lostFraction lost_fraction; 2233 2234 category = fcNormal; 2235 omsb = APInt::tcMSB(src, srcCount) + 1; 2236 dst = significandParts(); 2237 dstCount = partCount(); 2238 precision = semantics->precision; 2239 2240 /* We want the most significant PRECISION bits of SRC. There may not 2241 be that many; extract what we can. */ 2242 if (precision <= omsb) { 2243 exponent = omsb - 1; 2244 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2245 omsb - precision); 2246 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2247 } else { 2248 exponent = precision - 1; 2249 lost_fraction = lfExactlyZero; 2250 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2251 } 2252 2253 return normalize(rounding_mode, lost_fraction); 2254 } 2255 2256 APFloat::opStatus 2257 APFloat::convertFromAPInt(const APInt &Val, 2258 bool isSigned, 2259 roundingMode rounding_mode) 2260 { 2261 unsigned int partCount = Val.getNumWords(); 2262 APInt api = Val; 2263 2264 sign = false; 2265 if (isSigned && api.isNegative()) { 2266 sign = true; 2267 api = -api; 2268 } 2269 2270 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2271 } 2272 2273 /* Convert a two's complement integer SRC to a floating point number, 2274 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2275 integer is signed, in which case it must be sign-extended. */ 2276 APFloat::opStatus 2277 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2278 unsigned int srcCount, 2279 bool isSigned, 2280 roundingMode rounding_mode) 2281 { 2282 opStatus status; 2283 2284 if (isSigned && 2285 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2286 integerPart *copy; 2287 2288 /* If we're signed and negative negate a copy. */ 2289 sign = true; 2290 copy = new integerPart[srcCount]; 2291 APInt::tcAssign(copy, src, srcCount); 2292 APInt::tcNegate(copy, srcCount); 2293 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2294 delete [] copy; 2295 } else { 2296 sign = false; 2297 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2298 } 2299 2300 return status; 2301 } 2302 2303 /* FIXME: should this just take a const APInt reference? */ 2304 APFloat::opStatus 2305 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2306 unsigned int width, bool isSigned, 2307 roundingMode rounding_mode) 2308 { 2309 unsigned int partCount = partCountForBits(width); 2310 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2311 2312 sign = false; 2313 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2314 sign = true; 2315 api = -api; 2316 } 2317 2318 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2319 } 2320 2321 APFloat::opStatus 2322 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2323 { 2324 lostFraction lost_fraction = lfExactlyZero; 2325 2326 category = fcNormal; 2327 zeroSignificand(); 2328 exponent = 0; 2329 2330 integerPart *significand = significandParts(); 2331 unsigned partsCount = partCount(); 2332 unsigned bitPos = partsCount * integerPartWidth; 2333 bool computedTrailingFraction = false; 2334 2335 // Skip leading zeroes and any (hexa)decimal point. 2336 StringRef::iterator begin = s.begin(); 2337 StringRef::iterator end = s.end(); 2338 StringRef::iterator dot; 2339 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2340 StringRef::iterator firstSignificantDigit = p; 2341 2342 while (p != end) { 2343 integerPart hex_value; 2344 2345 if (*p == '.') { 2346 assert(dot == end && "String contains multiple dots"); 2347 dot = p++; 2348 continue; 2349 } 2350 2351 hex_value = hexDigitValue(*p); 2352 if (hex_value == -1U) 2353 break; 2354 2355 p++; 2356 2357 // Store the number while we have space. 2358 if (bitPos) { 2359 bitPos -= 4; 2360 hex_value <<= bitPos % integerPartWidth; 2361 significand[bitPos / integerPartWidth] |= hex_value; 2362 } else if (!computedTrailingFraction) { 2363 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2364 computedTrailingFraction = true; 2365 } 2366 } 2367 2368 /* Hex floats require an exponent but not a hexadecimal point. */ 2369 assert(p != end && "Hex strings require an exponent"); 2370 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2371 assert(p != begin && "Significand has no digits"); 2372 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2373 2374 /* Ignore the exponent if we are zero. */ 2375 if (p != firstSignificantDigit) { 2376 int expAdjustment; 2377 2378 /* Implicit hexadecimal point? */ 2379 if (dot == end) 2380 dot = p; 2381 2382 /* Calculate the exponent adjustment implicit in the number of 2383 significant digits. */ 2384 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2385 if (expAdjustment < 0) 2386 expAdjustment++; 2387 expAdjustment = expAdjustment * 4 - 1; 2388 2389 /* Adjust for writing the significand starting at the most 2390 significant nibble. */ 2391 expAdjustment += semantics->precision; 2392 expAdjustment -= partsCount * integerPartWidth; 2393 2394 /* Adjust for the given exponent. */ 2395 exponent = totalExponent(p + 1, end, expAdjustment); 2396 } 2397 2398 return normalize(rounding_mode, lost_fraction); 2399 } 2400 2401 APFloat::opStatus 2402 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2403 unsigned sigPartCount, int exp, 2404 roundingMode rounding_mode) 2405 { 2406 unsigned int parts, pow5PartCount; 2407 fltSemantics calcSemantics = { 32767, -32767, 0 }; 2408 integerPart pow5Parts[maxPowerOfFiveParts]; 2409 bool isNearest; 2410 2411 isNearest = (rounding_mode == rmNearestTiesToEven || 2412 rounding_mode == rmNearestTiesToAway); 2413 2414 parts = partCountForBits(semantics->precision + 11); 2415 2416 /* Calculate pow(5, abs(exp)). */ 2417 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2418 2419 for (;; parts *= 2) { 2420 opStatus sigStatus, powStatus; 2421 unsigned int excessPrecision, truncatedBits; 2422 2423 calcSemantics.precision = parts * integerPartWidth - 1; 2424 excessPrecision = calcSemantics.precision - semantics->precision; 2425 truncatedBits = excessPrecision; 2426 2427 APFloat decSig = APFloat::getZero(calcSemantics, sign); 2428 APFloat pow5(calcSemantics); 2429 2430 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2431 rmNearestTiesToEven); 2432 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2433 rmNearestTiesToEven); 2434 /* Add exp, as 10^n = 5^n * 2^n. */ 2435 decSig.exponent += exp; 2436 2437 lostFraction calcLostFraction; 2438 integerPart HUerr, HUdistance; 2439 unsigned int powHUerr; 2440 2441 if (exp >= 0) { 2442 /* multiplySignificand leaves the precision-th bit set to 1. */ 2443 calcLostFraction = decSig.multiplySignificand(pow5, nullptr); 2444 powHUerr = powStatus != opOK; 2445 } else { 2446 calcLostFraction = decSig.divideSignificand(pow5); 2447 /* Denormal numbers have less precision. */ 2448 if (decSig.exponent < semantics->minExponent) { 2449 excessPrecision += (semantics->minExponent - decSig.exponent); 2450 truncatedBits = excessPrecision; 2451 if (excessPrecision > calcSemantics.precision) 2452 excessPrecision = calcSemantics.precision; 2453 } 2454 /* Extra half-ulp lost in reciprocal of exponent. */ 2455 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2456 } 2457 2458 /* Both multiplySignificand and divideSignificand return the 2459 result with the integer bit set. */ 2460 assert(APInt::tcExtractBit 2461 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2462 2463 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2464 powHUerr); 2465 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2466 excessPrecision, isNearest); 2467 2468 /* Are we guaranteed to round correctly if we truncate? */ 2469 if (HUdistance >= HUerr) { 2470 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2471 calcSemantics.precision - excessPrecision, 2472 excessPrecision); 2473 /* Take the exponent of decSig. If we tcExtract-ed less bits 2474 above we must adjust our exponent to compensate for the 2475 implicit right shift. */ 2476 exponent = (decSig.exponent + semantics->precision 2477 - (calcSemantics.precision - excessPrecision)); 2478 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2479 decSig.partCount(), 2480 truncatedBits); 2481 return normalize(rounding_mode, calcLostFraction); 2482 } 2483 } 2484 } 2485 2486 APFloat::opStatus 2487 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2488 { 2489 decimalInfo D; 2490 opStatus fs; 2491 2492 /* Scan the text. */ 2493 StringRef::iterator p = str.begin(); 2494 interpretDecimal(p, str.end(), &D); 2495 2496 /* Handle the quick cases. First the case of no significant digits, 2497 i.e. zero, and then exponents that are obviously too large or too 2498 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2499 definitely overflows if 2500 2501 (exp - 1) * L >= maxExponent 2502 2503 and definitely underflows to zero where 2504 2505 (exp + 1) * L <= minExponent - precision 2506 2507 With integer arithmetic the tightest bounds for L are 2508 2509 93/28 < L < 196/59 [ numerator <= 256 ] 2510 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2511 */ 2512 2513 // Test if we have a zero number allowing for strings with no null terminators 2514 // and zero decimals with non-zero exponents. 2515 // 2516 // We computed firstSigDigit by ignoring all zeros and dots. Thus if 2517 // D->firstSigDigit equals str.end(), every digit must be a zero and there can 2518 // be at most one dot. On the other hand, if we have a zero with a non-zero 2519 // exponent, then we know that D.firstSigDigit will be non-numeric. 2520 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { 2521 category = fcZero; 2522 fs = opOK; 2523 2524 /* Check whether the normalized exponent is high enough to overflow 2525 max during the log-rebasing in the max-exponent check below. */ 2526 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2527 fs = handleOverflow(rounding_mode); 2528 2529 /* If it wasn't, then it also wasn't high enough to overflow max 2530 during the log-rebasing in the min-exponent check. Check that it 2531 won't overflow min in either check, then perform the min-exponent 2532 check. */ 2533 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2534 (D.normalizedExponent + 1) * 28738 <= 2535 8651 * (semantics->minExponent - (int) semantics->precision)) { 2536 /* Underflow to zero and round. */ 2537 category = fcNormal; 2538 zeroSignificand(); 2539 fs = normalize(rounding_mode, lfLessThanHalf); 2540 2541 /* We can finally safely perform the max-exponent check. */ 2542 } else if ((D.normalizedExponent - 1) * 42039 2543 >= 12655 * semantics->maxExponent) { 2544 /* Overflow and round. */ 2545 fs = handleOverflow(rounding_mode); 2546 } else { 2547 integerPart *decSignificand; 2548 unsigned int partCount; 2549 2550 /* A tight upper bound on number of bits required to hold an 2551 N-digit decimal integer is N * 196 / 59. Allocate enough space 2552 to hold the full significand, and an extra part required by 2553 tcMultiplyPart. */ 2554 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2555 partCount = partCountForBits(1 + 196 * partCount / 59); 2556 decSignificand = new integerPart[partCount + 1]; 2557 partCount = 0; 2558 2559 /* Convert to binary efficiently - we do almost all multiplication 2560 in an integerPart. When this would overflow do we do a single 2561 bignum multiplication, and then revert again to multiplication 2562 in an integerPart. */ 2563 do { 2564 integerPart decValue, val, multiplier; 2565 2566 val = 0; 2567 multiplier = 1; 2568 2569 do { 2570 if (*p == '.') { 2571 p++; 2572 if (p == str.end()) { 2573 break; 2574 } 2575 } 2576 decValue = decDigitValue(*p++); 2577 assert(decValue < 10U && "Invalid character in significand"); 2578 multiplier *= 10; 2579 val = val * 10 + decValue; 2580 /* The maximum number that can be multiplied by ten with any 2581 digit added without overflowing an integerPart. */ 2582 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2583 2584 /* Multiply out the current part. */ 2585 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2586 partCount, partCount + 1, false); 2587 2588 /* If we used another part (likely but not guaranteed), increase 2589 the count. */ 2590 if (decSignificand[partCount]) 2591 partCount++; 2592 } while (p <= D.lastSigDigit); 2593 2594 category = fcNormal; 2595 fs = roundSignificandWithExponent(decSignificand, partCount, 2596 D.exponent, rounding_mode); 2597 2598 delete [] decSignificand; 2599 } 2600 2601 return fs; 2602 } 2603 2604 bool 2605 APFloat::convertFromStringSpecials(StringRef str) { 2606 if (str.equals("inf") || str.equals("INFINITY")) { 2607 makeInf(false); 2608 return true; 2609 } 2610 2611 if (str.equals("-inf") || str.equals("-INFINITY")) { 2612 makeInf(true); 2613 return true; 2614 } 2615 2616 if (str.equals("nan") || str.equals("NaN")) { 2617 makeNaN(false, false); 2618 return true; 2619 } 2620 2621 if (str.equals("-nan") || str.equals("-NaN")) { 2622 makeNaN(false, true); 2623 return true; 2624 } 2625 2626 return false; 2627 } 2628 2629 APFloat::opStatus 2630 APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2631 { 2632 assert(!str.empty() && "Invalid string length"); 2633 2634 // Handle special cases. 2635 if (convertFromStringSpecials(str)) 2636 return opOK; 2637 2638 /* Handle a leading minus sign. */ 2639 StringRef::iterator p = str.begin(); 2640 size_t slen = str.size(); 2641 sign = *p == '-' ? 1 : 0; 2642 if (*p == '-' || *p == '+') { 2643 p++; 2644 slen--; 2645 assert(slen && "String has no digits"); 2646 } 2647 2648 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2649 assert(slen - 2 && "Invalid string"); 2650 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2651 rounding_mode); 2652 } 2653 2654 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2655 } 2656 2657 /* Write out a hexadecimal representation of the floating point value 2658 to DST, which must be of sufficient size, in the C99 form 2659 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2660 excluding the terminating NUL. 2661 2662 If UPPERCASE, the output is in upper case, otherwise in lower case. 2663 2664 HEXDIGITS digits appear altogether, rounding the value if 2665 necessary. If HEXDIGITS is 0, the minimal precision to display the 2666 number precisely is used instead. If nothing would appear after 2667 the decimal point it is suppressed. 2668 2669 The decimal exponent is always printed and has at least one digit. 2670 Zero values display an exponent of zero. Infinities and NaNs 2671 appear as "infinity" or "nan" respectively. 2672 2673 The above rules are as specified by C99. There is ambiguity about 2674 what the leading hexadecimal digit should be. This implementation 2675 uses whatever is necessary so that the exponent is displayed as 2676 stored. This implies the exponent will fall within the IEEE format 2677 range, and the leading hexadecimal digit will be 0 (for denormals), 2678 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2679 any other digits zero). 2680 */ 2681 unsigned int 2682 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2683 bool upperCase, roundingMode rounding_mode) const 2684 { 2685 char *p; 2686 2687 p = dst; 2688 if (sign) 2689 *dst++ = '-'; 2690 2691 switch (category) { 2692 case fcInfinity: 2693 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2694 dst += sizeof infinityL - 1; 2695 break; 2696 2697 case fcNaN: 2698 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2699 dst += sizeof NaNU - 1; 2700 break; 2701 2702 case fcZero: 2703 *dst++ = '0'; 2704 *dst++ = upperCase ? 'X': 'x'; 2705 *dst++ = '0'; 2706 if (hexDigits > 1) { 2707 *dst++ = '.'; 2708 memset (dst, '0', hexDigits - 1); 2709 dst += hexDigits - 1; 2710 } 2711 *dst++ = upperCase ? 'P': 'p'; 2712 *dst++ = '0'; 2713 break; 2714 2715 case fcNormal: 2716 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2717 break; 2718 } 2719 2720 *dst = 0; 2721 2722 return static_cast<unsigned int>(dst - p); 2723 } 2724 2725 /* Does the hard work of outputting the correctly rounded hexadecimal 2726 form of a normal floating point number with the specified number of 2727 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2728 digits necessary to print the value precisely is output. */ 2729 char * 2730 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2731 bool upperCase, 2732 roundingMode rounding_mode) const 2733 { 2734 unsigned int count, valueBits, shift, partsCount, outputDigits; 2735 const char *hexDigitChars; 2736 const integerPart *significand; 2737 char *p; 2738 bool roundUp; 2739 2740 *dst++ = '0'; 2741 *dst++ = upperCase ? 'X': 'x'; 2742 2743 roundUp = false; 2744 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2745 2746 significand = significandParts(); 2747 partsCount = partCount(); 2748 2749 /* +3 because the first digit only uses the single integer bit, so 2750 we have 3 virtual zero most-significant-bits. */ 2751 valueBits = semantics->precision + 3; 2752 shift = integerPartWidth - valueBits % integerPartWidth; 2753 2754 /* The natural number of digits required ignoring trailing 2755 insignificant zeroes. */ 2756 outputDigits = (valueBits - significandLSB () + 3) / 4; 2757 2758 /* hexDigits of zero means use the required number for the 2759 precision. Otherwise, see if we are truncating. If we are, 2760 find out if we need to round away from zero. */ 2761 if (hexDigits) { 2762 if (hexDigits < outputDigits) { 2763 /* We are dropping non-zero bits, so need to check how to round. 2764 "bits" is the number of dropped bits. */ 2765 unsigned int bits; 2766 lostFraction fraction; 2767 2768 bits = valueBits - hexDigits * 4; 2769 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2770 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2771 } 2772 outputDigits = hexDigits; 2773 } 2774 2775 /* Write the digits consecutively, and start writing in the location 2776 of the hexadecimal point. We move the most significant digit 2777 left and add the hexadecimal point later. */ 2778 p = ++dst; 2779 2780 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2781 2782 while (outputDigits && count) { 2783 integerPart part; 2784 2785 /* Put the most significant integerPartWidth bits in "part". */ 2786 if (--count == partsCount) 2787 part = 0; /* An imaginary higher zero part. */ 2788 else 2789 part = significand[count] << shift; 2790 2791 if (count && shift) 2792 part |= significand[count - 1] >> (integerPartWidth - shift); 2793 2794 /* Convert as much of "part" to hexdigits as we can. */ 2795 unsigned int curDigits = integerPartWidth / 4; 2796 2797 if (curDigits > outputDigits) 2798 curDigits = outputDigits; 2799 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2800 outputDigits -= curDigits; 2801 } 2802 2803 if (roundUp) { 2804 char *q = dst; 2805 2806 /* Note that hexDigitChars has a trailing '0'. */ 2807 do { 2808 q--; 2809 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2810 } while (*q == '0'); 2811 assert(q >= p); 2812 } else { 2813 /* Add trailing zeroes. */ 2814 memset (dst, '0', outputDigits); 2815 dst += outputDigits; 2816 } 2817 2818 /* Move the most significant digit to before the point, and if there 2819 is something after the decimal point add it. This must come 2820 after rounding above. */ 2821 p[-1] = p[0]; 2822 if (dst -1 == p) 2823 dst--; 2824 else 2825 p[0] = '.'; 2826 2827 /* Finally output the exponent. */ 2828 *dst++ = upperCase ? 'P': 'p'; 2829 2830 return writeSignedDecimal (dst, exponent); 2831 } 2832 2833 hash_code llvm::hash_value(const APFloat &Arg) { 2834 if (!Arg.isFiniteNonZero()) 2835 return hash_combine((uint8_t)Arg.category, 2836 // NaN has no sign, fix it at zero. 2837 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2838 Arg.semantics->precision); 2839 2840 // Normal floats need their exponent and significand hashed. 2841 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2842 Arg.semantics->precision, Arg.exponent, 2843 hash_combine_range( 2844 Arg.significandParts(), 2845 Arg.significandParts() + Arg.partCount())); 2846 } 2847 2848 // Conversion from APFloat to/from host float/double. It may eventually be 2849 // possible to eliminate these and have everybody deal with APFloats, but that 2850 // will take a while. This approach will not easily extend to long double. 2851 // Current implementation requires integerPartWidth==64, which is correct at 2852 // the moment but could be made more general. 2853 2854 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2855 // the actual IEEE respresentations. We compensate for that here. 2856 2857 APInt 2858 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2859 { 2860 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2861 assert(partCount()==2); 2862 2863 uint64_t myexponent, mysignificand; 2864 2865 if (isFiniteNonZero()) { 2866 myexponent = exponent+16383; //bias 2867 mysignificand = significandParts()[0]; 2868 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2869 myexponent = 0; // denormal 2870 } else if (category==fcZero) { 2871 myexponent = 0; 2872 mysignificand = 0; 2873 } else if (category==fcInfinity) { 2874 myexponent = 0x7fff; 2875 mysignificand = 0x8000000000000000ULL; 2876 } else { 2877 assert(category == fcNaN && "Unknown category"); 2878 myexponent = 0x7fff; 2879 mysignificand = significandParts()[0]; 2880 } 2881 2882 uint64_t words[2]; 2883 words[0] = mysignificand; 2884 words[1] = ((uint64_t)(sign & 1) << 15) | 2885 (myexponent & 0x7fffLL); 2886 return APInt(80, words); 2887 } 2888 2889 APInt 2890 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2891 { 2892 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2893 assert(partCount()==2); 2894 2895 uint64_t words[2]; 2896 opStatus fs; 2897 bool losesInfo; 2898 2899 // Convert number to double. To avoid spurious underflows, we re- 2900 // normalize against the "double" minExponent first, and only *then* 2901 // truncate the mantissa. The result of that second conversion 2902 // may be inexact, but should never underflow. 2903 // Declare fltSemantics before APFloat that uses it (and 2904 // saves pointer to it) to ensure correct destruction order. 2905 fltSemantics extendedSemantics = *semantics; 2906 extendedSemantics.minExponent = IEEEdouble.minExponent; 2907 APFloat extended(*this); 2908 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2909 assert(fs == opOK && !losesInfo); 2910 (void)fs; 2911 2912 APFloat u(extended); 2913 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2914 assert(fs == opOK || fs == opInexact); 2915 (void)fs; 2916 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2917 2918 // If conversion was exact or resulted in a special case, we're done; 2919 // just set the second double to zero. Otherwise, re-convert back to 2920 // the extended format and compute the difference. This now should 2921 // convert exactly to double. 2922 if (u.isFiniteNonZero() && losesInfo) { 2923 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2924 assert(fs == opOK && !losesInfo); 2925 (void)fs; 2926 2927 APFloat v(extended); 2928 v.subtract(u, rmNearestTiesToEven); 2929 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2930 assert(fs == opOK && !losesInfo); 2931 (void)fs; 2932 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2933 } else { 2934 words[1] = 0; 2935 } 2936 2937 return APInt(128, words); 2938 } 2939 2940 APInt 2941 APFloat::convertQuadrupleAPFloatToAPInt() const 2942 { 2943 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2944 assert(partCount()==2); 2945 2946 uint64_t myexponent, mysignificand, mysignificand2; 2947 2948 if (isFiniteNonZero()) { 2949 myexponent = exponent+16383; //bias 2950 mysignificand = significandParts()[0]; 2951 mysignificand2 = significandParts()[1]; 2952 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2953 myexponent = 0; // denormal 2954 } else if (category==fcZero) { 2955 myexponent = 0; 2956 mysignificand = mysignificand2 = 0; 2957 } else if (category==fcInfinity) { 2958 myexponent = 0x7fff; 2959 mysignificand = mysignificand2 = 0; 2960 } else { 2961 assert(category == fcNaN && "Unknown category!"); 2962 myexponent = 0x7fff; 2963 mysignificand = significandParts()[0]; 2964 mysignificand2 = significandParts()[1]; 2965 } 2966 2967 uint64_t words[2]; 2968 words[0] = mysignificand; 2969 words[1] = ((uint64_t)(sign & 1) << 63) | 2970 ((myexponent & 0x7fff) << 48) | 2971 (mysignificand2 & 0xffffffffffffLL); 2972 2973 return APInt(128, words); 2974 } 2975 2976 APInt 2977 APFloat::convertDoubleAPFloatToAPInt() const 2978 { 2979 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2980 assert(partCount()==1); 2981 2982 uint64_t myexponent, mysignificand; 2983 2984 if (isFiniteNonZero()) { 2985 myexponent = exponent+1023; //bias 2986 mysignificand = *significandParts(); 2987 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2988 myexponent = 0; // denormal 2989 } else if (category==fcZero) { 2990 myexponent = 0; 2991 mysignificand = 0; 2992 } else if (category==fcInfinity) { 2993 myexponent = 0x7ff; 2994 mysignificand = 0; 2995 } else { 2996 assert(category == fcNaN && "Unknown category!"); 2997 myexponent = 0x7ff; 2998 mysignificand = *significandParts(); 2999 } 3000 3001 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 3002 ((myexponent & 0x7ff) << 52) | 3003 (mysignificand & 0xfffffffffffffLL)))); 3004 } 3005 3006 APInt 3007 APFloat::convertFloatAPFloatToAPInt() const 3008 { 3009 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 3010 assert(partCount()==1); 3011 3012 uint32_t myexponent, mysignificand; 3013 3014 if (isFiniteNonZero()) { 3015 myexponent = exponent+127; //bias 3016 mysignificand = (uint32_t)*significandParts(); 3017 if (myexponent == 1 && !(mysignificand & 0x800000)) 3018 myexponent = 0; // denormal 3019 } else if (category==fcZero) { 3020 myexponent = 0; 3021 mysignificand = 0; 3022 } else if (category==fcInfinity) { 3023 myexponent = 0xff; 3024 mysignificand = 0; 3025 } else { 3026 assert(category == fcNaN && "Unknown category!"); 3027 myexponent = 0xff; 3028 mysignificand = (uint32_t)*significandParts(); 3029 } 3030 3031 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 3032 (mysignificand & 0x7fffff))); 3033 } 3034 3035 APInt 3036 APFloat::convertHalfAPFloatToAPInt() const 3037 { 3038 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 3039 assert(partCount()==1); 3040 3041 uint32_t myexponent, mysignificand; 3042 3043 if (isFiniteNonZero()) { 3044 myexponent = exponent+15; //bias 3045 mysignificand = (uint32_t)*significandParts(); 3046 if (myexponent == 1 && !(mysignificand & 0x400)) 3047 myexponent = 0; // denormal 3048 } else if (category==fcZero) { 3049 myexponent = 0; 3050 mysignificand = 0; 3051 } else if (category==fcInfinity) { 3052 myexponent = 0x1f; 3053 mysignificand = 0; 3054 } else { 3055 assert(category == fcNaN && "Unknown category!"); 3056 myexponent = 0x1f; 3057 mysignificand = (uint32_t)*significandParts(); 3058 } 3059 3060 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3061 (mysignificand & 0x3ff))); 3062 } 3063 3064 // This function creates an APInt that is just a bit map of the floating 3065 // point constant as it would appear in memory. It is not a conversion, 3066 // and treating the result as a normal integer is unlikely to be useful. 3067 3068 APInt 3069 APFloat::bitcastToAPInt() const 3070 { 3071 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 3072 return convertHalfAPFloatToAPInt(); 3073 3074 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3075 return convertFloatAPFloatToAPInt(); 3076 3077 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3078 return convertDoubleAPFloatToAPInt(); 3079 3080 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3081 return convertQuadrupleAPFloatToAPInt(); 3082 3083 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3084 return convertPPCDoubleDoubleAPFloatToAPInt(); 3085 3086 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3087 "unknown format!"); 3088 return convertF80LongDoubleAPFloatToAPInt(); 3089 } 3090 3091 float 3092 APFloat::convertToFloat() const 3093 { 3094 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3095 "Float semantics are not IEEEsingle"); 3096 APInt api = bitcastToAPInt(); 3097 return api.bitsToFloat(); 3098 } 3099 3100 double 3101 APFloat::convertToDouble() const 3102 { 3103 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3104 "Float semantics are not IEEEdouble"); 3105 APInt api = bitcastToAPInt(); 3106 return api.bitsToDouble(); 3107 } 3108 3109 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3110 /// does not support these bit patterns: 3111 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3112 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3113 /// exponent = 0, integer bit 1 ("pseudodenormal") 3114 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3115 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3116 void 3117 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 3118 { 3119 assert(api.getBitWidth()==80); 3120 uint64_t i1 = api.getRawData()[0]; 3121 uint64_t i2 = api.getRawData()[1]; 3122 uint64_t myexponent = (i2 & 0x7fff); 3123 uint64_t mysignificand = i1; 3124 3125 initialize(&APFloat::x87DoubleExtended); 3126 assert(partCount()==2); 3127 3128 sign = static_cast<unsigned int>(i2>>15); 3129 if (myexponent==0 && mysignificand==0) { 3130 // exponent, significand meaningless 3131 category = fcZero; 3132 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3133 // exponent, significand meaningless 3134 category = fcInfinity; 3135 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3136 // exponent meaningless 3137 category = fcNaN; 3138 significandParts()[0] = mysignificand; 3139 significandParts()[1] = 0; 3140 } else { 3141 category = fcNormal; 3142 exponent = myexponent - 16383; 3143 significandParts()[0] = mysignificand; 3144 significandParts()[1] = 0; 3145 if (myexponent==0) // denormal 3146 exponent = -16382; 3147 } 3148 } 3149 3150 void 3151 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3152 { 3153 assert(api.getBitWidth()==128); 3154 uint64_t i1 = api.getRawData()[0]; 3155 uint64_t i2 = api.getRawData()[1]; 3156 opStatus fs; 3157 bool losesInfo; 3158 3159 // Get the first double and convert to our format. 3160 initFromDoubleAPInt(APInt(64, i1)); 3161 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3162 assert(fs == opOK && !losesInfo); 3163 (void)fs; 3164 3165 // Unless we have a special case, add in second double. 3166 if (isFiniteNonZero()) { 3167 APFloat v(IEEEdouble, APInt(64, i2)); 3168 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3169 assert(fs == opOK && !losesInfo); 3170 (void)fs; 3171 3172 add(v, rmNearestTiesToEven); 3173 } 3174 } 3175 3176 void 3177 APFloat::initFromQuadrupleAPInt(const APInt &api) 3178 { 3179 assert(api.getBitWidth()==128); 3180 uint64_t i1 = api.getRawData()[0]; 3181 uint64_t i2 = api.getRawData()[1]; 3182 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3183 uint64_t mysignificand = i1; 3184 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3185 3186 initialize(&APFloat::IEEEquad); 3187 assert(partCount()==2); 3188 3189 sign = static_cast<unsigned int>(i2>>63); 3190 if (myexponent==0 && 3191 (mysignificand==0 && mysignificand2==0)) { 3192 // exponent, significand meaningless 3193 category = fcZero; 3194 } else if (myexponent==0x7fff && 3195 (mysignificand==0 && mysignificand2==0)) { 3196 // exponent, significand meaningless 3197 category = fcInfinity; 3198 } else if (myexponent==0x7fff && 3199 (mysignificand!=0 || mysignificand2 !=0)) { 3200 // exponent meaningless 3201 category = fcNaN; 3202 significandParts()[0] = mysignificand; 3203 significandParts()[1] = mysignificand2; 3204 } else { 3205 category = fcNormal; 3206 exponent = myexponent - 16383; 3207 significandParts()[0] = mysignificand; 3208 significandParts()[1] = mysignificand2; 3209 if (myexponent==0) // denormal 3210 exponent = -16382; 3211 else 3212 significandParts()[1] |= 0x1000000000000LL; // integer bit 3213 } 3214 } 3215 3216 void 3217 APFloat::initFromDoubleAPInt(const APInt &api) 3218 { 3219 assert(api.getBitWidth()==64); 3220 uint64_t i = *api.getRawData(); 3221 uint64_t myexponent = (i >> 52) & 0x7ff; 3222 uint64_t mysignificand = i & 0xfffffffffffffLL; 3223 3224 initialize(&APFloat::IEEEdouble); 3225 assert(partCount()==1); 3226 3227 sign = static_cast<unsigned int>(i>>63); 3228 if (myexponent==0 && mysignificand==0) { 3229 // exponent, significand meaningless 3230 category = fcZero; 3231 } else if (myexponent==0x7ff && mysignificand==0) { 3232 // exponent, significand meaningless 3233 category = fcInfinity; 3234 } else if (myexponent==0x7ff && mysignificand!=0) { 3235 // exponent meaningless 3236 category = fcNaN; 3237 *significandParts() = mysignificand; 3238 } else { 3239 category = fcNormal; 3240 exponent = myexponent - 1023; 3241 *significandParts() = mysignificand; 3242 if (myexponent==0) // denormal 3243 exponent = -1022; 3244 else 3245 *significandParts() |= 0x10000000000000LL; // integer bit 3246 } 3247 } 3248 3249 void 3250 APFloat::initFromFloatAPInt(const APInt & api) 3251 { 3252 assert(api.getBitWidth()==32); 3253 uint32_t i = (uint32_t)*api.getRawData(); 3254 uint32_t myexponent = (i >> 23) & 0xff; 3255 uint32_t mysignificand = i & 0x7fffff; 3256 3257 initialize(&APFloat::IEEEsingle); 3258 assert(partCount()==1); 3259 3260 sign = i >> 31; 3261 if (myexponent==0 && mysignificand==0) { 3262 // exponent, significand meaningless 3263 category = fcZero; 3264 } else if (myexponent==0xff && mysignificand==0) { 3265 // exponent, significand meaningless 3266 category = fcInfinity; 3267 } else if (myexponent==0xff && mysignificand!=0) { 3268 // sign, exponent, significand meaningless 3269 category = fcNaN; 3270 *significandParts() = mysignificand; 3271 } else { 3272 category = fcNormal; 3273 exponent = myexponent - 127; //bias 3274 *significandParts() = mysignificand; 3275 if (myexponent==0) // denormal 3276 exponent = -126; 3277 else 3278 *significandParts() |= 0x800000; // integer bit 3279 } 3280 } 3281 3282 void 3283 APFloat::initFromHalfAPInt(const APInt & api) 3284 { 3285 assert(api.getBitWidth()==16); 3286 uint32_t i = (uint32_t)*api.getRawData(); 3287 uint32_t myexponent = (i >> 10) & 0x1f; 3288 uint32_t mysignificand = i & 0x3ff; 3289 3290 initialize(&APFloat::IEEEhalf); 3291 assert(partCount()==1); 3292 3293 sign = i >> 15; 3294 if (myexponent==0 && mysignificand==0) { 3295 // exponent, significand meaningless 3296 category = fcZero; 3297 } else if (myexponent==0x1f && mysignificand==0) { 3298 // exponent, significand meaningless 3299 category = fcInfinity; 3300 } else if (myexponent==0x1f && mysignificand!=0) { 3301 // sign, exponent, significand meaningless 3302 category = fcNaN; 3303 *significandParts() = mysignificand; 3304 } else { 3305 category = fcNormal; 3306 exponent = myexponent - 15; //bias 3307 *significandParts() = mysignificand; 3308 if (myexponent==0) // denormal 3309 exponent = -14; 3310 else 3311 *significandParts() |= 0x400; // integer bit 3312 } 3313 } 3314 3315 /// Treat api as containing the bits of a floating point number. Currently 3316 /// we infer the floating point type from the size of the APInt. The 3317 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3318 /// when the size is anything else). 3319 void 3320 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3321 { 3322 if (Sem == &IEEEhalf) 3323 return initFromHalfAPInt(api); 3324 if (Sem == &IEEEsingle) 3325 return initFromFloatAPInt(api); 3326 if (Sem == &IEEEdouble) 3327 return initFromDoubleAPInt(api); 3328 if (Sem == &x87DoubleExtended) 3329 return initFromF80LongDoubleAPInt(api); 3330 if (Sem == &IEEEquad) 3331 return initFromQuadrupleAPInt(api); 3332 if (Sem == &PPCDoubleDouble) 3333 return initFromPPCDoubleDoubleAPInt(api); 3334 3335 llvm_unreachable(nullptr); 3336 } 3337 3338 APFloat 3339 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3340 { 3341 switch (BitWidth) { 3342 case 16: 3343 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3344 case 32: 3345 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3346 case 64: 3347 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3348 case 80: 3349 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3350 case 128: 3351 if (isIEEE) 3352 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3353 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3354 default: 3355 llvm_unreachable("Unknown floating bit width"); 3356 } 3357 } 3358 3359 /// Make this number the largest magnitude normal number in the given 3360 /// semantics. 3361 void APFloat::makeLargest(bool Negative) { 3362 // We want (in interchange format): 3363 // sign = {Negative} 3364 // exponent = 1..10 3365 // significand = 1..1 3366 category = fcNormal; 3367 sign = Negative; 3368 exponent = semantics->maxExponent; 3369 3370 // Use memset to set all but the highest integerPart to all ones. 3371 integerPart *significand = significandParts(); 3372 unsigned PartCount = partCount(); 3373 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3374 3375 // Set the high integerPart especially setting all unused top bits for 3376 // internal consistency. 3377 const unsigned NumUnusedHighBits = 3378 PartCount*integerPartWidth - semantics->precision; 3379 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth) 3380 ? (~integerPart(0) >> NumUnusedHighBits) 3381 : 0; 3382 } 3383 3384 /// Make this number the smallest magnitude denormal number in the given 3385 /// semantics. 3386 void APFloat::makeSmallest(bool Negative) { 3387 // We want (in interchange format): 3388 // sign = {Negative} 3389 // exponent = 0..0 3390 // significand = 0..01 3391 category = fcNormal; 3392 sign = Negative; 3393 exponent = semantics->minExponent; 3394 APInt::tcSet(significandParts(), 1, partCount()); 3395 } 3396 3397 3398 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3399 // We want (in interchange format): 3400 // sign = {Negative} 3401 // exponent = 1..10 3402 // significand = 1..1 3403 APFloat Val(Sem, uninitialized); 3404 Val.makeLargest(Negative); 3405 return Val; 3406 } 3407 3408 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3409 // We want (in interchange format): 3410 // sign = {Negative} 3411 // exponent = 0..0 3412 // significand = 0..01 3413 APFloat Val(Sem, uninitialized); 3414 Val.makeSmallest(Negative); 3415 return Val; 3416 } 3417 3418 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3419 APFloat Val(Sem, uninitialized); 3420 3421 // We want (in interchange format): 3422 // sign = {Negative} 3423 // exponent = 0..0 3424 // significand = 10..0 3425 3426 Val.category = fcNormal; 3427 Val.zeroSignificand(); 3428 Val.sign = Negative; 3429 Val.exponent = Sem.minExponent; 3430 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3431 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3432 3433 return Val; 3434 } 3435 3436 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3437 initFromAPInt(&Sem, API); 3438 } 3439 3440 APFloat::APFloat(float f) { 3441 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3442 } 3443 3444 APFloat::APFloat(double d) { 3445 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3446 } 3447 3448 namespace { 3449 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3450 Buffer.append(Str.begin(), Str.end()); 3451 } 3452 3453 /// Removes data from the given significand until it is no more 3454 /// precise than is required for the desired precision. 3455 void AdjustToPrecision(APInt &significand, 3456 int &exp, unsigned FormatPrecision) { 3457 unsigned bits = significand.getActiveBits(); 3458 3459 // 196/59 is a very slight overestimate of lg_2(10). 3460 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3461 3462 if (bits <= bitsRequired) return; 3463 3464 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3465 if (!tensRemovable) return; 3466 3467 exp += tensRemovable; 3468 3469 APInt divisor(significand.getBitWidth(), 1); 3470 APInt powten(significand.getBitWidth(), 10); 3471 while (true) { 3472 if (tensRemovable & 1) 3473 divisor *= powten; 3474 tensRemovable >>= 1; 3475 if (!tensRemovable) break; 3476 powten *= powten; 3477 } 3478 3479 significand = significand.udiv(divisor); 3480 3481 // Truncate the significand down to its active bit count. 3482 significand = significand.trunc(significand.getActiveBits()); 3483 } 3484 3485 3486 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3487 int &exp, unsigned FormatPrecision) { 3488 unsigned N = buffer.size(); 3489 if (N <= FormatPrecision) return; 3490 3491 // The most significant figures are the last ones in the buffer. 3492 unsigned FirstSignificant = N - FormatPrecision; 3493 3494 // Round. 3495 // FIXME: this probably shouldn't use 'round half up'. 3496 3497 // Rounding down is just a truncation, except we also want to drop 3498 // trailing zeros from the new result. 3499 if (buffer[FirstSignificant - 1] < '5') { 3500 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3501 FirstSignificant++; 3502 3503 exp += FirstSignificant; 3504 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3505 return; 3506 } 3507 3508 // Rounding up requires a decimal add-with-carry. If we continue 3509 // the carry, the newly-introduced zeros will just be truncated. 3510 for (unsigned I = FirstSignificant; I != N; ++I) { 3511 if (buffer[I] == '9') { 3512 FirstSignificant++; 3513 } else { 3514 buffer[I]++; 3515 break; 3516 } 3517 } 3518 3519 // If we carried through, we have exactly one digit of precision. 3520 if (FirstSignificant == N) { 3521 exp += FirstSignificant; 3522 buffer.clear(); 3523 buffer.push_back('1'); 3524 return; 3525 } 3526 3527 exp += FirstSignificant; 3528 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3529 } 3530 } 3531 3532 void APFloat::toString(SmallVectorImpl<char> &Str, 3533 unsigned FormatPrecision, 3534 unsigned FormatMaxPadding) const { 3535 switch (category) { 3536 case fcInfinity: 3537 if (isNegative()) 3538 return append(Str, "-Inf"); 3539 else 3540 return append(Str, "+Inf"); 3541 3542 case fcNaN: return append(Str, "NaN"); 3543 3544 case fcZero: 3545 if (isNegative()) 3546 Str.push_back('-'); 3547 3548 if (!FormatMaxPadding) 3549 append(Str, "0.0E+0"); 3550 else 3551 Str.push_back('0'); 3552 return; 3553 3554 case fcNormal: 3555 break; 3556 } 3557 3558 if (isNegative()) 3559 Str.push_back('-'); 3560 3561 // Decompose the number into an APInt and an exponent. 3562 int exp = exponent - ((int) semantics->precision - 1); 3563 APInt significand(semantics->precision, 3564 makeArrayRef(significandParts(), 3565 partCountForBits(semantics->precision))); 3566 3567 // Set FormatPrecision if zero. We want to do this before we 3568 // truncate trailing zeros, as those are part of the precision. 3569 if (!FormatPrecision) { 3570 // We use enough digits so the number can be round-tripped back to an 3571 // APFloat. The formula comes from "How to Print Floating-Point Numbers 3572 // Accurately" by Steele and White. 3573 // FIXME: Using a formula based purely on the precision is conservative; 3574 // we can print fewer digits depending on the actual value being printed. 3575 3576 // FormatPrecision = 2 + floor(significandBits / lg_2(10)) 3577 FormatPrecision = 2 + semantics->precision * 59 / 196; 3578 } 3579 3580 // Ignore trailing binary zeros. 3581 int trailingZeros = significand.countTrailingZeros(); 3582 exp += trailingZeros; 3583 significand = significand.lshr(trailingZeros); 3584 3585 // Change the exponent from 2^e to 10^e. 3586 if (exp == 0) { 3587 // Nothing to do. 3588 } else if (exp > 0) { 3589 // Just shift left. 3590 significand = significand.zext(semantics->precision + exp); 3591 significand <<= exp; 3592 exp = 0; 3593 } else { /* exp < 0 */ 3594 int texp = -exp; 3595 3596 // We transform this using the identity: 3597 // (N)(2^-e) == (N)(5^e)(10^-e) 3598 // This means we have to multiply N (the significand) by 5^e. 3599 // To avoid overflow, we have to operate on numbers large 3600 // enough to store N * 5^e: 3601 // log2(N * 5^e) == log2(N) + e * log2(5) 3602 // <= semantics->precision + e * 137 / 59 3603 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3604 3605 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3606 3607 // Multiply significand by 5^e. 3608 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3609 significand = significand.zext(precision); 3610 APInt five_to_the_i(precision, 5); 3611 while (true) { 3612 if (texp & 1) significand *= five_to_the_i; 3613 3614 texp >>= 1; 3615 if (!texp) break; 3616 five_to_the_i *= five_to_the_i; 3617 } 3618 } 3619 3620 AdjustToPrecision(significand, exp, FormatPrecision); 3621 3622 SmallVector<char, 256> buffer; 3623 3624 // Fill the buffer. 3625 unsigned precision = significand.getBitWidth(); 3626 APInt ten(precision, 10); 3627 APInt digit(precision, 0); 3628 3629 bool inTrail = true; 3630 while (significand != 0) { 3631 // digit <- significand % 10 3632 // significand <- significand / 10 3633 APInt::udivrem(significand, ten, significand, digit); 3634 3635 unsigned d = digit.getZExtValue(); 3636 3637 // Drop trailing zeros. 3638 if (inTrail && !d) exp++; 3639 else { 3640 buffer.push_back((char) ('0' + d)); 3641 inTrail = false; 3642 } 3643 } 3644 3645 assert(!buffer.empty() && "no characters in buffer!"); 3646 3647 // Drop down to FormatPrecision. 3648 // TODO: don't do more precise calculations above than are required. 3649 AdjustToPrecision(buffer, exp, FormatPrecision); 3650 3651 unsigned NDigits = buffer.size(); 3652 3653 // Check whether we should use scientific notation. 3654 bool FormatScientific; 3655 if (!FormatMaxPadding) 3656 FormatScientific = true; 3657 else { 3658 if (exp >= 0) { 3659 // 765e3 --> 765000 3660 // ^^^ 3661 // But we shouldn't make the number look more precise than it is. 3662 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3663 NDigits + (unsigned) exp > FormatPrecision); 3664 } else { 3665 // Power of the most significant digit. 3666 int MSD = exp + (int) (NDigits - 1); 3667 if (MSD >= 0) { 3668 // 765e-2 == 7.65 3669 FormatScientific = false; 3670 } else { 3671 // 765e-5 == 0.00765 3672 // ^ ^^ 3673 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3674 } 3675 } 3676 } 3677 3678 // Scientific formatting is pretty straightforward. 3679 if (FormatScientific) { 3680 exp += (NDigits - 1); 3681 3682 Str.push_back(buffer[NDigits-1]); 3683 Str.push_back('.'); 3684 if (NDigits == 1) 3685 Str.push_back('0'); 3686 else 3687 for (unsigned I = 1; I != NDigits; ++I) 3688 Str.push_back(buffer[NDigits-1-I]); 3689 Str.push_back('E'); 3690 3691 Str.push_back(exp >= 0 ? '+' : '-'); 3692 if (exp < 0) exp = -exp; 3693 SmallVector<char, 6> expbuf; 3694 do { 3695 expbuf.push_back((char) ('0' + (exp % 10))); 3696 exp /= 10; 3697 } while (exp); 3698 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3699 Str.push_back(expbuf[E-1-I]); 3700 return; 3701 } 3702 3703 // Non-scientific, positive exponents. 3704 if (exp >= 0) { 3705 for (unsigned I = 0; I != NDigits; ++I) 3706 Str.push_back(buffer[NDigits-1-I]); 3707 for (unsigned I = 0; I != (unsigned) exp; ++I) 3708 Str.push_back('0'); 3709 return; 3710 } 3711 3712 // Non-scientific, negative exponents. 3713 3714 // The number of digits to the left of the decimal point. 3715 int NWholeDigits = exp + (int) NDigits; 3716 3717 unsigned I = 0; 3718 if (NWholeDigits > 0) { 3719 for (; I != (unsigned) NWholeDigits; ++I) 3720 Str.push_back(buffer[NDigits-I-1]); 3721 Str.push_back('.'); 3722 } else { 3723 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3724 3725 Str.push_back('0'); 3726 Str.push_back('.'); 3727 for (unsigned Z = 1; Z != NZeros; ++Z) 3728 Str.push_back('0'); 3729 } 3730 3731 for (; I != NDigits; ++I) 3732 Str.push_back(buffer[NDigits-I-1]); 3733 } 3734 3735 bool APFloat::getExactInverse(APFloat *inv) const { 3736 // Special floats and denormals have no exact inverse. 3737 if (!isFiniteNonZero()) 3738 return false; 3739 3740 // Check that the number is a power of two by making sure that only the 3741 // integer bit is set in the significand. 3742 if (significandLSB() != semantics->precision - 1) 3743 return false; 3744 3745 // Get the inverse. 3746 APFloat reciprocal(*semantics, 1ULL); 3747 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3748 return false; 3749 3750 // Avoid multiplication with a denormal, it is not safe on all platforms and 3751 // may be slower than a normal division. 3752 if (reciprocal.isDenormal()) 3753 return false; 3754 3755 assert(reciprocal.isFiniteNonZero() && 3756 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3757 3758 if (inv) 3759 *inv = reciprocal; 3760 3761 return true; 3762 } 3763 3764 bool APFloat::isSignaling() const { 3765 if (!isNaN()) 3766 return false; 3767 3768 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3769 // first bit of the trailing significand being 0. 3770 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3771 } 3772 3773 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3774 /// 3775 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3776 /// appropriate sign switching before/after the computation. 3777 APFloat::opStatus APFloat::next(bool nextDown) { 3778 // If we are performing nextDown, swap sign so we have -x. 3779 if (nextDown) 3780 changeSign(); 3781 3782 // Compute nextUp(x) 3783 opStatus result = opOK; 3784 3785 // Handle each float category separately. 3786 switch (category) { 3787 case fcInfinity: 3788 // nextUp(+inf) = +inf 3789 if (!isNegative()) 3790 break; 3791 // nextUp(-inf) = -getLargest() 3792 makeLargest(true); 3793 break; 3794 case fcNaN: 3795 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3796 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3797 // change the payload. 3798 if (isSignaling()) { 3799 result = opInvalidOp; 3800 // For consistency, propagate the sign of the sNaN to the qNaN. 3801 makeNaN(false, isNegative(), nullptr); 3802 } 3803 break; 3804 case fcZero: 3805 // nextUp(pm 0) = +getSmallest() 3806 makeSmallest(false); 3807 break; 3808 case fcNormal: 3809 // nextUp(-getSmallest()) = -0 3810 if (isSmallest() && isNegative()) { 3811 APInt::tcSet(significandParts(), 0, partCount()); 3812 category = fcZero; 3813 exponent = 0; 3814 break; 3815 } 3816 3817 // nextUp(getLargest()) == INFINITY 3818 if (isLargest() && !isNegative()) { 3819 APInt::tcSet(significandParts(), 0, partCount()); 3820 category = fcInfinity; 3821 exponent = semantics->maxExponent + 1; 3822 break; 3823 } 3824 3825 // nextUp(normal) == normal + inc. 3826 if (isNegative()) { 3827 // If we are negative, we need to decrement the significand. 3828 3829 // We only cross a binade boundary that requires adjusting the exponent 3830 // if: 3831 // 1. exponent != semantics->minExponent. This implies we are not in the 3832 // smallest binade or are dealing with denormals. 3833 // 2. Our significand excluding the integral bit is all zeros. 3834 bool WillCrossBinadeBoundary = 3835 exponent != semantics->minExponent && isSignificandAllZeros(); 3836 3837 // Decrement the significand. 3838 // 3839 // We always do this since: 3840 // 1. If we are dealing with a non-binade decrement, by definition we 3841 // just decrement the significand. 3842 // 2. If we are dealing with a normal -> normal binade decrement, since 3843 // we have an explicit integral bit the fact that all bits but the 3844 // integral bit are zero implies that subtracting one will yield a 3845 // significand with 0 integral bit and 1 in all other spots. Thus we 3846 // must just adjust the exponent and set the integral bit to 1. 3847 // 3. If we are dealing with a normal -> denormal binade decrement, 3848 // since we set the integral bit to 0 when we represent denormals, we 3849 // just decrement the significand. 3850 integerPart *Parts = significandParts(); 3851 APInt::tcDecrement(Parts, partCount()); 3852 3853 if (WillCrossBinadeBoundary) { 3854 // Our result is a normal number. Do the following: 3855 // 1. Set the integral bit to 1. 3856 // 2. Decrement the exponent. 3857 APInt::tcSetBit(Parts, semantics->precision - 1); 3858 exponent--; 3859 } 3860 } else { 3861 // If we are positive, we need to increment the significand. 3862 3863 // We only cross a binade boundary that requires adjusting the exponent if 3864 // the input is not a denormal and all of said input's significand bits 3865 // are set. If all of said conditions are true: clear the significand, set 3866 // the integral bit to 1, and increment the exponent. If we have a 3867 // denormal always increment since moving denormals and the numbers in the 3868 // smallest normal binade have the same exponent in our representation. 3869 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3870 3871 if (WillCrossBinadeBoundary) { 3872 integerPart *Parts = significandParts(); 3873 APInt::tcSet(Parts, 0, partCount()); 3874 APInt::tcSetBit(Parts, semantics->precision - 1); 3875 assert(exponent != semantics->maxExponent && 3876 "We can not increment an exponent beyond the maxExponent allowed" 3877 " by the given floating point semantics."); 3878 exponent++; 3879 } else { 3880 incrementSignificand(); 3881 } 3882 } 3883 break; 3884 } 3885 3886 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3887 if (nextDown) 3888 changeSign(); 3889 3890 return result; 3891 } 3892 3893 void 3894 APFloat::makeInf(bool Negative) { 3895 category = fcInfinity; 3896 sign = Negative; 3897 exponent = semantics->maxExponent + 1; 3898 APInt::tcSet(significandParts(), 0, partCount()); 3899 } 3900 3901 void 3902 APFloat::makeZero(bool Negative) { 3903 category = fcZero; 3904 sign = Negative; 3905 exponent = semantics->minExponent-1; 3906 APInt::tcSet(significandParts(), 0, partCount()); 3907 } 3908 3909 APFloat llvm::scalbn(APFloat X, int Exp) { 3910 if (X.isInfinity() || X.isZero() || X.isNaN()) 3911 return std::move(X); 3912 3913 auto MaxExp = X.getSemantics().maxExponent; 3914 auto MinExp = X.getSemantics().minExponent; 3915 if (Exp > (MaxExp - X.exponent)) 3916 // Overflow saturates to infinity. 3917 return APFloat::getInf(X.getSemantics(), X.isNegative()); 3918 if (Exp < (MinExp - X.exponent)) 3919 // Underflow saturates to zero. 3920 return APFloat::getZero(X.getSemantics(), X.isNegative()); 3921 3922 X.exponent += Exp; 3923 return std::move(X); 3924 } 3925