1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MathExtras.h" 23 #include <cstring> 24 #include <limits.h> 25 26 using namespace llvm; 27 28 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 29 30 /* Assumed in hexadecimal significand parsing, and conversion to 31 hexadecimal strings. */ 32 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] 33 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 34 35 namespace llvm { 36 37 /* Represents floating point arithmetic semantics. */ 38 struct fltSemantics { 39 /* The largest E such that 2^E is representable; this matches the 40 definition of IEEE 754. */ 41 exponent_t maxExponent; 42 43 /* The smallest E such that 2^E is a normalized number; this 44 matches the definition of IEEE 754. */ 45 exponent_t minExponent; 46 47 /* Number of bits in the significand. This includes the integer 48 bit. */ 49 unsigned int precision; 50 }; 51 52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 }; 53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 }; 54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 }; 55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 }; 56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 }; 57 const fltSemantics APFloat::Bogus = { 0, 0, 0 }; 58 59 /* The PowerPC format consists of two doubles. It does not map cleanly 60 onto the usual format above. It is approximated using twice the 61 mantissa bits. Note that for exponents near the double minimum, 62 we no longer can represent the full 106 mantissa bits, so those 63 will be treated as denormal numbers. 64 65 FIXME: While this approximation is equivalent to what GCC uses for 66 compile-time arithmetic on PPC double-double numbers, it is not able 67 to represent all possible values held by a PPC double-double number, 68 for example: (long double) 1.0 + (long double) 0x1p-106 69 Should this be replaced by a full emulation of PPC double-double? */ 70 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 }; 71 72 /* A tight upper bound on number of parts required to hold the value 73 pow(5, power) is 74 75 power * 815 / (351 * integerPartWidth) + 1 76 77 However, whilst the result may require only this many parts, 78 because we are multiplying two values to get it, the 79 multiplication may require an extra part with the excess part 80 being zero (consider the trivial case of 1 * 1, tcFullMultiply 81 requires two parts to hold the single-part result). So we add an 82 extra one to guarantee enough space whilst multiplying. */ 83 const unsigned int maxExponent = 16383; 84 const unsigned int maxPrecision = 113; 85 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 86 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 87 / (351 * integerPartWidth)); 88 } 89 90 /* A bunch of private, handy routines. */ 91 92 static inline unsigned int 93 partCountForBits(unsigned int bits) 94 { 95 return ((bits) + integerPartWidth - 1) / integerPartWidth; 96 } 97 98 /* Returns 0U-9U. Return values >= 10U are not digits. */ 99 static inline unsigned int 100 decDigitValue(unsigned int c) 101 { 102 return c - '0'; 103 } 104 105 /* Return the value of a decimal exponent of the form 106 [+-]ddddddd. 107 108 If the exponent overflows, returns a large exponent with the 109 appropriate sign. */ 110 static int 111 readExponent(StringRef::iterator begin, StringRef::iterator end) 112 { 113 bool isNegative; 114 unsigned int absExponent; 115 const unsigned int overlargeExponent = 24000; /* FIXME. */ 116 StringRef::iterator p = begin; 117 118 assert(p != end && "Exponent has no digits"); 119 120 isNegative = (*p == '-'); 121 if (*p == '-' || *p == '+') { 122 p++; 123 assert(p != end && "Exponent has no digits"); 124 } 125 126 absExponent = decDigitValue(*p++); 127 assert(absExponent < 10U && "Invalid character in exponent"); 128 129 for (; p != end; ++p) { 130 unsigned int value; 131 132 value = decDigitValue(*p); 133 assert(value < 10U && "Invalid character in exponent"); 134 135 value += absExponent * 10; 136 if (absExponent >= overlargeExponent) { 137 absExponent = overlargeExponent; 138 p = end; /* outwit assert below */ 139 break; 140 } 141 absExponent = value; 142 } 143 144 assert(p == end && "Invalid exponent in exponent"); 145 146 if (isNegative) 147 return -(int) absExponent; 148 else 149 return (int) absExponent; 150 } 151 152 /* This is ugly and needs cleaning up, but I don't immediately see 153 how whilst remaining safe. */ 154 static int 155 totalExponent(StringRef::iterator p, StringRef::iterator end, 156 int exponentAdjustment) 157 { 158 int unsignedExponent; 159 bool negative, overflow; 160 int exponent = 0; 161 162 assert(p != end && "Exponent has no digits"); 163 164 negative = *p == '-'; 165 if (*p == '-' || *p == '+') { 166 p++; 167 assert(p != end && "Exponent has no digits"); 168 } 169 170 unsignedExponent = 0; 171 overflow = false; 172 for (; p != end; ++p) { 173 unsigned int value; 174 175 value = decDigitValue(*p); 176 assert(value < 10U && "Invalid character in exponent"); 177 178 unsignedExponent = unsignedExponent * 10 + value; 179 if (unsignedExponent > 32767) { 180 overflow = true; 181 break; 182 } 183 } 184 185 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 186 overflow = true; 187 188 if (!overflow) { 189 exponent = unsignedExponent; 190 if (negative) 191 exponent = -exponent; 192 exponent += exponentAdjustment; 193 if (exponent > 32767 || exponent < -32768) 194 overflow = true; 195 } 196 197 if (overflow) 198 exponent = negative ? -32768: 32767; 199 200 return exponent; 201 } 202 203 static StringRef::iterator 204 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 205 StringRef::iterator *dot) 206 { 207 StringRef::iterator p = begin; 208 *dot = end; 209 while (*p == '0' && p != end) 210 p++; 211 212 if (*p == '.') { 213 *dot = p++; 214 215 assert(end - begin != 1 && "Significand has no digits"); 216 217 while (*p == '0' && p != end) 218 p++; 219 } 220 221 return p; 222 } 223 224 /* Given a normal decimal floating point number of the form 225 226 dddd.dddd[eE][+-]ddd 227 228 where the decimal point and exponent are optional, fill out the 229 structure D. Exponent is appropriate if the significand is 230 treated as an integer, and normalizedExponent if the significand 231 is taken to have the decimal point after a single leading 232 non-zero digit. 233 234 If the value is zero, V->firstSigDigit points to a non-digit, and 235 the return exponent is zero. 236 */ 237 struct decimalInfo { 238 const char *firstSigDigit; 239 const char *lastSigDigit; 240 int exponent; 241 int normalizedExponent; 242 }; 243 244 static void 245 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 246 decimalInfo *D) 247 { 248 StringRef::iterator dot = end; 249 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 250 251 D->firstSigDigit = p; 252 D->exponent = 0; 253 D->normalizedExponent = 0; 254 255 for (; p != end; ++p) { 256 if (*p == '.') { 257 assert(dot == end && "String contains multiple dots"); 258 dot = p++; 259 if (p == end) 260 break; 261 } 262 if (decDigitValue(*p) >= 10U) 263 break; 264 } 265 266 if (p != end) { 267 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 268 assert(p != begin && "Significand has no digits"); 269 assert((dot == end || p - begin != 1) && "Significand has no digits"); 270 271 /* p points to the first non-digit in the string */ 272 D->exponent = readExponent(p + 1, end); 273 274 /* Implied decimal point? */ 275 if (dot == end) 276 dot = p; 277 } 278 279 /* If number is all zeroes accept any exponent. */ 280 if (p != D->firstSigDigit) { 281 /* Drop insignificant trailing zeroes. */ 282 if (p != begin) { 283 do 284 do 285 p--; 286 while (p != begin && *p == '0'); 287 while (p != begin && *p == '.'); 288 } 289 290 /* Adjust the exponents for any decimal point. */ 291 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p)); 292 D->normalizedExponent = (D->exponent + 293 static_cast<exponent_t>((p - D->firstSigDigit) 294 - (dot > D->firstSigDigit && dot < p))); 295 } 296 297 D->lastSigDigit = p; 298 } 299 300 /* Return the trailing fraction of a hexadecimal number. 301 DIGITVALUE is the first hex digit of the fraction, P points to 302 the next digit. */ 303 static lostFraction 304 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 305 unsigned int digitValue) 306 { 307 unsigned int hexDigit; 308 309 /* If the first trailing digit isn't 0 or 8 we can work out the 310 fraction immediately. */ 311 if (digitValue > 8) 312 return lfMoreThanHalf; 313 else if (digitValue < 8 && digitValue > 0) 314 return lfLessThanHalf; 315 316 /* Otherwise we need to find the first non-zero digit. */ 317 while (*p == '0') 318 p++; 319 320 assert(p != end && "Invalid trailing hexadecimal fraction!"); 321 322 hexDigit = hexDigitValue(*p); 323 324 /* If we ran off the end it is exactly zero or one-half, otherwise 325 a little more. */ 326 if (hexDigit == -1U) 327 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 328 else 329 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 330 } 331 332 /* Return the fraction lost were a bignum truncated losing the least 333 significant BITS bits. */ 334 static lostFraction 335 lostFractionThroughTruncation(const integerPart *parts, 336 unsigned int partCount, 337 unsigned int bits) 338 { 339 unsigned int lsb; 340 341 lsb = APInt::tcLSB(parts, partCount); 342 343 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 344 if (bits <= lsb) 345 return lfExactlyZero; 346 if (bits == lsb + 1) 347 return lfExactlyHalf; 348 if (bits <= partCount * integerPartWidth && 349 APInt::tcExtractBit(parts, bits - 1)) 350 return lfMoreThanHalf; 351 352 return lfLessThanHalf; 353 } 354 355 /* Shift DST right BITS bits noting lost fraction. */ 356 static lostFraction 357 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 358 { 359 lostFraction lost_fraction; 360 361 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 362 363 APInt::tcShiftRight(dst, parts, bits); 364 365 return lost_fraction; 366 } 367 368 /* Combine the effect of two lost fractions. */ 369 static lostFraction 370 combineLostFractions(lostFraction moreSignificant, 371 lostFraction lessSignificant) 372 { 373 if (lessSignificant != lfExactlyZero) { 374 if (moreSignificant == lfExactlyZero) 375 moreSignificant = lfLessThanHalf; 376 else if (moreSignificant == lfExactlyHalf) 377 moreSignificant = lfMoreThanHalf; 378 } 379 380 return moreSignificant; 381 } 382 383 /* The error from the true value, in half-ulps, on multiplying two 384 floating point numbers, which differ from the value they 385 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 386 than the returned value. 387 388 See "How to Read Floating Point Numbers Accurately" by William D 389 Clinger. */ 390 static unsigned int 391 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 392 { 393 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 394 395 if (HUerr1 + HUerr2 == 0) 396 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 397 else 398 return inexactMultiply + 2 * (HUerr1 + HUerr2); 399 } 400 401 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 402 when the least significant BITS are truncated. BITS cannot be 403 zero. */ 404 static integerPart 405 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 406 { 407 unsigned int count, partBits; 408 integerPart part, boundary; 409 410 assert(bits != 0); 411 412 bits--; 413 count = bits / integerPartWidth; 414 partBits = bits % integerPartWidth + 1; 415 416 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 417 418 if (isNearest) 419 boundary = (integerPart) 1 << (partBits - 1); 420 else 421 boundary = 0; 422 423 if (count == 0) { 424 if (part - boundary <= boundary - part) 425 return part - boundary; 426 else 427 return boundary - part; 428 } 429 430 if (part == boundary) { 431 while (--count) 432 if (parts[count]) 433 return ~(integerPart) 0; /* A lot. */ 434 435 return parts[0]; 436 } else if (part == boundary - 1) { 437 while (--count) 438 if (~parts[count]) 439 return ~(integerPart) 0; /* A lot. */ 440 441 return -parts[0]; 442 } 443 444 return ~(integerPart) 0; /* A lot. */ 445 } 446 447 /* Place pow(5, power) in DST, and return the number of parts used. 448 DST must be at least one part larger than size of the answer. */ 449 static unsigned int 450 powerOf5(integerPart *dst, unsigned int power) 451 { 452 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 453 15625, 78125 }; 454 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 455 pow5s[0] = 78125 * 5; 456 457 unsigned int partsCount[16] = { 1 }; 458 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 459 unsigned int result; 460 assert(power <= maxExponent); 461 462 p1 = dst; 463 p2 = scratch; 464 465 *p1 = firstEightPowers[power & 7]; 466 power >>= 3; 467 468 result = 1; 469 pow5 = pow5s; 470 471 for (unsigned int n = 0; power; power >>= 1, n++) { 472 unsigned int pc; 473 474 pc = partsCount[n]; 475 476 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 477 if (pc == 0) { 478 pc = partsCount[n - 1]; 479 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 480 pc *= 2; 481 if (pow5[pc - 1] == 0) 482 pc--; 483 partsCount[n] = pc; 484 } 485 486 if (power & 1) { 487 integerPart *tmp; 488 489 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 490 result += pc; 491 if (p2[result - 1] == 0) 492 result--; 493 494 /* Now result is in p1 with partsCount parts and p2 is scratch 495 space. */ 496 tmp = p1, p1 = p2, p2 = tmp; 497 } 498 499 pow5 += pc; 500 } 501 502 if (p1 != dst) 503 APInt::tcAssign(dst, p1, result); 504 505 return result; 506 } 507 508 /* Zero at the end to avoid modular arithmetic when adding one; used 509 when rounding up during hexadecimal output. */ 510 static const char hexDigitsLower[] = "0123456789abcdef0"; 511 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 512 static const char infinityL[] = "infinity"; 513 static const char infinityU[] = "INFINITY"; 514 static const char NaNL[] = "nan"; 515 static const char NaNU[] = "NAN"; 516 517 /* Write out an integerPart in hexadecimal, starting with the most 518 significant nibble. Write out exactly COUNT hexdigits, return 519 COUNT. */ 520 static unsigned int 521 partAsHex (char *dst, integerPart part, unsigned int count, 522 const char *hexDigitChars) 523 { 524 unsigned int result = count; 525 526 assert(count != 0 && count <= integerPartWidth / 4); 527 528 part >>= (integerPartWidth - 4 * count); 529 while (count--) { 530 dst[count] = hexDigitChars[part & 0xf]; 531 part >>= 4; 532 } 533 534 return result; 535 } 536 537 /* Write out an unsigned decimal integer. */ 538 static char * 539 writeUnsignedDecimal (char *dst, unsigned int n) 540 { 541 char buff[40], *p; 542 543 p = buff; 544 do 545 *p++ = '0' + n % 10; 546 while (n /= 10); 547 548 do 549 *dst++ = *--p; 550 while (p != buff); 551 552 return dst; 553 } 554 555 /* Write out a signed decimal integer. */ 556 static char * 557 writeSignedDecimal (char *dst, int value) 558 { 559 if (value < 0) { 560 *dst++ = '-'; 561 dst = writeUnsignedDecimal(dst, -(unsigned) value); 562 } else 563 dst = writeUnsignedDecimal(dst, value); 564 565 return dst; 566 } 567 568 /* Constructors. */ 569 void 570 APFloat::initialize(const fltSemantics *ourSemantics) 571 { 572 unsigned int count; 573 574 semantics = ourSemantics; 575 count = partCount(); 576 if (count > 1) 577 significand.parts = new integerPart[count]; 578 } 579 580 void 581 APFloat::freeSignificand() 582 { 583 if (needsCleanup()) 584 delete [] significand.parts; 585 } 586 587 void 588 APFloat::assign(const APFloat &rhs) 589 { 590 assert(semantics == rhs.semantics); 591 592 sign = rhs.sign; 593 category = rhs.category; 594 exponent = rhs.exponent; 595 if (category == fcNormal || category == fcNaN) 596 copySignificand(rhs); 597 } 598 599 void 600 APFloat::copySignificand(const APFloat &rhs) 601 { 602 assert(category == fcNormal || category == fcNaN); 603 assert(rhs.partCount() >= partCount()); 604 605 APInt::tcAssign(significandParts(), rhs.significandParts(), 606 partCount()); 607 } 608 609 /* Make this number a NaN, with an arbitrary but deterministic value 610 for the significand. If double or longer, this is a signalling NaN, 611 which may not be ideal. If float, this is QNaN(0). */ 612 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 613 { 614 category = fcNaN; 615 sign = Negative; 616 617 integerPart *significand = significandParts(); 618 unsigned numParts = partCount(); 619 620 // Set the significand bits to the fill. 621 if (!fill || fill->getNumWords() < numParts) 622 APInt::tcSet(significand, 0, numParts); 623 if (fill) { 624 APInt::tcAssign(significand, fill->getRawData(), 625 std::min(fill->getNumWords(), numParts)); 626 627 // Zero out the excess bits of the significand. 628 unsigned bitsToPreserve = semantics->precision - 1; 629 unsigned part = bitsToPreserve / 64; 630 bitsToPreserve %= 64; 631 significand[part] &= ((1ULL << bitsToPreserve) - 1); 632 for (part++; part != numParts; ++part) 633 significand[part] = 0; 634 } 635 636 unsigned QNaNBit = semantics->precision - 2; 637 638 if (SNaN) { 639 // We always have to clear the QNaN bit to make it an SNaN. 640 APInt::tcClearBit(significand, QNaNBit); 641 642 // If there are no bits set in the payload, we have to set 643 // *something* to make it a NaN instead of an infinity; 644 // conventionally, this is the next bit down from the QNaN bit. 645 if (APInt::tcIsZero(significand, numParts)) 646 APInt::tcSetBit(significand, QNaNBit - 1); 647 } else { 648 // We always have to set the QNaN bit to make it a QNaN. 649 APInt::tcSetBit(significand, QNaNBit); 650 } 651 652 // For x87 extended precision, we want to make a NaN, not a 653 // pseudo-NaN. Maybe we should expose the ability to make 654 // pseudo-NaNs? 655 if (semantics == &APFloat::x87DoubleExtended) 656 APInt::tcSetBit(significand, QNaNBit + 1); 657 } 658 659 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 660 const APInt *fill) { 661 APFloat value(Sem, uninitialized); 662 value.makeNaN(SNaN, Negative, fill); 663 return value; 664 } 665 666 APFloat & 667 APFloat::operator=(const APFloat &rhs) 668 { 669 if (this != &rhs) { 670 if (semantics != rhs.semantics) { 671 freeSignificand(); 672 initialize(rhs.semantics); 673 } 674 assign(rhs); 675 } 676 677 return *this; 678 } 679 680 bool 681 APFloat::isDenormal() const { 682 return isNormal() && (exponent == semantics->minExponent) && 683 (APInt::tcExtractBit(significandParts(), 684 semantics->precision - 1) == 0); 685 } 686 687 bool 688 APFloat::isSmallest() const { 689 // The smallest number by magnitude in our format will be the smallest 690 // denormal, i.e. the floating point normal with exponent being minimum 691 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). 692 return isNormal() && exponent == semantics->minExponent && 693 significandMSB() == 0; 694 } 695 696 bool APFloat::isSignificandAllOnes() const { 697 // Test if the significand excluding the integral bit is all ones. This allows 698 // us to test for binade boundaries. 699 const integerPart *Parts = significandParts(); 700 const unsigned PartCount = partCount(); 701 for (unsigned i = 0; i < PartCount - 1; i++) 702 if (~Parts[i]) 703 return false; 704 705 // Set the unused high bits to all ones when we compare. 706 const unsigned NumHighBits = 707 PartCount*integerPartWidth - semantics->precision + 1; 708 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 709 "fill than integerPartWidth"); 710 const integerPart HighBitFill = 711 ~integerPart(0) << (integerPartWidth - NumHighBits); 712 if (~(Parts[PartCount - 1] | HighBitFill)) 713 return false; 714 715 return true; 716 } 717 718 bool APFloat::isSignificandAllZeros() const { 719 // Test if the significand excluding the integral bit is all zeros. This 720 // allows us to test for binade boundaries. 721 const integerPart *Parts = significandParts(); 722 const unsigned PartCount = partCount(); 723 724 for (unsigned i = 0; i < PartCount - 1; i++) 725 if (Parts[i]) 726 return false; 727 728 const unsigned NumHighBits = 729 PartCount*integerPartWidth - semantics->precision + 1; 730 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " 731 "clear than integerPartWidth"); 732 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; 733 734 if (Parts[PartCount - 1] & HighBitMask) 735 return false; 736 737 return true; 738 } 739 740 bool 741 APFloat::isLargest() const { 742 // The largest number by magnitude in our format will be the floating point 743 // number with maximum exponent and with significand that is all ones. 744 return isNormal() && exponent == semantics->maxExponent 745 && isSignificandAllOnes(); 746 } 747 748 bool 749 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 750 if (this == &rhs) 751 return true; 752 if (semantics != rhs.semantics || 753 category != rhs.category || 754 sign != rhs.sign) 755 return false; 756 if (category==fcZero || category==fcInfinity) 757 return true; 758 else if (category==fcNormal && exponent!=rhs.exponent) 759 return false; 760 else { 761 int i= partCount(); 762 const integerPart* p=significandParts(); 763 const integerPart* q=rhs.significandParts(); 764 for (; i>0; i--, p++, q++) { 765 if (*p != *q) 766 return false; 767 } 768 return true; 769 } 770 } 771 772 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 773 initialize(&ourSemantics); 774 sign = 0; 775 zeroSignificand(); 776 exponent = ourSemantics.precision - 1; 777 significandParts()[0] = value; 778 normalize(rmNearestTiesToEven, lfExactlyZero); 779 } 780 781 APFloat::APFloat(const fltSemantics &ourSemantics) { 782 initialize(&ourSemantics); 783 category = fcZero; 784 sign = false; 785 } 786 787 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 788 // Allocates storage if necessary but does not initialize it. 789 initialize(&ourSemantics); 790 } 791 792 APFloat::APFloat(const fltSemantics &ourSemantics, 793 fltCategory ourCategory, bool negative) { 794 initialize(&ourSemantics); 795 category = ourCategory; 796 sign = negative; 797 if (category == fcNormal) 798 category = fcZero; 799 else if (ourCategory == fcNaN) 800 makeNaN(); 801 } 802 803 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 804 initialize(&ourSemantics); 805 convertFromString(text, rmNearestTiesToEven); 806 } 807 808 APFloat::APFloat(const APFloat &rhs) { 809 initialize(rhs.semantics); 810 assign(rhs); 811 } 812 813 APFloat::~APFloat() 814 { 815 freeSignificand(); 816 } 817 818 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 819 void APFloat::Profile(FoldingSetNodeID& ID) const { 820 ID.Add(bitcastToAPInt()); 821 } 822 823 unsigned int 824 APFloat::partCount() const 825 { 826 return partCountForBits(semantics->precision + 1); 827 } 828 829 unsigned int 830 APFloat::semanticsPrecision(const fltSemantics &semantics) 831 { 832 return semantics.precision; 833 } 834 835 const integerPart * 836 APFloat::significandParts() const 837 { 838 return const_cast<APFloat *>(this)->significandParts(); 839 } 840 841 integerPart * 842 APFloat::significandParts() 843 { 844 assert(category == fcNormal || category == fcNaN); 845 846 if (partCount() > 1) 847 return significand.parts; 848 else 849 return &significand.part; 850 } 851 852 void 853 APFloat::zeroSignificand() 854 { 855 category = fcNormal; 856 APInt::tcSet(significandParts(), 0, partCount()); 857 } 858 859 /* Increment an fcNormal floating point number's significand. */ 860 void 861 APFloat::incrementSignificand() 862 { 863 integerPart carry; 864 865 carry = APInt::tcIncrement(significandParts(), partCount()); 866 867 /* Our callers should never cause us to overflow. */ 868 assert(carry == 0); 869 (void)carry; 870 } 871 872 /* Add the significand of the RHS. Returns the carry flag. */ 873 integerPart 874 APFloat::addSignificand(const APFloat &rhs) 875 { 876 integerPart *parts; 877 878 parts = significandParts(); 879 880 assert(semantics == rhs.semantics); 881 assert(exponent == rhs.exponent); 882 883 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 884 } 885 886 /* Subtract the significand of the RHS with a borrow flag. Returns 887 the borrow flag. */ 888 integerPart 889 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 890 { 891 integerPart *parts; 892 893 parts = significandParts(); 894 895 assert(semantics == rhs.semantics); 896 assert(exponent == rhs.exponent); 897 898 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 899 partCount()); 900 } 901 902 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 903 on to the full-precision result of the multiplication. Returns the 904 lost fraction. */ 905 lostFraction 906 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 907 { 908 unsigned int omsb; // One, not zero, based MSB. 909 unsigned int partsCount, newPartsCount, precision; 910 integerPart *lhsSignificand; 911 integerPart scratch[4]; 912 integerPart *fullSignificand; 913 lostFraction lost_fraction; 914 bool ignored; 915 916 assert(semantics == rhs.semantics); 917 918 precision = semantics->precision; 919 newPartsCount = partCountForBits(precision * 2); 920 921 if (newPartsCount > 4) 922 fullSignificand = new integerPart[newPartsCount]; 923 else 924 fullSignificand = scratch; 925 926 lhsSignificand = significandParts(); 927 partsCount = partCount(); 928 929 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 930 rhs.significandParts(), partsCount, partsCount); 931 932 lost_fraction = lfExactlyZero; 933 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 934 exponent += rhs.exponent; 935 936 // Assume the operands involved in the multiplication are single-precision 937 // FP, and the two multiplicants are: 938 // *this = a23 . a22 ... a0 * 2^e1 939 // rhs = b23 . b22 ... b0 * 2^e2 940 // the result of multiplication is: 941 // *this = c47 c46 . c45 ... c0 * 2^(e1+e2) 942 // Note that there are two significant bits at the left-hand side of the 943 // radix point. Move the radix point toward left by one bit, and adjust 944 // exponent accordingly. 945 exponent += 1; 946 947 if (addend) { 948 // The intermediate result of the multiplication has "2 * precision" 949 // signicant bit; adjust the addend to be consistent with mul result. 950 // 951 Significand savedSignificand = significand; 952 const fltSemantics *savedSemantics = semantics; 953 fltSemantics extendedSemantics; 954 opStatus status; 955 unsigned int extendedPrecision; 956 957 /* Normalize our MSB. */ 958 extendedPrecision = 2 * precision; 959 if (omsb != extendedPrecision) { 960 assert(extendedPrecision > omsb); 961 APInt::tcShiftLeft(fullSignificand, newPartsCount, 962 extendedPrecision - omsb); 963 exponent -= extendedPrecision - omsb; 964 } 965 966 /* Create new semantics. */ 967 extendedSemantics = *semantics; 968 extendedSemantics.precision = extendedPrecision; 969 970 if (newPartsCount == 1) 971 significand.part = fullSignificand[0]; 972 else 973 significand.parts = fullSignificand; 974 semantics = &extendedSemantics; 975 976 APFloat extendedAddend(*addend); 977 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 978 assert(status == opOK); 979 (void)status; 980 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 981 982 /* Restore our state. */ 983 if (newPartsCount == 1) 984 fullSignificand[0] = significand.part; 985 significand = savedSignificand; 986 semantics = savedSemantics; 987 988 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 989 } 990 991 // Convert the result having "2 * precision" significant-bits back to the one 992 // having "precision" significant-bits. First, move the radix point from 993 // poision "2*precision - 1" to "precision - 1". The exponent need to be 994 // adjusted by "2*precision - 1" - "precision - 1" = "precision". 995 exponent -= precision; 996 997 // In case MSB resides at the left-hand side of radix point, shift the 998 // mantissa right by some amount to make sure the MSB reside right before 999 // the radix point (i.e. "MSB . rest-significant-bits"). 1000 // 1001 // Note that the result is not normalized when "omsb < precision". So, the 1002 // caller needs to call APFloat::normalize() if normalized value is expected. 1003 if (omsb > precision) { 1004 unsigned int bits, significantParts; 1005 lostFraction lf; 1006 1007 bits = omsb - precision; 1008 significantParts = partCountForBits(omsb); 1009 lf = shiftRight(fullSignificand, significantParts, bits); 1010 lost_fraction = combineLostFractions(lf, lost_fraction); 1011 exponent += bits; 1012 } 1013 1014 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 1015 1016 if (newPartsCount > 4) 1017 delete [] fullSignificand; 1018 1019 return lost_fraction; 1020 } 1021 1022 /* Multiply the significands of LHS and RHS to DST. */ 1023 lostFraction 1024 APFloat::divideSignificand(const APFloat &rhs) 1025 { 1026 unsigned int bit, i, partsCount; 1027 const integerPart *rhsSignificand; 1028 integerPart *lhsSignificand, *dividend, *divisor; 1029 integerPart scratch[4]; 1030 lostFraction lost_fraction; 1031 1032 assert(semantics == rhs.semantics); 1033 1034 lhsSignificand = significandParts(); 1035 rhsSignificand = rhs.significandParts(); 1036 partsCount = partCount(); 1037 1038 if (partsCount > 2) 1039 dividend = new integerPart[partsCount * 2]; 1040 else 1041 dividend = scratch; 1042 1043 divisor = dividend + partsCount; 1044 1045 /* Copy the dividend and divisor as they will be modified in-place. */ 1046 for (i = 0; i < partsCount; i++) { 1047 dividend[i] = lhsSignificand[i]; 1048 divisor[i] = rhsSignificand[i]; 1049 lhsSignificand[i] = 0; 1050 } 1051 1052 exponent -= rhs.exponent; 1053 1054 unsigned int precision = semantics->precision; 1055 1056 /* Normalize the divisor. */ 1057 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 1058 if (bit) { 1059 exponent += bit; 1060 APInt::tcShiftLeft(divisor, partsCount, bit); 1061 } 1062 1063 /* Normalize the dividend. */ 1064 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 1065 if (bit) { 1066 exponent -= bit; 1067 APInt::tcShiftLeft(dividend, partsCount, bit); 1068 } 1069 1070 /* Ensure the dividend >= divisor initially for the loop below. 1071 Incidentally, this means that the division loop below is 1072 guaranteed to set the integer bit to one. */ 1073 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 1074 exponent--; 1075 APInt::tcShiftLeft(dividend, partsCount, 1); 1076 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 1077 } 1078 1079 /* Long division. */ 1080 for (bit = precision; bit; bit -= 1) { 1081 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 1082 APInt::tcSubtract(dividend, divisor, 0, partsCount); 1083 APInt::tcSetBit(lhsSignificand, bit - 1); 1084 } 1085 1086 APInt::tcShiftLeft(dividend, partsCount, 1); 1087 } 1088 1089 /* Figure out the lost fraction. */ 1090 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1091 1092 if (cmp > 0) 1093 lost_fraction = lfMoreThanHalf; 1094 else if (cmp == 0) 1095 lost_fraction = lfExactlyHalf; 1096 else if (APInt::tcIsZero(dividend, partsCount)) 1097 lost_fraction = lfExactlyZero; 1098 else 1099 lost_fraction = lfLessThanHalf; 1100 1101 if (partsCount > 2) 1102 delete [] dividend; 1103 1104 return lost_fraction; 1105 } 1106 1107 unsigned int 1108 APFloat::significandMSB() const 1109 { 1110 return APInt::tcMSB(significandParts(), partCount()); 1111 } 1112 1113 unsigned int 1114 APFloat::significandLSB() const 1115 { 1116 return APInt::tcLSB(significandParts(), partCount()); 1117 } 1118 1119 /* Note that a zero result is NOT normalized to fcZero. */ 1120 lostFraction 1121 APFloat::shiftSignificandRight(unsigned int bits) 1122 { 1123 /* Our exponent should not overflow. */ 1124 assert((exponent_t) (exponent + bits) >= exponent); 1125 1126 exponent += bits; 1127 1128 return shiftRight(significandParts(), partCount(), bits); 1129 } 1130 1131 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1132 void 1133 APFloat::shiftSignificandLeft(unsigned int bits) 1134 { 1135 assert(bits < semantics->precision); 1136 1137 if (bits) { 1138 unsigned int partsCount = partCount(); 1139 1140 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1141 exponent -= bits; 1142 1143 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1144 } 1145 } 1146 1147 APFloat::cmpResult 1148 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1149 { 1150 int compare; 1151 1152 assert(semantics == rhs.semantics); 1153 assert(category == fcNormal); 1154 assert(rhs.category == fcNormal); 1155 1156 compare = exponent - rhs.exponent; 1157 1158 /* If exponents are equal, do an unsigned bignum comparison of the 1159 significands. */ 1160 if (compare == 0) 1161 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1162 partCount()); 1163 1164 if (compare > 0) 1165 return cmpGreaterThan; 1166 else if (compare < 0) 1167 return cmpLessThan; 1168 else 1169 return cmpEqual; 1170 } 1171 1172 /* Handle overflow. Sign is preserved. We either become infinity or 1173 the largest finite number. */ 1174 APFloat::opStatus 1175 APFloat::handleOverflow(roundingMode rounding_mode) 1176 { 1177 /* Infinity? */ 1178 if (rounding_mode == rmNearestTiesToEven || 1179 rounding_mode == rmNearestTiesToAway || 1180 (rounding_mode == rmTowardPositive && !sign) || 1181 (rounding_mode == rmTowardNegative && sign)) { 1182 category = fcInfinity; 1183 return (opStatus) (opOverflow | opInexact); 1184 } 1185 1186 /* Otherwise we become the largest finite number. */ 1187 category = fcNormal; 1188 exponent = semantics->maxExponent; 1189 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1190 semantics->precision); 1191 1192 return opInexact; 1193 } 1194 1195 /* Returns TRUE if, when truncating the current number, with BIT the 1196 new LSB, with the given lost fraction and rounding mode, the result 1197 would need to be rounded away from zero (i.e., by increasing the 1198 signficand). This routine must work for fcZero of both signs, and 1199 fcNormal numbers. */ 1200 bool 1201 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1202 lostFraction lost_fraction, 1203 unsigned int bit) const 1204 { 1205 /* NaNs and infinities should not have lost fractions. */ 1206 assert(category == fcNormal || category == fcZero); 1207 1208 /* Current callers never pass this so we don't handle it. */ 1209 assert(lost_fraction != lfExactlyZero); 1210 1211 switch (rounding_mode) { 1212 case rmNearestTiesToAway: 1213 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1214 1215 case rmNearestTiesToEven: 1216 if (lost_fraction == lfMoreThanHalf) 1217 return true; 1218 1219 /* Our zeroes don't have a significand to test. */ 1220 if (lost_fraction == lfExactlyHalf && category != fcZero) 1221 return APInt::tcExtractBit(significandParts(), bit); 1222 1223 return false; 1224 1225 case rmTowardZero: 1226 return false; 1227 1228 case rmTowardPositive: 1229 return sign == false; 1230 1231 case rmTowardNegative: 1232 return sign == true; 1233 } 1234 llvm_unreachable("Invalid rounding mode found"); 1235 } 1236 1237 APFloat::opStatus 1238 APFloat::normalize(roundingMode rounding_mode, 1239 lostFraction lost_fraction) 1240 { 1241 unsigned int omsb; /* One, not zero, based MSB. */ 1242 int exponentChange; 1243 1244 if (category != fcNormal) 1245 return opOK; 1246 1247 /* Before rounding normalize the exponent of fcNormal numbers. */ 1248 omsb = significandMSB() + 1; 1249 1250 if (omsb) { 1251 /* OMSB is numbered from 1. We want to place it in the integer 1252 bit numbered PRECISION if possible, with a compensating change in 1253 the exponent. */ 1254 exponentChange = omsb - semantics->precision; 1255 1256 /* If the resulting exponent is too high, overflow according to 1257 the rounding mode. */ 1258 if (exponent + exponentChange > semantics->maxExponent) 1259 return handleOverflow(rounding_mode); 1260 1261 /* Subnormal numbers have exponent minExponent, and their MSB 1262 is forced based on that. */ 1263 if (exponent + exponentChange < semantics->minExponent) 1264 exponentChange = semantics->minExponent - exponent; 1265 1266 /* Shifting left is easy as we don't lose precision. */ 1267 if (exponentChange < 0) { 1268 assert(lost_fraction == lfExactlyZero); 1269 1270 shiftSignificandLeft(-exponentChange); 1271 1272 return opOK; 1273 } 1274 1275 if (exponentChange > 0) { 1276 lostFraction lf; 1277 1278 /* Shift right and capture any new lost fraction. */ 1279 lf = shiftSignificandRight(exponentChange); 1280 1281 lost_fraction = combineLostFractions(lf, lost_fraction); 1282 1283 /* Keep OMSB up-to-date. */ 1284 if (omsb > (unsigned) exponentChange) 1285 omsb -= exponentChange; 1286 else 1287 omsb = 0; 1288 } 1289 } 1290 1291 /* Now round the number according to rounding_mode given the lost 1292 fraction. */ 1293 1294 /* As specified in IEEE 754, since we do not trap we do not report 1295 underflow for exact results. */ 1296 if (lost_fraction == lfExactlyZero) { 1297 /* Canonicalize zeroes. */ 1298 if (omsb == 0) 1299 category = fcZero; 1300 1301 return opOK; 1302 } 1303 1304 /* Increment the significand if we're rounding away from zero. */ 1305 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1306 if (omsb == 0) 1307 exponent = semantics->minExponent; 1308 1309 incrementSignificand(); 1310 omsb = significandMSB() + 1; 1311 1312 /* Did the significand increment overflow? */ 1313 if (omsb == (unsigned) semantics->precision + 1) { 1314 /* Renormalize by incrementing the exponent and shifting our 1315 significand right one. However if we already have the 1316 maximum exponent we overflow to infinity. */ 1317 if (exponent == semantics->maxExponent) { 1318 category = fcInfinity; 1319 1320 return (opStatus) (opOverflow | opInexact); 1321 } 1322 1323 shiftSignificandRight(1); 1324 1325 return opInexact; 1326 } 1327 } 1328 1329 /* The normal case - we were and are not denormal, and any 1330 significand increment above didn't overflow. */ 1331 if (omsb == semantics->precision) 1332 return opInexact; 1333 1334 /* We have a non-zero denormal. */ 1335 assert(omsb < semantics->precision); 1336 1337 /* Canonicalize zeroes. */ 1338 if (omsb == 0) 1339 category = fcZero; 1340 1341 /* The fcZero case is a denormal that underflowed to zero. */ 1342 return (opStatus) (opUnderflow | opInexact); 1343 } 1344 1345 APFloat::opStatus 1346 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1347 { 1348 switch (convolve(category, rhs.category)) { 1349 default: 1350 llvm_unreachable(0); 1351 1352 case convolve(fcNaN, fcZero): 1353 case convolve(fcNaN, fcNormal): 1354 case convolve(fcNaN, fcInfinity): 1355 case convolve(fcNaN, fcNaN): 1356 case convolve(fcNormal, fcZero): 1357 case convolve(fcInfinity, fcNormal): 1358 case convolve(fcInfinity, fcZero): 1359 return opOK; 1360 1361 case convolve(fcZero, fcNaN): 1362 case convolve(fcNormal, fcNaN): 1363 case convolve(fcInfinity, fcNaN): 1364 category = fcNaN; 1365 copySignificand(rhs); 1366 return opOK; 1367 1368 case convolve(fcNormal, fcInfinity): 1369 case convolve(fcZero, fcInfinity): 1370 category = fcInfinity; 1371 sign = rhs.sign ^ subtract; 1372 return opOK; 1373 1374 case convolve(fcZero, fcNormal): 1375 assign(rhs); 1376 sign = rhs.sign ^ subtract; 1377 return opOK; 1378 1379 case convolve(fcZero, fcZero): 1380 /* Sign depends on rounding mode; handled by caller. */ 1381 return opOK; 1382 1383 case convolve(fcInfinity, fcInfinity): 1384 /* Differently signed infinities can only be validly 1385 subtracted. */ 1386 if (((sign ^ rhs.sign)!=0) != subtract) { 1387 makeNaN(); 1388 return opInvalidOp; 1389 } 1390 1391 return opOK; 1392 1393 case convolve(fcNormal, fcNormal): 1394 return opDivByZero; 1395 } 1396 } 1397 1398 /* Add or subtract two normal numbers. */ 1399 lostFraction 1400 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1401 { 1402 integerPart carry; 1403 lostFraction lost_fraction; 1404 int bits; 1405 1406 /* Determine if the operation on the absolute values is effectively 1407 an addition or subtraction. */ 1408 subtract ^= (sign ^ rhs.sign) ? true : false; 1409 1410 /* Are we bigger exponent-wise than the RHS? */ 1411 bits = exponent - rhs.exponent; 1412 1413 /* Subtraction is more subtle than one might naively expect. */ 1414 if (subtract) { 1415 APFloat temp_rhs(rhs); 1416 bool reverse; 1417 1418 if (bits == 0) { 1419 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1420 lost_fraction = lfExactlyZero; 1421 } else if (bits > 0) { 1422 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1423 shiftSignificandLeft(1); 1424 reverse = false; 1425 } else { 1426 lost_fraction = shiftSignificandRight(-bits - 1); 1427 temp_rhs.shiftSignificandLeft(1); 1428 reverse = true; 1429 } 1430 1431 if (reverse) { 1432 carry = temp_rhs.subtractSignificand 1433 (*this, lost_fraction != lfExactlyZero); 1434 copySignificand(temp_rhs); 1435 sign = !sign; 1436 } else { 1437 carry = subtractSignificand 1438 (temp_rhs, lost_fraction != lfExactlyZero); 1439 } 1440 1441 /* Invert the lost fraction - it was on the RHS and 1442 subtracted. */ 1443 if (lost_fraction == lfLessThanHalf) 1444 lost_fraction = lfMoreThanHalf; 1445 else if (lost_fraction == lfMoreThanHalf) 1446 lost_fraction = lfLessThanHalf; 1447 1448 /* The code above is intended to ensure that no borrow is 1449 necessary. */ 1450 assert(!carry); 1451 (void)carry; 1452 } else { 1453 if (bits > 0) { 1454 APFloat temp_rhs(rhs); 1455 1456 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1457 carry = addSignificand(temp_rhs); 1458 } else { 1459 lost_fraction = shiftSignificandRight(-bits); 1460 carry = addSignificand(rhs); 1461 } 1462 1463 /* We have a guard bit; generating a carry cannot happen. */ 1464 assert(!carry); 1465 (void)carry; 1466 } 1467 1468 return lost_fraction; 1469 } 1470 1471 APFloat::opStatus 1472 APFloat::multiplySpecials(const APFloat &rhs) 1473 { 1474 switch (convolve(category, rhs.category)) { 1475 default: 1476 llvm_unreachable(0); 1477 1478 case convolve(fcNaN, fcZero): 1479 case convolve(fcNaN, fcNormal): 1480 case convolve(fcNaN, fcInfinity): 1481 case convolve(fcNaN, fcNaN): 1482 return opOK; 1483 1484 case convolve(fcZero, fcNaN): 1485 case convolve(fcNormal, fcNaN): 1486 case convolve(fcInfinity, fcNaN): 1487 category = fcNaN; 1488 copySignificand(rhs); 1489 return opOK; 1490 1491 case convolve(fcNormal, fcInfinity): 1492 case convolve(fcInfinity, fcNormal): 1493 case convolve(fcInfinity, fcInfinity): 1494 category = fcInfinity; 1495 return opOK; 1496 1497 case convolve(fcZero, fcNormal): 1498 case convolve(fcNormal, fcZero): 1499 case convolve(fcZero, fcZero): 1500 category = fcZero; 1501 return opOK; 1502 1503 case convolve(fcZero, fcInfinity): 1504 case convolve(fcInfinity, fcZero): 1505 makeNaN(); 1506 return opInvalidOp; 1507 1508 case convolve(fcNormal, fcNormal): 1509 return opOK; 1510 } 1511 } 1512 1513 APFloat::opStatus 1514 APFloat::divideSpecials(const APFloat &rhs) 1515 { 1516 switch (convolve(category, rhs.category)) { 1517 default: 1518 llvm_unreachable(0); 1519 1520 case convolve(fcNaN, fcZero): 1521 case convolve(fcNaN, fcNormal): 1522 case convolve(fcNaN, fcInfinity): 1523 case convolve(fcNaN, fcNaN): 1524 case convolve(fcInfinity, fcZero): 1525 case convolve(fcInfinity, fcNormal): 1526 case convolve(fcZero, fcInfinity): 1527 case convolve(fcZero, fcNormal): 1528 return opOK; 1529 1530 case convolve(fcZero, fcNaN): 1531 case convolve(fcNormal, fcNaN): 1532 case convolve(fcInfinity, fcNaN): 1533 category = fcNaN; 1534 copySignificand(rhs); 1535 return opOK; 1536 1537 case convolve(fcNormal, fcInfinity): 1538 category = fcZero; 1539 return opOK; 1540 1541 case convolve(fcNormal, fcZero): 1542 category = fcInfinity; 1543 return opDivByZero; 1544 1545 case convolve(fcInfinity, fcInfinity): 1546 case convolve(fcZero, fcZero): 1547 makeNaN(); 1548 return opInvalidOp; 1549 1550 case convolve(fcNormal, fcNormal): 1551 return opOK; 1552 } 1553 } 1554 1555 APFloat::opStatus 1556 APFloat::modSpecials(const APFloat &rhs) 1557 { 1558 switch (convolve(category, rhs.category)) { 1559 default: 1560 llvm_unreachable(0); 1561 1562 case convolve(fcNaN, fcZero): 1563 case convolve(fcNaN, fcNormal): 1564 case convolve(fcNaN, fcInfinity): 1565 case convolve(fcNaN, fcNaN): 1566 case convolve(fcZero, fcInfinity): 1567 case convolve(fcZero, fcNormal): 1568 case convolve(fcNormal, fcInfinity): 1569 return opOK; 1570 1571 case convolve(fcZero, fcNaN): 1572 case convolve(fcNormal, fcNaN): 1573 case convolve(fcInfinity, fcNaN): 1574 category = fcNaN; 1575 copySignificand(rhs); 1576 return opOK; 1577 1578 case convolve(fcNormal, fcZero): 1579 case convolve(fcInfinity, fcZero): 1580 case convolve(fcInfinity, fcNormal): 1581 case convolve(fcInfinity, fcInfinity): 1582 case convolve(fcZero, fcZero): 1583 makeNaN(); 1584 return opInvalidOp; 1585 1586 case convolve(fcNormal, fcNormal): 1587 return opOK; 1588 } 1589 } 1590 1591 /* Change sign. */ 1592 void 1593 APFloat::changeSign() 1594 { 1595 /* Look mummy, this one's easy. */ 1596 sign = !sign; 1597 } 1598 1599 void 1600 APFloat::clearSign() 1601 { 1602 /* So is this one. */ 1603 sign = 0; 1604 } 1605 1606 void 1607 APFloat::copySign(const APFloat &rhs) 1608 { 1609 /* And this one. */ 1610 sign = rhs.sign; 1611 } 1612 1613 /* Normalized addition or subtraction. */ 1614 APFloat::opStatus 1615 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1616 bool subtract) 1617 { 1618 opStatus fs; 1619 1620 fs = addOrSubtractSpecials(rhs, subtract); 1621 1622 /* This return code means it was not a simple case. */ 1623 if (fs == opDivByZero) { 1624 lostFraction lost_fraction; 1625 1626 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1627 fs = normalize(rounding_mode, lost_fraction); 1628 1629 /* Can only be zero if we lost no fraction. */ 1630 assert(category != fcZero || lost_fraction == lfExactlyZero); 1631 } 1632 1633 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1634 positive zero unless rounding to minus infinity, except that 1635 adding two like-signed zeroes gives that zero. */ 1636 if (category == fcZero) { 1637 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1638 sign = (rounding_mode == rmTowardNegative); 1639 } 1640 1641 return fs; 1642 } 1643 1644 /* Normalized addition. */ 1645 APFloat::opStatus 1646 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1647 { 1648 return addOrSubtract(rhs, rounding_mode, false); 1649 } 1650 1651 /* Normalized subtraction. */ 1652 APFloat::opStatus 1653 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1654 { 1655 return addOrSubtract(rhs, rounding_mode, true); 1656 } 1657 1658 /* Normalized multiply. */ 1659 APFloat::opStatus 1660 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1661 { 1662 opStatus fs; 1663 1664 sign ^= rhs.sign; 1665 fs = multiplySpecials(rhs); 1666 1667 if (category == fcNormal) { 1668 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1669 fs = normalize(rounding_mode, lost_fraction); 1670 if (lost_fraction != lfExactlyZero) 1671 fs = (opStatus) (fs | opInexact); 1672 } 1673 1674 return fs; 1675 } 1676 1677 /* Normalized divide. */ 1678 APFloat::opStatus 1679 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1680 { 1681 opStatus fs; 1682 1683 sign ^= rhs.sign; 1684 fs = divideSpecials(rhs); 1685 1686 if (category == fcNormal) { 1687 lostFraction lost_fraction = divideSignificand(rhs); 1688 fs = normalize(rounding_mode, lost_fraction); 1689 if (lost_fraction != lfExactlyZero) 1690 fs = (opStatus) (fs | opInexact); 1691 } 1692 1693 return fs; 1694 } 1695 1696 /* Normalized remainder. This is not currently correct in all cases. */ 1697 APFloat::opStatus 1698 APFloat::remainder(const APFloat &rhs) 1699 { 1700 opStatus fs; 1701 APFloat V = *this; 1702 unsigned int origSign = sign; 1703 1704 fs = V.divide(rhs, rmNearestTiesToEven); 1705 if (fs == opDivByZero) 1706 return fs; 1707 1708 int parts = partCount(); 1709 integerPart *x = new integerPart[parts]; 1710 bool ignored; 1711 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1712 rmNearestTiesToEven, &ignored); 1713 if (fs==opInvalidOp) 1714 return fs; 1715 1716 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1717 rmNearestTiesToEven); 1718 assert(fs==opOK); // should always work 1719 1720 fs = V.multiply(rhs, rmNearestTiesToEven); 1721 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1722 1723 fs = subtract(V, rmNearestTiesToEven); 1724 assert(fs==opOK || fs==opInexact); // likewise 1725 1726 if (isZero()) 1727 sign = origSign; // IEEE754 requires this 1728 delete[] x; 1729 return fs; 1730 } 1731 1732 /* Normalized llvm frem (C fmod). 1733 This is not currently correct in all cases. */ 1734 APFloat::opStatus 1735 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1736 { 1737 opStatus fs; 1738 fs = modSpecials(rhs); 1739 1740 if (category == fcNormal && rhs.category == fcNormal) { 1741 APFloat V = *this; 1742 unsigned int origSign = sign; 1743 1744 fs = V.divide(rhs, rmNearestTiesToEven); 1745 if (fs == opDivByZero) 1746 return fs; 1747 1748 int parts = partCount(); 1749 integerPart *x = new integerPart[parts]; 1750 bool ignored; 1751 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1752 rmTowardZero, &ignored); 1753 if (fs==opInvalidOp) 1754 return fs; 1755 1756 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1757 rmNearestTiesToEven); 1758 assert(fs==opOK); // should always work 1759 1760 fs = V.multiply(rhs, rounding_mode); 1761 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1762 1763 fs = subtract(V, rounding_mode); 1764 assert(fs==opOK || fs==opInexact); // likewise 1765 1766 if (isZero()) 1767 sign = origSign; // IEEE754 requires this 1768 delete[] x; 1769 } 1770 return fs; 1771 } 1772 1773 /* Normalized fused-multiply-add. */ 1774 APFloat::opStatus 1775 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1776 const APFloat &addend, 1777 roundingMode rounding_mode) 1778 { 1779 opStatus fs; 1780 1781 /* Post-multiplication sign, before addition. */ 1782 sign ^= multiplicand.sign; 1783 1784 /* If and only if all arguments are normal do we need to do an 1785 extended-precision calculation. */ 1786 if (category == fcNormal && 1787 multiplicand.category == fcNormal && 1788 addend.category == fcNormal) { 1789 lostFraction lost_fraction; 1790 1791 lost_fraction = multiplySignificand(multiplicand, &addend); 1792 fs = normalize(rounding_mode, lost_fraction); 1793 if (lost_fraction != lfExactlyZero) 1794 fs = (opStatus) (fs | opInexact); 1795 1796 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1797 positive zero unless rounding to minus infinity, except that 1798 adding two like-signed zeroes gives that zero. */ 1799 if (category == fcZero && sign != addend.sign) 1800 sign = (rounding_mode == rmTowardNegative); 1801 } else { 1802 fs = multiplySpecials(multiplicand); 1803 1804 /* FS can only be opOK or opInvalidOp. There is no more work 1805 to do in the latter case. The IEEE-754R standard says it is 1806 implementation-defined in this case whether, if ADDEND is a 1807 quiet NaN, we raise invalid op; this implementation does so. 1808 1809 If we need to do the addition we can do so with normal 1810 precision. */ 1811 if (fs == opOK) 1812 fs = addOrSubtract(addend, rounding_mode, false); 1813 } 1814 1815 return fs; 1816 } 1817 1818 /* Rounding-mode corrrect round to integral value. */ 1819 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1820 opStatus fs; 1821 1822 // If the exponent is large enough, we know that this value is already 1823 // integral, and the arithmetic below would potentially cause it to saturate 1824 // to +/-Inf. Bail out early instead. 1825 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics)) 1826 return opOK; 1827 1828 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1829 // precision of our format, and then subtract it back off again. The choice 1830 // of rounding modes for the addition/subtraction determines the rounding mode 1831 // for our integral rounding as well. 1832 // NOTE: When the input value is negative, we do subtraction followed by 1833 // addition instead. 1834 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1835 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1836 APFloat MagicConstant(*semantics); 1837 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1838 rmNearestTiesToEven); 1839 MagicConstant.copySign(*this); 1840 1841 if (fs != opOK) 1842 return fs; 1843 1844 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1845 bool inputSign = isNegative(); 1846 1847 fs = add(MagicConstant, rounding_mode); 1848 if (fs != opOK && fs != opInexact) 1849 return fs; 1850 1851 fs = subtract(MagicConstant, rounding_mode); 1852 1853 // Restore the input sign. 1854 if (inputSign != isNegative()) 1855 changeSign(); 1856 1857 return fs; 1858 } 1859 1860 1861 /* Comparison requires normalized numbers. */ 1862 APFloat::cmpResult 1863 APFloat::compare(const APFloat &rhs) const 1864 { 1865 cmpResult result; 1866 1867 assert(semantics == rhs.semantics); 1868 1869 switch (convolve(category, rhs.category)) { 1870 default: 1871 llvm_unreachable(0); 1872 1873 case convolve(fcNaN, fcZero): 1874 case convolve(fcNaN, fcNormal): 1875 case convolve(fcNaN, fcInfinity): 1876 case convolve(fcNaN, fcNaN): 1877 case convolve(fcZero, fcNaN): 1878 case convolve(fcNormal, fcNaN): 1879 case convolve(fcInfinity, fcNaN): 1880 return cmpUnordered; 1881 1882 case convolve(fcInfinity, fcNormal): 1883 case convolve(fcInfinity, fcZero): 1884 case convolve(fcNormal, fcZero): 1885 if (sign) 1886 return cmpLessThan; 1887 else 1888 return cmpGreaterThan; 1889 1890 case convolve(fcNormal, fcInfinity): 1891 case convolve(fcZero, fcInfinity): 1892 case convolve(fcZero, fcNormal): 1893 if (rhs.sign) 1894 return cmpGreaterThan; 1895 else 1896 return cmpLessThan; 1897 1898 case convolve(fcInfinity, fcInfinity): 1899 if (sign == rhs.sign) 1900 return cmpEqual; 1901 else if (sign) 1902 return cmpLessThan; 1903 else 1904 return cmpGreaterThan; 1905 1906 case convolve(fcZero, fcZero): 1907 return cmpEqual; 1908 1909 case convolve(fcNormal, fcNormal): 1910 break; 1911 } 1912 1913 /* Two normal numbers. Do they have the same sign? */ 1914 if (sign != rhs.sign) { 1915 if (sign) 1916 result = cmpLessThan; 1917 else 1918 result = cmpGreaterThan; 1919 } else { 1920 /* Compare absolute values; invert result if negative. */ 1921 result = compareAbsoluteValue(rhs); 1922 1923 if (sign) { 1924 if (result == cmpLessThan) 1925 result = cmpGreaterThan; 1926 else if (result == cmpGreaterThan) 1927 result = cmpLessThan; 1928 } 1929 } 1930 1931 return result; 1932 } 1933 1934 /// APFloat::convert - convert a value of one floating point type to another. 1935 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1936 /// records whether the transformation lost information, i.e. whether 1937 /// converting the result back to the original type will produce the 1938 /// original value (this is almost the same as return value==fsOK, but there 1939 /// are edge cases where this is not so). 1940 1941 APFloat::opStatus 1942 APFloat::convert(const fltSemantics &toSemantics, 1943 roundingMode rounding_mode, bool *losesInfo) 1944 { 1945 lostFraction lostFraction; 1946 unsigned int newPartCount, oldPartCount; 1947 opStatus fs; 1948 int shift; 1949 const fltSemantics &fromSemantics = *semantics; 1950 1951 lostFraction = lfExactlyZero; 1952 newPartCount = partCountForBits(toSemantics.precision + 1); 1953 oldPartCount = partCount(); 1954 shift = toSemantics.precision - fromSemantics.precision; 1955 1956 bool X86SpecialNan = false; 1957 if (&fromSemantics == &APFloat::x87DoubleExtended && 1958 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 1959 (!(*significandParts() & 0x8000000000000000ULL) || 1960 !(*significandParts() & 0x4000000000000000ULL))) { 1961 // x86 has some unusual NaNs which cannot be represented in any other 1962 // format; note them here. 1963 X86SpecialNan = true; 1964 } 1965 1966 // If this is a truncation, perform the shift before we narrow the storage. 1967 if (shift < 0 && (category==fcNormal || category==fcNaN)) 1968 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 1969 1970 // Fix the storage so it can hold to new value. 1971 if (newPartCount > oldPartCount) { 1972 // The new type requires more storage; make it available. 1973 integerPart *newParts; 1974 newParts = new integerPart[newPartCount]; 1975 APInt::tcSet(newParts, 0, newPartCount); 1976 if (category==fcNormal || category==fcNaN) 1977 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1978 freeSignificand(); 1979 significand.parts = newParts; 1980 } else if (newPartCount == 1 && oldPartCount != 1) { 1981 // Switch to built-in storage for a single part. 1982 integerPart newPart = 0; 1983 if (category==fcNormal || category==fcNaN) 1984 newPart = significandParts()[0]; 1985 freeSignificand(); 1986 significand.part = newPart; 1987 } 1988 1989 // Now that we have the right storage, switch the semantics. 1990 semantics = &toSemantics; 1991 1992 // If this is an extension, perform the shift now that the storage is 1993 // available. 1994 if (shift > 0 && (category==fcNormal || category==fcNaN)) 1995 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1996 1997 if (category == fcNormal) { 1998 fs = normalize(rounding_mode, lostFraction); 1999 *losesInfo = (fs != opOK); 2000 } else if (category == fcNaN) { 2001 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 2002 2003 // For x87 extended precision, we want to make a NaN, not a special NaN if 2004 // the input wasn't special either. 2005 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) 2006 APInt::tcSetBit(significandParts(), semantics->precision - 1); 2007 2008 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 2009 // does not give you back the same bits. This is dubious, and we 2010 // don't currently do it. You're really supposed to get 2011 // an invalid operation signal at runtime, but nobody does that. 2012 fs = opOK; 2013 } else { 2014 *losesInfo = false; 2015 fs = opOK; 2016 } 2017 2018 return fs; 2019 } 2020 2021 /* Convert a floating point number to an integer according to the 2022 rounding mode. If the rounded integer value is out of range this 2023 returns an invalid operation exception and the contents of the 2024 destination parts are unspecified. If the rounded value is in 2025 range but the floating point number is not the exact integer, the C 2026 standard doesn't require an inexact exception to be raised. IEEE 2027 854 does require it so we do that. 2028 2029 Note that for conversions to integer type the C standard requires 2030 round-to-zero to always be used. */ 2031 APFloat::opStatus 2032 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 2033 bool isSigned, 2034 roundingMode rounding_mode, 2035 bool *isExact) const 2036 { 2037 lostFraction lost_fraction; 2038 const integerPart *src; 2039 unsigned int dstPartsCount, truncatedBits; 2040 2041 *isExact = false; 2042 2043 /* Handle the three special cases first. */ 2044 if (category == fcInfinity || category == fcNaN) 2045 return opInvalidOp; 2046 2047 dstPartsCount = partCountForBits(width); 2048 2049 if (category == fcZero) { 2050 APInt::tcSet(parts, 0, dstPartsCount); 2051 // Negative zero can't be represented as an int. 2052 *isExact = !sign; 2053 return opOK; 2054 } 2055 2056 src = significandParts(); 2057 2058 /* Step 1: place our absolute value, with any fraction truncated, in 2059 the destination. */ 2060 if (exponent < 0) { 2061 /* Our absolute value is less than one; truncate everything. */ 2062 APInt::tcSet(parts, 0, dstPartsCount); 2063 /* For exponent -1 the integer bit represents .5, look at that. 2064 For smaller exponents leftmost truncated bit is 0. */ 2065 truncatedBits = semantics->precision -1U - exponent; 2066 } else { 2067 /* We want the most significant (exponent + 1) bits; the rest are 2068 truncated. */ 2069 unsigned int bits = exponent + 1U; 2070 2071 /* Hopelessly large in magnitude? */ 2072 if (bits > width) 2073 return opInvalidOp; 2074 2075 if (bits < semantics->precision) { 2076 /* We truncate (semantics->precision - bits) bits. */ 2077 truncatedBits = semantics->precision - bits; 2078 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 2079 } else { 2080 /* We want at least as many bits as are available. */ 2081 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 2082 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 2083 truncatedBits = 0; 2084 } 2085 } 2086 2087 /* Step 2: work out any lost fraction, and increment the absolute 2088 value if we would round away from zero. */ 2089 if (truncatedBits) { 2090 lost_fraction = lostFractionThroughTruncation(src, partCount(), 2091 truncatedBits); 2092 if (lost_fraction != lfExactlyZero && 2093 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2094 if (APInt::tcIncrement(parts, dstPartsCount)) 2095 return opInvalidOp; /* Overflow. */ 2096 } 2097 } else { 2098 lost_fraction = lfExactlyZero; 2099 } 2100 2101 /* Step 3: check if we fit in the destination. */ 2102 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2103 2104 if (sign) { 2105 if (!isSigned) { 2106 /* Negative numbers cannot be represented as unsigned. */ 2107 if (omsb != 0) 2108 return opInvalidOp; 2109 } else { 2110 /* It takes omsb bits to represent the unsigned integer value. 2111 We lose a bit for the sign, but care is needed as the 2112 maximally negative integer is a special case. */ 2113 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2114 return opInvalidOp; 2115 2116 /* This case can happen because of rounding. */ 2117 if (omsb > width) 2118 return opInvalidOp; 2119 } 2120 2121 APInt::tcNegate (parts, dstPartsCount); 2122 } else { 2123 if (omsb >= width + !isSigned) 2124 return opInvalidOp; 2125 } 2126 2127 if (lost_fraction == lfExactlyZero) { 2128 *isExact = true; 2129 return opOK; 2130 } else 2131 return opInexact; 2132 } 2133 2134 /* Same as convertToSignExtendedInteger, except we provide 2135 deterministic values in case of an invalid operation exception, 2136 namely zero for NaNs and the minimal or maximal value respectively 2137 for underflow or overflow. 2138 The *isExact output tells whether the result is exact, in the sense 2139 that converting it back to the original floating point type produces 2140 the original value. This is almost equivalent to result==opOK, 2141 except for negative zeroes. 2142 */ 2143 APFloat::opStatus 2144 APFloat::convertToInteger(integerPart *parts, unsigned int width, 2145 bool isSigned, 2146 roundingMode rounding_mode, bool *isExact) const 2147 { 2148 opStatus fs; 2149 2150 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2151 isExact); 2152 2153 if (fs == opInvalidOp) { 2154 unsigned int bits, dstPartsCount; 2155 2156 dstPartsCount = partCountForBits(width); 2157 2158 if (category == fcNaN) 2159 bits = 0; 2160 else if (sign) 2161 bits = isSigned; 2162 else 2163 bits = width - isSigned; 2164 2165 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2166 if (sign && isSigned) 2167 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2168 } 2169 2170 return fs; 2171 } 2172 2173 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2174 an APSInt, whose initial bit-width and signed-ness are used to determine the 2175 precision of the conversion. 2176 */ 2177 APFloat::opStatus 2178 APFloat::convertToInteger(APSInt &result, 2179 roundingMode rounding_mode, bool *isExact) const 2180 { 2181 unsigned bitWidth = result.getBitWidth(); 2182 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2183 opStatus status = convertToInteger( 2184 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2185 // Keeps the original signed-ness. 2186 result = APInt(bitWidth, parts); 2187 return status; 2188 } 2189 2190 /* Convert an unsigned integer SRC to a floating point number, 2191 rounding according to ROUNDING_MODE. The sign of the floating 2192 point number is not modified. */ 2193 APFloat::opStatus 2194 APFloat::convertFromUnsignedParts(const integerPart *src, 2195 unsigned int srcCount, 2196 roundingMode rounding_mode) 2197 { 2198 unsigned int omsb, precision, dstCount; 2199 integerPart *dst; 2200 lostFraction lost_fraction; 2201 2202 category = fcNormal; 2203 omsb = APInt::tcMSB(src, srcCount) + 1; 2204 dst = significandParts(); 2205 dstCount = partCount(); 2206 precision = semantics->precision; 2207 2208 /* We want the most significant PRECISION bits of SRC. There may not 2209 be that many; extract what we can. */ 2210 if (precision <= omsb) { 2211 exponent = omsb - 1; 2212 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2213 omsb - precision); 2214 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2215 } else { 2216 exponent = precision - 1; 2217 lost_fraction = lfExactlyZero; 2218 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2219 } 2220 2221 return normalize(rounding_mode, lost_fraction); 2222 } 2223 2224 APFloat::opStatus 2225 APFloat::convertFromAPInt(const APInt &Val, 2226 bool isSigned, 2227 roundingMode rounding_mode) 2228 { 2229 unsigned int partCount = Val.getNumWords(); 2230 APInt api = Val; 2231 2232 sign = false; 2233 if (isSigned && api.isNegative()) { 2234 sign = true; 2235 api = -api; 2236 } 2237 2238 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2239 } 2240 2241 /* Convert a two's complement integer SRC to a floating point number, 2242 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2243 integer is signed, in which case it must be sign-extended. */ 2244 APFloat::opStatus 2245 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2246 unsigned int srcCount, 2247 bool isSigned, 2248 roundingMode rounding_mode) 2249 { 2250 opStatus status; 2251 2252 if (isSigned && 2253 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2254 integerPart *copy; 2255 2256 /* If we're signed and negative negate a copy. */ 2257 sign = true; 2258 copy = new integerPart[srcCount]; 2259 APInt::tcAssign(copy, src, srcCount); 2260 APInt::tcNegate(copy, srcCount); 2261 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2262 delete [] copy; 2263 } else { 2264 sign = false; 2265 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2266 } 2267 2268 return status; 2269 } 2270 2271 /* FIXME: should this just take a const APInt reference? */ 2272 APFloat::opStatus 2273 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2274 unsigned int width, bool isSigned, 2275 roundingMode rounding_mode) 2276 { 2277 unsigned int partCount = partCountForBits(width); 2278 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2279 2280 sign = false; 2281 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2282 sign = true; 2283 api = -api; 2284 } 2285 2286 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2287 } 2288 2289 APFloat::opStatus 2290 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2291 { 2292 lostFraction lost_fraction = lfExactlyZero; 2293 integerPart *significand; 2294 unsigned int bitPos, partsCount; 2295 StringRef::iterator dot, firstSignificantDigit; 2296 2297 zeroSignificand(); 2298 exponent = 0; 2299 category = fcNormal; 2300 2301 significand = significandParts(); 2302 partsCount = partCount(); 2303 bitPos = partsCount * integerPartWidth; 2304 2305 /* Skip leading zeroes and any (hexa)decimal point. */ 2306 StringRef::iterator begin = s.begin(); 2307 StringRef::iterator end = s.end(); 2308 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2309 firstSignificantDigit = p; 2310 2311 for (; p != end;) { 2312 integerPart hex_value; 2313 2314 if (*p == '.') { 2315 assert(dot == end && "String contains multiple dots"); 2316 dot = p++; 2317 if (p == end) { 2318 break; 2319 } 2320 } 2321 2322 hex_value = hexDigitValue(*p); 2323 if (hex_value == -1U) { 2324 break; 2325 } 2326 2327 p++; 2328 2329 if (p == end) { 2330 break; 2331 } else { 2332 /* Store the number whilst 4-bit nibbles remain. */ 2333 if (bitPos) { 2334 bitPos -= 4; 2335 hex_value <<= bitPos % integerPartWidth; 2336 significand[bitPos / integerPartWidth] |= hex_value; 2337 } else { 2338 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2339 while (p != end && hexDigitValue(*p) != -1U) 2340 p++; 2341 break; 2342 } 2343 } 2344 } 2345 2346 /* Hex floats require an exponent but not a hexadecimal point. */ 2347 assert(p != end && "Hex strings require an exponent"); 2348 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2349 assert(p != begin && "Significand has no digits"); 2350 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2351 2352 /* Ignore the exponent if we are zero. */ 2353 if (p != firstSignificantDigit) { 2354 int expAdjustment; 2355 2356 /* Implicit hexadecimal point? */ 2357 if (dot == end) 2358 dot = p; 2359 2360 /* Calculate the exponent adjustment implicit in the number of 2361 significant digits. */ 2362 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2363 if (expAdjustment < 0) 2364 expAdjustment++; 2365 expAdjustment = expAdjustment * 4 - 1; 2366 2367 /* Adjust for writing the significand starting at the most 2368 significant nibble. */ 2369 expAdjustment += semantics->precision; 2370 expAdjustment -= partsCount * integerPartWidth; 2371 2372 /* Adjust for the given exponent. */ 2373 exponent = totalExponent(p + 1, end, expAdjustment); 2374 } 2375 2376 return normalize(rounding_mode, lost_fraction); 2377 } 2378 2379 APFloat::opStatus 2380 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2381 unsigned sigPartCount, int exp, 2382 roundingMode rounding_mode) 2383 { 2384 unsigned int parts, pow5PartCount; 2385 fltSemantics calcSemantics = { 32767, -32767, 0 }; 2386 integerPart pow5Parts[maxPowerOfFiveParts]; 2387 bool isNearest; 2388 2389 isNearest = (rounding_mode == rmNearestTiesToEven || 2390 rounding_mode == rmNearestTiesToAway); 2391 2392 parts = partCountForBits(semantics->precision + 11); 2393 2394 /* Calculate pow(5, abs(exp)). */ 2395 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2396 2397 for (;; parts *= 2) { 2398 opStatus sigStatus, powStatus; 2399 unsigned int excessPrecision, truncatedBits; 2400 2401 calcSemantics.precision = parts * integerPartWidth - 1; 2402 excessPrecision = calcSemantics.precision - semantics->precision; 2403 truncatedBits = excessPrecision; 2404 2405 APFloat decSig(calcSemantics, fcZero, sign); 2406 APFloat pow5(calcSemantics, fcZero, false); 2407 2408 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2409 rmNearestTiesToEven); 2410 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2411 rmNearestTiesToEven); 2412 /* Add exp, as 10^n = 5^n * 2^n. */ 2413 decSig.exponent += exp; 2414 2415 lostFraction calcLostFraction; 2416 integerPart HUerr, HUdistance; 2417 unsigned int powHUerr; 2418 2419 if (exp >= 0) { 2420 /* multiplySignificand leaves the precision-th bit set to 1. */ 2421 calcLostFraction = decSig.multiplySignificand(pow5, NULL); 2422 powHUerr = powStatus != opOK; 2423 } else { 2424 calcLostFraction = decSig.divideSignificand(pow5); 2425 /* Denormal numbers have less precision. */ 2426 if (decSig.exponent < semantics->minExponent) { 2427 excessPrecision += (semantics->minExponent - decSig.exponent); 2428 truncatedBits = excessPrecision; 2429 if (excessPrecision > calcSemantics.precision) 2430 excessPrecision = calcSemantics.precision; 2431 } 2432 /* Extra half-ulp lost in reciprocal of exponent. */ 2433 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2434 } 2435 2436 /* Both multiplySignificand and divideSignificand return the 2437 result with the integer bit set. */ 2438 assert(APInt::tcExtractBit 2439 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2440 2441 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2442 powHUerr); 2443 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2444 excessPrecision, isNearest); 2445 2446 /* Are we guaranteed to round correctly if we truncate? */ 2447 if (HUdistance >= HUerr) { 2448 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2449 calcSemantics.precision - excessPrecision, 2450 excessPrecision); 2451 /* Take the exponent of decSig. If we tcExtract-ed less bits 2452 above we must adjust our exponent to compensate for the 2453 implicit right shift. */ 2454 exponent = (decSig.exponent + semantics->precision 2455 - (calcSemantics.precision - excessPrecision)); 2456 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2457 decSig.partCount(), 2458 truncatedBits); 2459 return normalize(rounding_mode, calcLostFraction); 2460 } 2461 } 2462 } 2463 2464 APFloat::opStatus 2465 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2466 { 2467 decimalInfo D; 2468 opStatus fs; 2469 2470 /* Scan the text. */ 2471 StringRef::iterator p = str.begin(); 2472 interpretDecimal(p, str.end(), &D); 2473 2474 /* Handle the quick cases. First the case of no significant digits, 2475 i.e. zero, and then exponents that are obviously too large or too 2476 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2477 definitely overflows if 2478 2479 (exp - 1) * L >= maxExponent 2480 2481 and definitely underflows to zero where 2482 2483 (exp + 1) * L <= minExponent - precision 2484 2485 With integer arithmetic the tightest bounds for L are 2486 2487 93/28 < L < 196/59 [ numerator <= 256 ] 2488 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2489 */ 2490 2491 if (decDigitValue(*D.firstSigDigit) >= 10U) { 2492 category = fcZero; 2493 fs = opOK; 2494 2495 /* Check whether the normalized exponent is high enough to overflow 2496 max during the log-rebasing in the max-exponent check below. */ 2497 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2498 fs = handleOverflow(rounding_mode); 2499 2500 /* If it wasn't, then it also wasn't high enough to overflow max 2501 during the log-rebasing in the min-exponent check. Check that it 2502 won't overflow min in either check, then perform the min-exponent 2503 check. */ 2504 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2505 (D.normalizedExponent + 1) * 28738 <= 2506 8651 * (semantics->minExponent - (int) semantics->precision)) { 2507 /* Underflow to zero and round. */ 2508 zeroSignificand(); 2509 fs = normalize(rounding_mode, lfLessThanHalf); 2510 2511 /* We can finally safely perform the max-exponent check. */ 2512 } else if ((D.normalizedExponent - 1) * 42039 2513 >= 12655 * semantics->maxExponent) { 2514 /* Overflow and round. */ 2515 fs = handleOverflow(rounding_mode); 2516 } else { 2517 integerPart *decSignificand; 2518 unsigned int partCount; 2519 2520 /* A tight upper bound on number of bits required to hold an 2521 N-digit decimal integer is N * 196 / 59. Allocate enough space 2522 to hold the full significand, and an extra part required by 2523 tcMultiplyPart. */ 2524 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2525 partCount = partCountForBits(1 + 196 * partCount / 59); 2526 decSignificand = new integerPart[partCount + 1]; 2527 partCount = 0; 2528 2529 /* Convert to binary efficiently - we do almost all multiplication 2530 in an integerPart. When this would overflow do we do a single 2531 bignum multiplication, and then revert again to multiplication 2532 in an integerPart. */ 2533 do { 2534 integerPart decValue, val, multiplier; 2535 2536 val = 0; 2537 multiplier = 1; 2538 2539 do { 2540 if (*p == '.') { 2541 p++; 2542 if (p == str.end()) { 2543 break; 2544 } 2545 } 2546 decValue = decDigitValue(*p++); 2547 assert(decValue < 10U && "Invalid character in significand"); 2548 multiplier *= 10; 2549 val = val * 10 + decValue; 2550 /* The maximum number that can be multiplied by ten with any 2551 digit added without overflowing an integerPart. */ 2552 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2553 2554 /* Multiply out the current part. */ 2555 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2556 partCount, partCount + 1, false); 2557 2558 /* If we used another part (likely but not guaranteed), increase 2559 the count. */ 2560 if (decSignificand[partCount]) 2561 partCount++; 2562 } while (p <= D.lastSigDigit); 2563 2564 category = fcNormal; 2565 fs = roundSignificandWithExponent(decSignificand, partCount, 2566 D.exponent, rounding_mode); 2567 2568 delete [] decSignificand; 2569 } 2570 2571 return fs; 2572 } 2573 2574 APFloat::opStatus 2575 APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2576 { 2577 assert(!str.empty() && "Invalid string length"); 2578 2579 /* Handle a leading minus sign. */ 2580 StringRef::iterator p = str.begin(); 2581 size_t slen = str.size(); 2582 sign = *p == '-' ? 1 : 0; 2583 if (*p == '-' || *p == '+') { 2584 p++; 2585 slen--; 2586 assert(slen && "String has no digits"); 2587 } 2588 2589 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2590 assert(slen - 2 && "Invalid string"); 2591 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2592 rounding_mode); 2593 } 2594 2595 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2596 } 2597 2598 /* Write out a hexadecimal representation of the floating point value 2599 to DST, which must be of sufficient size, in the C99 form 2600 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2601 excluding the terminating NUL. 2602 2603 If UPPERCASE, the output is in upper case, otherwise in lower case. 2604 2605 HEXDIGITS digits appear altogether, rounding the value if 2606 necessary. If HEXDIGITS is 0, the minimal precision to display the 2607 number precisely is used instead. If nothing would appear after 2608 the decimal point it is suppressed. 2609 2610 The decimal exponent is always printed and has at least one digit. 2611 Zero values display an exponent of zero. Infinities and NaNs 2612 appear as "infinity" or "nan" respectively. 2613 2614 The above rules are as specified by C99. There is ambiguity about 2615 what the leading hexadecimal digit should be. This implementation 2616 uses whatever is necessary so that the exponent is displayed as 2617 stored. This implies the exponent will fall within the IEEE format 2618 range, and the leading hexadecimal digit will be 0 (for denormals), 2619 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2620 any other digits zero). 2621 */ 2622 unsigned int 2623 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2624 bool upperCase, roundingMode rounding_mode) const 2625 { 2626 char *p; 2627 2628 p = dst; 2629 if (sign) 2630 *dst++ = '-'; 2631 2632 switch (category) { 2633 case fcInfinity: 2634 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2635 dst += sizeof infinityL - 1; 2636 break; 2637 2638 case fcNaN: 2639 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2640 dst += sizeof NaNU - 1; 2641 break; 2642 2643 case fcZero: 2644 *dst++ = '0'; 2645 *dst++ = upperCase ? 'X': 'x'; 2646 *dst++ = '0'; 2647 if (hexDigits > 1) { 2648 *dst++ = '.'; 2649 memset (dst, '0', hexDigits - 1); 2650 dst += hexDigits - 1; 2651 } 2652 *dst++ = upperCase ? 'P': 'p'; 2653 *dst++ = '0'; 2654 break; 2655 2656 case fcNormal: 2657 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2658 break; 2659 } 2660 2661 *dst = 0; 2662 2663 return static_cast<unsigned int>(dst - p); 2664 } 2665 2666 /* Does the hard work of outputting the correctly rounded hexadecimal 2667 form of a normal floating point number with the specified number of 2668 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2669 digits necessary to print the value precisely is output. */ 2670 char * 2671 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2672 bool upperCase, 2673 roundingMode rounding_mode) const 2674 { 2675 unsigned int count, valueBits, shift, partsCount, outputDigits; 2676 const char *hexDigitChars; 2677 const integerPart *significand; 2678 char *p; 2679 bool roundUp; 2680 2681 *dst++ = '0'; 2682 *dst++ = upperCase ? 'X': 'x'; 2683 2684 roundUp = false; 2685 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2686 2687 significand = significandParts(); 2688 partsCount = partCount(); 2689 2690 /* +3 because the first digit only uses the single integer bit, so 2691 we have 3 virtual zero most-significant-bits. */ 2692 valueBits = semantics->precision + 3; 2693 shift = integerPartWidth - valueBits % integerPartWidth; 2694 2695 /* The natural number of digits required ignoring trailing 2696 insignificant zeroes. */ 2697 outputDigits = (valueBits - significandLSB () + 3) / 4; 2698 2699 /* hexDigits of zero means use the required number for the 2700 precision. Otherwise, see if we are truncating. If we are, 2701 find out if we need to round away from zero. */ 2702 if (hexDigits) { 2703 if (hexDigits < outputDigits) { 2704 /* We are dropping non-zero bits, so need to check how to round. 2705 "bits" is the number of dropped bits. */ 2706 unsigned int bits; 2707 lostFraction fraction; 2708 2709 bits = valueBits - hexDigits * 4; 2710 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2711 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2712 } 2713 outputDigits = hexDigits; 2714 } 2715 2716 /* Write the digits consecutively, and start writing in the location 2717 of the hexadecimal point. We move the most significant digit 2718 left and add the hexadecimal point later. */ 2719 p = ++dst; 2720 2721 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2722 2723 while (outputDigits && count) { 2724 integerPart part; 2725 2726 /* Put the most significant integerPartWidth bits in "part". */ 2727 if (--count == partsCount) 2728 part = 0; /* An imaginary higher zero part. */ 2729 else 2730 part = significand[count] << shift; 2731 2732 if (count && shift) 2733 part |= significand[count - 1] >> (integerPartWidth - shift); 2734 2735 /* Convert as much of "part" to hexdigits as we can. */ 2736 unsigned int curDigits = integerPartWidth / 4; 2737 2738 if (curDigits > outputDigits) 2739 curDigits = outputDigits; 2740 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2741 outputDigits -= curDigits; 2742 } 2743 2744 if (roundUp) { 2745 char *q = dst; 2746 2747 /* Note that hexDigitChars has a trailing '0'. */ 2748 do { 2749 q--; 2750 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2751 } while (*q == '0'); 2752 assert(q >= p); 2753 } else { 2754 /* Add trailing zeroes. */ 2755 memset (dst, '0', outputDigits); 2756 dst += outputDigits; 2757 } 2758 2759 /* Move the most significant digit to before the point, and if there 2760 is something after the decimal point add it. This must come 2761 after rounding above. */ 2762 p[-1] = p[0]; 2763 if (dst -1 == p) 2764 dst--; 2765 else 2766 p[0] = '.'; 2767 2768 /* Finally output the exponent. */ 2769 *dst++ = upperCase ? 'P': 'p'; 2770 2771 return writeSignedDecimal (dst, exponent); 2772 } 2773 2774 hash_code llvm::hash_value(const APFloat &Arg) { 2775 if (Arg.category != APFloat::fcNormal) 2776 return hash_combine((uint8_t)Arg.category, 2777 // NaN has no sign, fix it at zero. 2778 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2779 Arg.semantics->precision); 2780 2781 // Normal floats need their exponent and significand hashed. 2782 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2783 Arg.semantics->precision, Arg.exponent, 2784 hash_combine_range( 2785 Arg.significandParts(), 2786 Arg.significandParts() + Arg.partCount())); 2787 } 2788 2789 // Conversion from APFloat to/from host float/double. It may eventually be 2790 // possible to eliminate these and have everybody deal with APFloats, but that 2791 // will take a while. This approach will not easily extend to long double. 2792 // Current implementation requires integerPartWidth==64, which is correct at 2793 // the moment but could be made more general. 2794 2795 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2796 // the actual IEEE respresentations. We compensate for that here. 2797 2798 APInt 2799 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2800 { 2801 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2802 assert(partCount()==2); 2803 2804 uint64_t myexponent, mysignificand; 2805 2806 if (category==fcNormal) { 2807 myexponent = exponent+16383; //bias 2808 mysignificand = significandParts()[0]; 2809 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2810 myexponent = 0; // denormal 2811 } else if (category==fcZero) { 2812 myexponent = 0; 2813 mysignificand = 0; 2814 } else if (category==fcInfinity) { 2815 myexponent = 0x7fff; 2816 mysignificand = 0x8000000000000000ULL; 2817 } else { 2818 assert(category == fcNaN && "Unknown category"); 2819 myexponent = 0x7fff; 2820 mysignificand = significandParts()[0]; 2821 } 2822 2823 uint64_t words[2]; 2824 words[0] = mysignificand; 2825 words[1] = ((uint64_t)(sign & 1) << 15) | 2826 (myexponent & 0x7fffLL); 2827 return APInt(80, words); 2828 } 2829 2830 APInt 2831 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2832 { 2833 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2834 assert(partCount()==2); 2835 2836 uint64_t words[2]; 2837 opStatus fs; 2838 bool losesInfo; 2839 2840 // Convert number to double. To avoid spurious underflows, we re- 2841 // normalize against the "double" minExponent first, and only *then* 2842 // truncate the mantissa. The result of that second conversion 2843 // may be inexact, but should never underflow. 2844 // Declare fltSemantics before APFloat that uses it (and 2845 // saves pointer to it) to ensure correct destruction order. 2846 fltSemantics extendedSemantics = *semantics; 2847 extendedSemantics.minExponent = IEEEdouble.minExponent; 2848 APFloat extended(*this); 2849 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2850 assert(fs == opOK && !losesInfo); 2851 (void)fs; 2852 2853 APFloat u(extended); 2854 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2855 assert(fs == opOK || fs == opInexact); 2856 (void)fs; 2857 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2858 2859 // If conversion was exact or resulted in a special case, we're done; 2860 // just set the second double to zero. Otherwise, re-convert back to 2861 // the extended format and compute the difference. This now should 2862 // convert exactly to double. 2863 if (u.category == fcNormal && losesInfo) { 2864 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2865 assert(fs == opOK && !losesInfo); 2866 (void)fs; 2867 2868 APFloat v(extended); 2869 v.subtract(u, rmNearestTiesToEven); 2870 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2871 assert(fs == opOK && !losesInfo); 2872 (void)fs; 2873 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2874 } else { 2875 words[1] = 0; 2876 } 2877 2878 return APInt(128, words); 2879 } 2880 2881 APInt 2882 APFloat::convertQuadrupleAPFloatToAPInt() const 2883 { 2884 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2885 assert(partCount()==2); 2886 2887 uint64_t myexponent, mysignificand, mysignificand2; 2888 2889 if (category==fcNormal) { 2890 myexponent = exponent+16383; //bias 2891 mysignificand = significandParts()[0]; 2892 mysignificand2 = significandParts()[1]; 2893 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2894 myexponent = 0; // denormal 2895 } else if (category==fcZero) { 2896 myexponent = 0; 2897 mysignificand = mysignificand2 = 0; 2898 } else if (category==fcInfinity) { 2899 myexponent = 0x7fff; 2900 mysignificand = mysignificand2 = 0; 2901 } else { 2902 assert(category == fcNaN && "Unknown category!"); 2903 myexponent = 0x7fff; 2904 mysignificand = significandParts()[0]; 2905 mysignificand2 = significandParts()[1]; 2906 } 2907 2908 uint64_t words[2]; 2909 words[0] = mysignificand; 2910 words[1] = ((uint64_t)(sign & 1) << 63) | 2911 ((myexponent & 0x7fff) << 48) | 2912 (mysignificand2 & 0xffffffffffffLL); 2913 2914 return APInt(128, words); 2915 } 2916 2917 APInt 2918 APFloat::convertDoubleAPFloatToAPInt() const 2919 { 2920 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2921 assert(partCount()==1); 2922 2923 uint64_t myexponent, mysignificand; 2924 2925 if (category==fcNormal) { 2926 myexponent = exponent+1023; //bias 2927 mysignificand = *significandParts(); 2928 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2929 myexponent = 0; // denormal 2930 } else if (category==fcZero) { 2931 myexponent = 0; 2932 mysignificand = 0; 2933 } else if (category==fcInfinity) { 2934 myexponent = 0x7ff; 2935 mysignificand = 0; 2936 } else { 2937 assert(category == fcNaN && "Unknown category!"); 2938 myexponent = 0x7ff; 2939 mysignificand = *significandParts(); 2940 } 2941 2942 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 2943 ((myexponent & 0x7ff) << 52) | 2944 (mysignificand & 0xfffffffffffffLL)))); 2945 } 2946 2947 APInt 2948 APFloat::convertFloatAPFloatToAPInt() const 2949 { 2950 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2951 assert(partCount()==1); 2952 2953 uint32_t myexponent, mysignificand; 2954 2955 if (category==fcNormal) { 2956 myexponent = exponent+127; //bias 2957 mysignificand = (uint32_t)*significandParts(); 2958 if (myexponent == 1 && !(mysignificand & 0x800000)) 2959 myexponent = 0; // denormal 2960 } else if (category==fcZero) { 2961 myexponent = 0; 2962 mysignificand = 0; 2963 } else if (category==fcInfinity) { 2964 myexponent = 0xff; 2965 mysignificand = 0; 2966 } else { 2967 assert(category == fcNaN && "Unknown category!"); 2968 myexponent = 0xff; 2969 mysignificand = (uint32_t)*significandParts(); 2970 } 2971 2972 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 2973 (mysignificand & 0x7fffff))); 2974 } 2975 2976 APInt 2977 APFloat::convertHalfAPFloatToAPInt() const 2978 { 2979 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 2980 assert(partCount()==1); 2981 2982 uint32_t myexponent, mysignificand; 2983 2984 if (category==fcNormal) { 2985 myexponent = exponent+15; //bias 2986 mysignificand = (uint32_t)*significandParts(); 2987 if (myexponent == 1 && !(mysignificand & 0x400)) 2988 myexponent = 0; // denormal 2989 } else if (category==fcZero) { 2990 myexponent = 0; 2991 mysignificand = 0; 2992 } else if (category==fcInfinity) { 2993 myexponent = 0x1f; 2994 mysignificand = 0; 2995 } else { 2996 assert(category == fcNaN && "Unknown category!"); 2997 myexponent = 0x1f; 2998 mysignificand = (uint32_t)*significandParts(); 2999 } 3000 3001 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 3002 (mysignificand & 0x3ff))); 3003 } 3004 3005 // This function creates an APInt that is just a bit map of the floating 3006 // point constant as it would appear in memory. It is not a conversion, 3007 // and treating the result as a normal integer is unlikely to be useful. 3008 3009 APInt 3010 APFloat::bitcastToAPInt() const 3011 { 3012 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 3013 return convertHalfAPFloatToAPInt(); 3014 3015 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 3016 return convertFloatAPFloatToAPInt(); 3017 3018 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 3019 return convertDoubleAPFloatToAPInt(); 3020 3021 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 3022 return convertQuadrupleAPFloatToAPInt(); 3023 3024 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 3025 return convertPPCDoubleDoubleAPFloatToAPInt(); 3026 3027 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 3028 "unknown format!"); 3029 return convertF80LongDoubleAPFloatToAPInt(); 3030 } 3031 3032 float 3033 APFloat::convertToFloat() const 3034 { 3035 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 3036 "Float semantics are not IEEEsingle"); 3037 APInt api = bitcastToAPInt(); 3038 return api.bitsToFloat(); 3039 } 3040 3041 double 3042 APFloat::convertToDouble() const 3043 { 3044 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 3045 "Float semantics are not IEEEdouble"); 3046 APInt api = bitcastToAPInt(); 3047 return api.bitsToDouble(); 3048 } 3049 3050 /// Integer bit is explicit in this format. Intel hardware (387 and later) 3051 /// does not support these bit patterns: 3052 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 3053 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 3054 /// exponent = 0, integer bit 1 ("pseudodenormal") 3055 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 3056 /// At the moment, the first two are treated as NaNs, the second two as Normal. 3057 void 3058 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 3059 { 3060 assert(api.getBitWidth()==80); 3061 uint64_t i1 = api.getRawData()[0]; 3062 uint64_t i2 = api.getRawData()[1]; 3063 uint64_t myexponent = (i2 & 0x7fff); 3064 uint64_t mysignificand = i1; 3065 3066 initialize(&APFloat::x87DoubleExtended); 3067 assert(partCount()==2); 3068 3069 sign = static_cast<unsigned int>(i2>>15); 3070 if (myexponent==0 && mysignificand==0) { 3071 // exponent, significand meaningless 3072 category = fcZero; 3073 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 3074 // exponent, significand meaningless 3075 category = fcInfinity; 3076 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 3077 // exponent meaningless 3078 category = fcNaN; 3079 significandParts()[0] = mysignificand; 3080 significandParts()[1] = 0; 3081 } else { 3082 category = fcNormal; 3083 exponent = myexponent - 16383; 3084 significandParts()[0] = mysignificand; 3085 significandParts()[1] = 0; 3086 if (myexponent==0) // denormal 3087 exponent = -16382; 3088 } 3089 } 3090 3091 void 3092 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3093 { 3094 assert(api.getBitWidth()==128); 3095 uint64_t i1 = api.getRawData()[0]; 3096 uint64_t i2 = api.getRawData()[1]; 3097 opStatus fs; 3098 bool losesInfo; 3099 3100 // Get the first double and convert to our format. 3101 initFromDoubleAPInt(APInt(64, i1)); 3102 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3103 assert(fs == opOK && !losesInfo); 3104 (void)fs; 3105 3106 // Unless we have a special case, add in second double. 3107 if (category == fcNormal) { 3108 APFloat v(IEEEdouble, APInt(64, i2)); 3109 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3110 assert(fs == opOK && !losesInfo); 3111 (void)fs; 3112 3113 add(v, rmNearestTiesToEven); 3114 } 3115 } 3116 3117 void 3118 APFloat::initFromQuadrupleAPInt(const APInt &api) 3119 { 3120 assert(api.getBitWidth()==128); 3121 uint64_t i1 = api.getRawData()[0]; 3122 uint64_t i2 = api.getRawData()[1]; 3123 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3124 uint64_t mysignificand = i1; 3125 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3126 3127 initialize(&APFloat::IEEEquad); 3128 assert(partCount()==2); 3129 3130 sign = static_cast<unsigned int>(i2>>63); 3131 if (myexponent==0 && 3132 (mysignificand==0 && mysignificand2==0)) { 3133 // exponent, significand meaningless 3134 category = fcZero; 3135 } else if (myexponent==0x7fff && 3136 (mysignificand==0 && mysignificand2==0)) { 3137 // exponent, significand meaningless 3138 category = fcInfinity; 3139 } else if (myexponent==0x7fff && 3140 (mysignificand!=0 || mysignificand2 !=0)) { 3141 // exponent meaningless 3142 category = fcNaN; 3143 significandParts()[0] = mysignificand; 3144 significandParts()[1] = mysignificand2; 3145 } else { 3146 category = fcNormal; 3147 exponent = myexponent - 16383; 3148 significandParts()[0] = mysignificand; 3149 significandParts()[1] = mysignificand2; 3150 if (myexponent==0) // denormal 3151 exponent = -16382; 3152 else 3153 significandParts()[1] |= 0x1000000000000LL; // integer bit 3154 } 3155 } 3156 3157 void 3158 APFloat::initFromDoubleAPInt(const APInt &api) 3159 { 3160 assert(api.getBitWidth()==64); 3161 uint64_t i = *api.getRawData(); 3162 uint64_t myexponent = (i >> 52) & 0x7ff; 3163 uint64_t mysignificand = i & 0xfffffffffffffLL; 3164 3165 initialize(&APFloat::IEEEdouble); 3166 assert(partCount()==1); 3167 3168 sign = static_cast<unsigned int>(i>>63); 3169 if (myexponent==0 && mysignificand==0) { 3170 // exponent, significand meaningless 3171 category = fcZero; 3172 } else if (myexponent==0x7ff && mysignificand==0) { 3173 // exponent, significand meaningless 3174 category = fcInfinity; 3175 } else if (myexponent==0x7ff && mysignificand!=0) { 3176 // exponent meaningless 3177 category = fcNaN; 3178 *significandParts() = mysignificand; 3179 } else { 3180 category = fcNormal; 3181 exponent = myexponent - 1023; 3182 *significandParts() = mysignificand; 3183 if (myexponent==0) // denormal 3184 exponent = -1022; 3185 else 3186 *significandParts() |= 0x10000000000000LL; // integer bit 3187 } 3188 } 3189 3190 void 3191 APFloat::initFromFloatAPInt(const APInt & api) 3192 { 3193 assert(api.getBitWidth()==32); 3194 uint32_t i = (uint32_t)*api.getRawData(); 3195 uint32_t myexponent = (i >> 23) & 0xff; 3196 uint32_t mysignificand = i & 0x7fffff; 3197 3198 initialize(&APFloat::IEEEsingle); 3199 assert(partCount()==1); 3200 3201 sign = i >> 31; 3202 if (myexponent==0 && mysignificand==0) { 3203 // exponent, significand meaningless 3204 category = fcZero; 3205 } else if (myexponent==0xff && mysignificand==0) { 3206 // exponent, significand meaningless 3207 category = fcInfinity; 3208 } else if (myexponent==0xff && mysignificand!=0) { 3209 // sign, exponent, significand meaningless 3210 category = fcNaN; 3211 *significandParts() = mysignificand; 3212 } else { 3213 category = fcNormal; 3214 exponent = myexponent - 127; //bias 3215 *significandParts() = mysignificand; 3216 if (myexponent==0) // denormal 3217 exponent = -126; 3218 else 3219 *significandParts() |= 0x800000; // integer bit 3220 } 3221 } 3222 3223 void 3224 APFloat::initFromHalfAPInt(const APInt & api) 3225 { 3226 assert(api.getBitWidth()==16); 3227 uint32_t i = (uint32_t)*api.getRawData(); 3228 uint32_t myexponent = (i >> 10) & 0x1f; 3229 uint32_t mysignificand = i & 0x3ff; 3230 3231 initialize(&APFloat::IEEEhalf); 3232 assert(partCount()==1); 3233 3234 sign = i >> 15; 3235 if (myexponent==0 && mysignificand==0) { 3236 // exponent, significand meaningless 3237 category = fcZero; 3238 } else if (myexponent==0x1f && mysignificand==0) { 3239 // exponent, significand meaningless 3240 category = fcInfinity; 3241 } else if (myexponent==0x1f && mysignificand!=0) { 3242 // sign, exponent, significand meaningless 3243 category = fcNaN; 3244 *significandParts() = mysignificand; 3245 } else { 3246 category = fcNormal; 3247 exponent = myexponent - 15; //bias 3248 *significandParts() = mysignificand; 3249 if (myexponent==0) // denormal 3250 exponent = -14; 3251 else 3252 *significandParts() |= 0x400; // integer bit 3253 } 3254 } 3255 3256 /// Treat api as containing the bits of a floating point number. Currently 3257 /// we infer the floating point type from the size of the APInt. The 3258 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3259 /// when the size is anything else). 3260 void 3261 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3262 { 3263 if (Sem == &IEEEhalf) 3264 return initFromHalfAPInt(api); 3265 if (Sem == &IEEEsingle) 3266 return initFromFloatAPInt(api); 3267 if (Sem == &IEEEdouble) 3268 return initFromDoubleAPInt(api); 3269 if (Sem == &x87DoubleExtended) 3270 return initFromF80LongDoubleAPInt(api); 3271 if (Sem == &IEEEquad) 3272 return initFromQuadrupleAPInt(api); 3273 if (Sem == &PPCDoubleDouble) 3274 return initFromPPCDoubleDoubleAPInt(api); 3275 3276 llvm_unreachable(0); 3277 } 3278 3279 APFloat 3280 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3281 { 3282 switch (BitWidth) { 3283 case 16: 3284 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3285 case 32: 3286 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3287 case 64: 3288 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3289 case 80: 3290 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3291 case 128: 3292 if (isIEEE) 3293 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3294 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3295 default: 3296 llvm_unreachable("Unknown floating bit width"); 3297 } 3298 } 3299 3300 /// Make this number the largest magnitude normal number in the given 3301 /// semantics. 3302 void APFloat::makeLargest(bool Negative) { 3303 // We want (in interchange format): 3304 // sign = {Negative} 3305 // exponent = 1..10 3306 // significand = 1..1 3307 category = fcNormal; 3308 sign = Negative; 3309 exponent = semantics->maxExponent; 3310 3311 // Use memset to set all but the highest integerPart to all ones. 3312 integerPart *significand = significandParts(); 3313 unsigned PartCount = partCount(); 3314 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); 3315 3316 // Set the high integerPart especially setting all unused top bits for 3317 // internal consistency. 3318 const unsigned NumUnusedHighBits = 3319 PartCount*integerPartWidth - semantics->precision; 3320 significand[PartCount - 1] = ~integerPart(0) >> NumUnusedHighBits; 3321 } 3322 3323 /// Make this number the smallest magnitude denormal number in the given 3324 /// semantics. 3325 void APFloat::makeSmallest(bool Negative) { 3326 // We want (in interchange format): 3327 // sign = {Negative} 3328 // exponent = 0..0 3329 // significand = 0..01 3330 category = fcNormal; 3331 sign = Negative; 3332 exponent = semantics->minExponent; 3333 APInt::tcSet(significandParts(), 1, partCount()); 3334 } 3335 3336 3337 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3338 // We want (in interchange format): 3339 // sign = {Negative} 3340 // exponent = 1..10 3341 // significand = 1..1 3342 APFloat Val(Sem, uninitialized); 3343 Val.makeLargest(Negative); 3344 return Val; 3345 } 3346 3347 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3348 // We want (in interchange format): 3349 // sign = {Negative} 3350 // exponent = 0..0 3351 // significand = 0..01 3352 APFloat Val(Sem, uninitialized); 3353 Val.makeSmallest(Negative); 3354 return Val; 3355 } 3356 3357 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3358 APFloat Val(Sem, fcNormal, Negative); 3359 3360 // We want (in interchange format): 3361 // sign = {Negative} 3362 // exponent = 0..0 3363 // significand = 10..0 3364 3365 Val.exponent = Sem.minExponent; 3366 Val.zeroSignificand(); 3367 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3368 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3369 3370 return Val; 3371 } 3372 3373 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3374 initFromAPInt(&Sem, API); 3375 } 3376 3377 APFloat::APFloat(float f) { 3378 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3379 } 3380 3381 APFloat::APFloat(double d) { 3382 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3383 } 3384 3385 namespace { 3386 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3387 Buffer.append(Str.begin(), Str.end()); 3388 } 3389 3390 /// Removes data from the given significand until it is no more 3391 /// precise than is required for the desired precision. 3392 void AdjustToPrecision(APInt &significand, 3393 int &exp, unsigned FormatPrecision) { 3394 unsigned bits = significand.getActiveBits(); 3395 3396 // 196/59 is a very slight overestimate of lg_2(10). 3397 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3398 3399 if (bits <= bitsRequired) return; 3400 3401 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3402 if (!tensRemovable) return; 3403 3404 exp += tensRemovable; 3405 3406 APInt divisor(significand.getBitWidth(), 1); 3407 APInt powten(significand.getBitWidth(), 10); 3408 while (true) { 3409 if (tensRemovable & 1) 3410 divisor *= powten; 3411 tensRemovable >>= 1; 3412 if (!tensRemovable) break; 3413 powten *= powten; 3414 } 3415 3416 significand = significand.udiv(divisor); 3417 3418 // Truncate the significand down to its active bit count. 3419 significand = significand.trunc(significand.getActiveBits()); 3420 } 3421 3422 3423 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3424 int &exp, unsigned FormatPrecision) { 3425 unsigned N = buffer.size(); 3426 if (N <= FormatPrecision) return; 3427 3428 // The most significant figures are the last ones in the buffer. 3429 unsigned FirstSignificant = N - FormatPrecision; 3430 3431 // Round. 3432 // FIXME: this probably shouldn't use 'round half up'. 3433 3434 // Rounding down is just a truncation, except we also want to drop 3435 // trailing zeros from the new result. 3436 if (buffer[FirstSignificant - 1] < '5') { 3437 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3438 FirstSignificant++; 3439 3440 exp += FirstSignificant; 3441 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3442 return; 3443 } 3444 3445 // Rounding up requires a decimal add-with-carry. If we continue 3446 // the carry, the newly-introduced zeros will just be truncated. 3447 for (unsigned I = FirstSignificant; I != N; ++I) { 3448 if (buffer[I] == '9') { 3449 FirstSignificant++; 3450 } else { 3451 buffer[I]++; 3452 break; 3453 } 3454 } 3455 3456 // If we carried through, we have exactly one digit of precision. 3457 if (FirstSignificant == N) { 3458 exp += FirstSignificant; 3459 buffer.clear(); 3460 buffer.push_back('1'); 3461 return; 3462 } 3463 3464 exp += FirstSignificant; 3465 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3466 } 3467 } 3468 3469 void APFloat::toString(SmallVectorImpl<char> &Str, 3470 unsigned FormatPrecision, 3471 unsigned FormatMaxPadding) const { 3472 switch (category) { 3473 case fcInfinity: 3474 if (isNegative()) 3475 return append(Str, "-Inf"); 3476 else 3477 return append(Str, "+Inf"); 3478 3479 case fcNaN: return append(Str, "NaN"); 3480 3481 case fcZero: 3482 if (isNegative()) 3483 Str.push_back('-'); 3484 3485 if (!FormatMaxPadding) 3486 append(Str, "0.0E+0"); 3487 else 3488 Str.push_back('0'); 3489 return; 3490 3491 case fcNormal: 3492 break; 3493 } 3494 3495 if (isNegative()) 3496 Str.push_back('-'); 3497 3498 // Decompose the number into an APInt and an exponent. 3499 int exp = exponent - ((int) semantics->precision - 1); 3500 APInt significand(semantics->precision, 3501 makeArrayRef(significandParts(), 3502 partCountForBits(semantics->precision))); 3503 3504 // Set FormatPrecision if zero. We want to do this before we 3505 // truncate trailing zeros, as those are part of the precision. 3506 if (!FormatPrecision) { 3507 // It's an interesting question whether to use the nominal 3508 // precision or the active precision here for denormals. 3509 3510 // FormatPrecision = ceil(significandBits / lg_2(10)) 3511 FormatPrecision = (semantics->precision * 59 + 195) / 196; 3512 } 3513 3514 // Ignore trailing binary zeros. 3515 int trailingZeros = significand.countTrailingZeros(); 3516 exp += trailingZeros; 3517 significand = significand.lshr(trailingZeros); 3518 3519 // Change the exponent from 2^e to 10^e. 3520 if (exp == 0) { 3521 // Nothing to do. 3522 } else if (exp > 0) { 3523 // Just shift left. 3524 significand = significand.zext(semantics->precision + exp); 3525 significand <<= exp; 3526 exp = 0; 3527 } else { /* exp < 0 */ 3528 int texp = -exp; 3529 3530 // We transform this using the identity: 3531 // (N)(2^-e) == (N)(5^e)(10^-e) 3532 // This means we have to multiply N (the significand) by 5^e. 3533 // To avoid overflow, we have to operate on numbers large 3534 // enough to store N * 5^e: 3535 // log2(N * 5^e) == log2(N) + e * log2(5) 3536 // <= semantics->precision + e * 137 / 59 3537 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3538 3539 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3540 3541 // Multiply significand by 5^e. 3542 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3543 significand = significand.zext(precision); 3544 APInt five_to_the_i(precision, 5); 3545 while (true) { 3546 if (texp & 1) significand *= five_to_the_i; 3547 3548 texp >>= 1; 3549 if (!texp) break; 3550 five_to_the_i *= five_to_the_i; 3551 } 3552 } 3553 3554 AdjustToPrecision(significand, exp, FormatPrecision); 3555 3556 SmallVector<char, 256> buffer; 3557 3558 // Fill the buffer. 3559 unsigned precision = significand.getBitWidth(); 3560 APInt ten(precision, 10); 3561 APInt digit(precision, 0); 3562 3563 bool inTrail = true; 3564 while (significand != 0) { 3565 // digit <- significand % 10 3566 // significand <- significand / 10 3567 APInt::udivrem(significand, ten, significand, digit); 3568 3569 unsigned d = digit.getZExtValue(); 3570 3571 // Drop trailing zeros. 3572 if (inTrail && !d) exp++; 3573 else { 3574 buffer.push_back((char) ('0' + d)); 3575 inTrail = false; 3576 } 3577 } 3578 3579 assert(!buffer.empty() && "no characters in buffer!"); 3580 3581 // Drop down to FormatPrecision. 3582 // TODO: don't do more precise calculations above than are required. 3583 AdjustToPrecision(buffer, exp, FormatPrecision); 3584 3585 unsigned NDigits = buffer.size(); 3586 3587 // Check whether we should use scientific notation. 3588 bool FormatScientific; 3589 if (!FormatMaxPadding) 3590 FormatScientific = true; 3591 else { 3592 if (exp >= 0) { 3593 // 765e3 --> 765000 3594 // ^^^ 3595 // But we shouldn't make the number look more precise than it is. 3596 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3597 NDigits + (unsigned) exp > FormatPrecision); 3598 } else { 3599 // Power of the most significant digit. 3600 int MSD = exp + (int) (NDigits - 1); 3601 if (MSD >= 0) { 3602 // 765e-2 == 7.65 3603 FormatScientific = false; 3604 } else { 3605 // 765e-5 == 0.00765 3606 // ^ ^^ 3607 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3608 } 3609 } 3610 } 3611 3612 // Scientific formatting is pretty straightforward. 3613 if (FormatScientific) { 3614 exp += (NDigits - 1); 3615 3616 Str.push_back(buffer[NDigits-1]); 3617 Str.push_back('.'); 3618 if (NDigits == 1) 3619 Str.push_back('0'); 3620 else 3621 for (unsigned I = 1; I != NDigits; ++I) 3622 Str.push_back(buffer[NDigits-1-I]); 3623 Str.push_back('E'); 3624 3625 Str.push_back(exp >= 0 ? '+' : '-'); 3626 if (exp < 0) exp = -exp; 3627 SmallVector<char, 6> expbuf; 3628 do { 3629 expbuf.push_back((char) ('0' + (exp % 10))); 3630 exp /= 10; 3631 } while (exp); 3632 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3633 Str.push_back(expbuf[E-1-I]); 3634 return; 3635 } 3636 3637 // Non-scientific, positive exponents. 3638 if (exp >= 0) { 3639 for (unsigned I = 0; I != NDigits; ++I) 3640 Str.push_back(buffer[NDigits-1-I]); 3641 for (unsigned I = 0; I != (unsigned) exp; ++I) 3642 Str.push_back('0'); 3643 return; 3644 } 3645 3646 // Non-scientific, negative exponents. 3647 3648 // The number of digits to the left of the decimal point. 3649 int NWholeDigits = exp + (int) NDigits; 3650 3651 unsigned I = 0; 3652 if (NWholeDigits > 0) { 3653 for (; I != (unsigned) NWholeDigits; ++I) 3654 Str.push_back(buffer[NDigits-I-1]); 3655 Str.push_back('.'); 3656 } else { 3657 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3658 3659 Str.push_back('0'); 3660 Str.push_back('.'); 3661 for (unsigned Z = 1; Z != NZeros; ++Z) 3662 Str.push_back('0'); 3663 } 3664 3665 for (; I != NDigits; ++I) 3666 Str.push_back(buffer[NDigits-I-1]); 3667 } 3668 3669 bool APFloat::getExactInverse(APFloat *inv) const { 3670 // Special floats and denormals have no exact inverse. 3671 if (category != fcNormal) 3672 return false; 3673 3674 // Check that the number is a power of two by making sure that only the 3675 // integer bit is set in the significand. 3676 if (significandLSB() != semantics->precision - 1) 3677 return false; 3678 3679 // Get the inverse. 3680 APFloat reciprocal(*semantics, 1ULL); 3681 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3682 return false; 3683 3684 // Avoid multiplication with a denormal, it is not safe on all platforms and 3685 // may be slower than a normal division. 3686 if (reciprocal.isDenormal()) 3687 return false; 3688 3689 assert(reciprocal.category == fcNormal && 3690 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3691 3692 if (inv) 3693 *inv = reciprocal; 3694 3695 return true; 3696 } 3697 3698 bool APFloat::isSignaling() const { 3699 if (!isNaN()) 3700 return false; 3701 3702 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the 3703 // first bit of the trailing significand being 0. 3704 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); 3705 } 3706 3707 /// IEEE-754R 2008 5.3.1: nextUp/nextDown. 3708 /// 3709 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with 3710 /// appropriate sign switching before/after the computation. 3711 APFloat::opStatus APFloat::next(bool nextDown) { 3712 // If we are performing nextDown, swap sign so we have -x. 3713 if (nextDown) 3714 changeSign(); 3715 3716 // Compute nextUp(x) 3717 opStatus result = opOK; 3718 3719 // Handle each float category separately. 3720 switch (category) { 3721 case fcInfinity: 3722 // nextUp(+inf) = +inf 3723 if (!isNegative()) 3724 break; 3725 // nextUp(-inf) = -getLargest() 3726 makeLargest(true); 3727 break; 3728 case fcNaN: 3729 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. 3730 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not 3731 // change the payload. 3732 if (isSignaling()) { 3733 result = opInvalidOp; 3734 // For consistency, propogate the sign of the sNaN to the qNaN. 3735 makeNaN(false, isNegative(), 0); 3736 } 3737 break; 3738 case fcZero: 3739 // nextUp(pm 0) = +getSmallest() 3740 makeSmallest(false); 3741 break; 3742 case fcNormal: 3743 // nextUp(-getSmallest()) = -0 3744 if (isSmallest() && isNegative()) { 3745 APInt::tcSet(significandParts(), 0, partCount()); 3746 category = fcZero; 3747 exponent = 0; 3748 break; 3749 } 3750 3751 // nextUp(getLargest()) == INFINITY 3752 if (isLargest() && !isNegative()) { 3753 APInt::tcSet(significandParts(), 0, partCount()); 3754 category = fcInfinity; 3755 exponent = semantics->maxExponent + 1; 3756 break; 3757 } 3758 3759 // nextUp(normal) == normal + inc. 3760 if (isNegative()) { 3761 // If we are negative, we need to decrement the significand. 3762 3763 // We only cross a binade boundary that requires adjusting the exponent 3764 // if: 3765 // 1. exponent != semantics->minExponent. This implies we are not in the 3766 // smallest binade or are dealing with denormals. 3767 // 2. Our significand excluding the integral bit is all zeros. 3768 bool WillCrossBinadeBoundary = 3769 exponent != semantics->minExponent && isSignificandAllZeros(); 3770 3771 // Decrement the significand. 3772 // 3773 // We always do this since: 3774 // 1. If we are dealing with a non binade decrement, by definition we 3775 // just decrement the significand. 3776 // 2. If we are dealing with a normal -> normal binade decrement, since 3777 // we have an explicit integral bit the fact that all bits but the 3778 // integral bit are zero implies that subtracting one will yield a 3779 // significand with 0 integral bit and 1 in all other spots. Thus we 3780 // must just adjust the exponent and set the integral bit to 1. 3781 // 3. If we are dealing with a normal -> denormal binade decrement, 3782 // since we set the integral bit to 0 when we represent denormals, we 3783 // just decrement the significand. 3784 integerPart *Parts = significandParts(); 3785 APInt::tcDecrement(Parts, partCount()); 3786 3787 if (WillCrossBinadeBoundary) { 3788 // Our result is a normal number. Do the following: 3789 // 1. Set the integral bit to 1. 3790 // 2. Decrement the exponent. 3791 APInt::tcSetBit(Parts, semantics->precision - 1); 3792 exponent--; 3793 } 3794 } else { 3795 // If we are positive, we need to increment the significand. 3796 3797 // We only cross a binade boundary that requires adjusting the exponent if 3798 // the input is not a denormal and all of said input's significand bits 3799 // are set. If all of said conditions are true: clear the significand, set 3800 // the integral bit to 1, and increment the exponent. If we have a 3801 // denormal always increment since moving denormals and the numbers in the 3802 // smallest normal binade have the same exponent in our representation. 3803 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); 3804 3805 if (WillCrossBinadeBoundary) { 3806 integerPart *Parts = significandParts(); 3807 APInt::tcSet(Parts, 0, partCount()); 3808 APInt::tcSetBit(Parts, semantics->precision - 1); 3809 assert(exponent != semantics->maxExponent && 3810 "We can not increment an exponent beyond the maxExponent allowed" 3811 " by the given floating point semantics."); 3812 exponent++; 3813 } else { 3814 incrementSignificand(); 3815 } 3816 } 3817 break; 3818 } 3819 3820 // If we are performing nextDown, swap sign so we have -nextUp(-x) 3821 if (nextDown) 3822 changeSign(); 3823 3824 return result; 3825 } 3826