1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/APSInt.h" 17 #include "llvm/ADT/FoldingSet.h" 18 #include "llvm/ADT/Hashing.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MathExtras.h" 23 #include <cstring> 24 #include <limits.h> 25 26 using namespace llvm; 27 28 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 29 30 /* Assumed in hexadecimal significand parsing, and conversion to 31 hexadecimal strings. */ 32 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] 33 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 34 35 namespace llvm { 36 37 /* Represents floating point arithmetic semantics. */ 38 struct fltSemantics { 39 /* The largest E such that 2^E is representable; this matches the 40 definition of IEEE 754. */ 41 exponent_t maxExponent; 42 43 /* The smallest E such that 2^E is a normalized number; this 44 matches the definition of IEEE 754. */ 45 exponent_t minExponent; 46 47 /* Number of bits in the significand. This includes the integer 48 bit. */ 49 unsigned int precision; 50 }; 51 52 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 }; 53 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 }; 54 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 }; 55 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 }; 56 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 }; 57 const fltSemantics APFloat::Bogus = { 0, 0, 0 }; 58 59 /* The PowerPC format consists of two doubles. It does not map cleanly 60 onto the usual format above. It is approximated using twice the 61 mantissa bits. Note that for exponents near the double minimum, 62 we no longer can represent the full 106 mantissa bits, so those 63 will be treated as denormal numbers. 64 65 FIXME: While this approximation is equivalent to what GCC uses for 66 compile-time arithmetic on PPC double-double numbers, it is not able 67 to represent all possible values held by a PPC double-double number, 68 for example: (long double) 1.0 + (long double) 0x1p-106 69 Should this be replaced by a full emulation of PPC double-double? */ 70 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 }; 71 72 /* A tight upper bound on number of parts required to hold the value 73 pow(5, power) is 74 75 power * 815 / (351 * integerPartWidth) + 1 76 77 However, whilst the result may require only this many parts, 78 because we are multiplying two values to get it, the 79 multiplication may require an extra part with the excess part 80 being zero (consider the trivial case of 1 * 1, tcFullMultiply 81 requires two parts to hold the single-part result). So we add an 82 extra one to guarantee enough space whilst multiplying. */ 83 const unsigned int maxExponent = 16383; 84 const unsigned int maxPrecision = 113; 85 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 86 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 87 / (351 * integerPartWidth)); 88 } 89 90 /* A bunch of private, handy routines. */ 91 92 static inline unsigned int 93 partCountForBits(unsigned int bits) 94 { 95 return ((bits) + integerPartWidth - 1) / integerPartWidth; 96 } 97 98 /* Returns 0U-9U. Return values >= 10U are not digits. */ 99 static inline unsigned int 100 decDigitValue(unsigned int c) 101 { 102 return c - '0'; 103 } 104 105 /* Return the value of a decimal exponent of the form 106 [+-]ddddddd. 107 108 If the exponent overflows, returns a large exponent with the 109 appropriate sign. */ 110 static int 111 readExponent(StringRef::iterator begin, StringRef::iterator end) 112 { 113 bool isNegative; 114 unsigned int absExponent; 115 const unsigned int overlargeExponent = 24000; /* FIXME. */ 116 StringRef::iterator p = begin; 117 118 assert(p != end && "Exponent has no digits"); 119 120 isNegative = (*p == '-'); 121 if (*p == '-' || *p == '+') { 122 p++; 123 assert(p != end && "Exponent has no digits"); 124 } 125 126 absExponent = decDigitValue(*p++); 127 assert(absExponent < 10U && "Invalid character in exponent"); 128 129 for (; p != end; ++p) { 130 unsigned int value; 131 132 value = decDigitValue(*p); 133 assert(value < 10U && "Invalid character in exponent"); 134 135 value += absExponent * 10; 136 if (absExponent >= overlargeExponent) { 137 absExponent = overlargeExponent; 138 p = end; /* outwit assert below */ 139 break; 140 } 141 absExponent = value; 142 } 143 144 assert(p == end && "Invalid exponent in exponent"); 145 146 if (isNegative) 147 return -(int) absExponent; 148 else 149 return (int) absExponent; 150 } 151 152 /* This is ugly and needs cleaning up, but I don't immediately see 153 how whilst remaining safe. */ 154 static int 155 totalExponent(StringRef::iterator p, StringRef::iterator end, 156 int exponentAdjustment) 157 { 158 int unsignedExponent; 159 bool negative, overflow; 160 int exponent = 0; 161 162 assert(p != end && "Exponent has no digits"); 163 164 negative = *p == '-'; 165 if (*p == '-' || *p == '+') { 166 p++; 167 assert(p != end && "Exponent has no digits"); 168 } 169 170 unsignedExponent = 0; 171 overflow = false; 172 for (; p != end; ++p) { 173 unsigned int value; 174 175 value = decDigitValue(*p); 176 assert(value < 10U && "Invalid character in exponent"); 177 178 unsignedExponent = unsignedExponent * 10 + value; 179 if (unsignedExponent > 32767) { 180 overflow = true; 181 break; 182 } 183 } 184 185 if (exponentAdjustment > 32767 || exponentAdjustment < -32768) 186 overflow = true; 187 188 if (!overflow) { 189 exponent = unsignedExponent; 190 if (negative) 191 exponent = -exponent; 192 exponent += exponentAdjustment; 193 if (exponent > 32767 || exponent < -32768) 194 overflow = true; 195 } 196 197 if (overflow) 198 exponent = negative ? -32768: 32767; 199 200 return exponent; 201 } 202 203 static StringRef::iterator 204 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, 205 StringRef::iterator *dot) 206 { 207 StringRef::iterator p = begin; 208 *dot = end; 209 while (*p == '0' && p != end) 210 p++; 211 212 if (*p == '.') { 213 *dot = p++; 214 215 assert(end - begin != 1 && "Significand has no digits"); 216 217 while (*p == '0' && p != end) 218 p++; 219 } 220 221 return p; 222 } 223 224 /* Given a normal decimal floating point number of the form 225 226 dddd.dddd[eE][+-]ddd 227 228 where the decimal point and exponent are optional, fill out the 229 structure D. Exponent is appropriate if the significand is 230 treated as an integer, and normalizedExponent if the significand 231 is taken to have the decimal point after a single leading 232 non-zero digit. 233 234 If the value is zero, V->firstSigDigit points to a non-digit, and 235 the return exponent is zero. 236 */ 237 struct decimalInfo { 238 const char *firstSigDigit; 239 const char *lastSigDigit; 240 int exponent; 241 int normalizedExponent; 242 }; 243 244 static void 245 interpretDecimal(StringRef::iterator begin, StringRef::iterator end, 246 decimalInfo *D) 247 { 248 StringRef::iterator dot = end; 249 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); 250 251 D->firstSigDigit = p; 252 D->exponent = 0; 253 D->normalizedExponent = 0; 254 255 for (; p != end; ++p) { 256 if (*p == '.') { 257 assert(dot == end && "String contains multiple dots"); 258 dot = p++; 259 if (p == end) 260 break; 261 } 262 if (decDigitValue(*p) >= 10U) 263 break; 264 } 265 266 if (p != end) { 267 assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); 268 assert(p != begin && "Significand has no digits"); 269 assert((dot == end || p - begin != 1) && "Significand has no digits"); 270 271 /* p points to the first non-digit in the string */ 272 D->exponent = readExponent(p + 1, end); 273 274 /* Implied decimal point? */ 275 if (dot == end) 276 dot = p; 277 } 278 279 /* If number is all zeroes accept any exponent. */ 280 if (p != D->firstSigDigit) { 281 /* Drop insignificant trailing zeroes. */ 282 if (p != begin) { 283 do 284 do 285 p--; 286 while (p != begin && *p == '0'); 287 while (p != begin && *p == '.'); 288 } 289 290 /* Adjust the exponents for any decimal point. */ 291 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p)); 292 D->normalizedExponent = (D->exponent + 293 static_cast<exponent_t>((p - D->firstSigDigit) 294 - (dot > D->firstSigDigit && dot < p))); 295 } 296 297 D->lastSigDigit = p; 298 } 299 300 /* Return the trailing fraction of a hexadecimal number. 301 DIGITVALUE is the first hex digit of the fraction, P points to 302 the next digit. */ 303 static lostFraction 304 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, 305 unsigned int digitValue) 306 { 307 unsigned int hexDigit; 308 309 /* If the first trailing digit isn't 0 or 8 we can work out the 310 fraction immediately. */ 311 if (digitValue > 8) 312 return lfMoreThanHalf; 313 else if (digitValue < 8 && digitValue > 0) 314 return lfLessThanHalf; 315 316 /* Otherwise we need to find the first non-zero digit. */ 317 while (*p == '0') 318 p++; 319 320 assert(p != end && "Invalid trailing hexadecimal fraction!"); 321 322 hexDigit = hexDigitValue(*p); 323 324 /* If we ran off the end it is exactly zero or one-half, otherwise 325 a little more. */ 326 if (hexDigit == -1U) 327 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 328 else 329 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 330 } 331 332 /* Return the fraction lost were a bignum truncated losing the least 333 significant BITS bits. */ 334 static lostFraction 335 lostFractionThroughTruncation(const integerPart *parts, 336 unsigned int partCount, 337 unsigned int bits) 338 { 339 unsigned int lsb; 340 341 lsb = APInt::tcLSB(parts, partCount); 342 343 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 344 if (bits <= lsb) 345 return lfExactlyZero; 346 if (bits == lsb + 1) 347 return lfExactlyHalf; 348 if (bits <= partCount * integerPartWidth && 349 APInt::tcExtractBit(parts, bits - 1)) 350 return lfMoreThanHalf; 351 352 return lfLessThanHalf; 353 } 354 355 /* Shift DST right BITS bits noting lost fraction. */ 356 static lostFraction 357 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 358 { 359 lostFraction lost_fraction; 360 361 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 362 363 APInt::tcShiftRight(dst, parts, bits); 364 365 return lost_fraction; 366 } 367 368 /* Combine the effect of two lost fractions. */ 369 static lostFraction 370 combineLostFractions(lostFraction moreSignificant, 371 lostFraction lessSignificant) 372 { 373 if (lessSignificant != lfExactlyZero) { 374 if (moreSignificant == lfExactlyZero) 375 moreSignificant = lfLessThanHalf; 376 else if (moreSignificant == lfExactlyHalf) 377 moreSignificant = lfMoreThanHalf; 378 } 379 380 return moreSignificant; 381 } 382 383 /* The error from the true value, in half-ulps, on multiplying two 384 floating point numbers, which differ from the value they 385 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 386 than the returned value. 387 388 See "How to Read Floating Point Numbers Accurately" by William D 389 Clinger. */ 390 static unsigned int 391 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 392 { 393 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 394 395 if (HUerr1 + HUerr2 == 0) 396 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 397 else 398 return inexactMultiply + 2 * (HUerr1 + HUerr2); 399 } 400 401 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 402 when the least significant BITS are truncated. BITS cannot be 403 zero. */ 404 static integerPart 405 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 406 { 407 unsigned int count, partBits; 408 integerPart part, boundary; 409 410 assert(bits != 0); 411 412 bits--; 413 count = bits / integerPartWidth; 414 partBits = bits % integerPartWidth + 1; 415 416 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 417 418 if (isNearest) 419 boundary = (integerPart) 1 << (partBits - 1); 420 else 421 boundary = 0; 422 423 if (count == 0) { 424 if (part - boundary <= boundary - part) 425 return part - boundary; 426 else 427 return boundary - part; 428 } 429 430 if (part == boundary) { 431 while (--count) 432 if (parts[count]) 433 return ~(integerPart) 0; /* A lot. */ 434 435 return parts[0]; 436 } else if (part == boundary - 1) { 437 while (--count) 438 if (~parts[count]) 439 return ~(integerPart) 0; /* A lot. */ 440 441 return -parts[0]; 442 } 443 444 return ~(integerPart) 0; /* A lot. */ 445 } 446 447 /* Place pow(5, power) in DST, and return the number of parts used. 448 DST must be at least one part larger than size of the answer. */ 449 static unsigned int 450 powerOf5(integerPart *dst, unsigned int power) 451 { 452 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 453 15625, 78125 }; 454 integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; 455 pow5s[0] = 78125 * 5; 456 457 unsigned int partsCount[16] = { 1 }; 458 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 459 unsigned int result; 460 assert(power <= maxExponent); 461 462 p1 = dst; 463 p2 = scratch; 464 465 *p1 = firstEightPowers[power & 7]; 466 power >>= 3; 467 468 result = 1; 469 pow5 = pow5s; 470 471 for (unsigned int n = 0; power; power >>= 1, n++) { 472 unsigned int pc; 473 474 pc = partsCount[n]; 475 476 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 477 if (pc == 0) { 478 pc = partsCount[n - 1]; 479 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 480 pc *= 2; 481 if (pow5[pc - 1] == 0) 482 pc--; 483 partsCount[n] = pc; 484 } 485 486 if (power & 1) { 487 integerPart *tmp; 488 489 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 490 result += pc; 491 if (p2[result - 1] == 0) 492 result--; 493 494 /* Now result is in p1 with partsCount parts and p2 is scratch 495 space. */ 496 tmp = p1, p1 = p2, p2 = tmp; 497 } 498 499 pow5 += pc; 500 } 501 502 if (p1 != dst) 503 APInt::tcAssign(dst, p1, result); 504 505 return result; 506 } 507 508 /* Zero at the end to avoid modular arithmetic when adding one; used 509 when rounding up during hexadecimal output. */ 510 static const char hexDigitsLower[] = "0123456789abcdef0"; 511 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 512 static const char infinityL[] = "infinity"; 513 static const char infinityU[] = "INFINITY"; 514 static const char NaNL[] = "nan"; 515 static const char NaNU[] = "NAN"; 516 517 /* Write out an integerPart in hexadecimal, starting with the most 518 significant nibble. Write out exactly COUNT hexdigits, return 519 COUNT. */ 520 static unsigned int 521 partAsHex (char *dst, integerPart part, unsigned int count, 522 const char *hexDigitChars) 523 { 524 unsigned int result = count; 525 526 assert(count != 0 && count <= integerPartWidth / 4); 527 528 part >>= (integerPartWidth - 4 * count); 529 while (count--) { 530 dst[count] = hexDigitChars[part & 0xf]; 531 part >>= 4; 532 } 533 534 return result; 535 } 536 537 /* Write out an unsigned decimal integer. */ 538 static char * 539 writeUnsignedDecimal (char *dst, unsigned int n) 540 { 541 char buff[40], *p; 542 543 p = buff; 544 do 545 *p++ = '0' + n % 10; 546 while (n /= 10); 547 548 do 549 *dst++ = *--p; 550 while (p != buff); 551 552 return dst; 553 } 554 555 /* Write out a signed decimal integer. */ 556 static char * 557 writeSignedDecimal (char *dst, int value) 558 { 559 if (value < 0) { 560 *dst++ = '-'; 561 dst = writeUnsignedDecimal(dst, -(unsigned) value); 562 } else 563 dst = writeUnsignedDecimal(dst, value); 564 565 return dst; 566 } 567 568 /* Constructors. */ 569 void 570 APFloat::initialize(const fltSemantics *ourSemantics) 571 { 572 unsigned int count; 573 574 semantics = ourSemantics; 575 count = partCount(); 576 if (count > 1) 577 significand.parts = new integerPart[count]; 578 } 579 580 void 581 APFloat::freeSignificand() 582 { 583 if (partCount() > 1) 584 delete [] significand.parts; 585 } 586 587 void 588 APFloat::assign(const APFloat &rhs) 589 { 590 assert(semantics == rhs.semantics); 591 592 sign = rhs.sign; 593 category = rhs.category; 594 exponent = rhs.exponent; 595 if (category == fcNormal || category == fcNaN) 596 copySignificand(rhs); 597 } 598 599 void 600 APFloat::copySignificand(const APFloat &rhs) 601 { 602 assert(category == fcNormal || category == fcNaN); 603 assert(rhs.partCount() >= partCount()); 604 605 APInt::tcAssign(significandParts(), rhs.significandParts(), 606 partCount()); 607 } 608 609 /* Make this number a NaN, with an arbitrary but deterministic value 610 for the significand. If double or longer, this is a signalling NaN, 611 which may not be ideal. If float, this is QNaN(0). */ 612 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) 613 { 614 category = fcNaN; 615 sign = Negative; 616 617 integerPart *significand = significandParts(); 618 unsigned numParts = partCount(); 619 620 // Set the significand bits to the fill. 621 if (!fill || fill->getNumWords() < numParts) 622 APInt::tcSet(significand, 0, numParts); 623 if (fill) { 624 APInt::tcAssign(significand, fill->getRawData(), 625 std::min(fill->getNumWords(), numParts)); 626 627 // Zero out the excess bits of the significand. 628 unsigned bitsToPreserve = semantics->precision - 1; 629 unsigned part = bitsToPreserve / 64; 630 bitsToPreserve %= 64; 631 significand[part] &= ((1ULL << bitsToPreserve) - 1); 632 for (part++; part != numParts; ++part) 633 significand[part] = 0; 634 } 635 636 unsigned QNaNBit = semantics->precision - 2; 637 638 if (SNaN) { 639 // We always have to clear the QNaN bit to make it an SNaN. 640 APInt::tcClearBit(significand, QNaNBit); 641 642 // If there are no bits set in the payload, we have to set 643 // *something* to make it a NaN instead of an infinity; 644 // conventionally, this is the next bit down from the QNaN bit. 645 if (APInt::tcIsZero(significand, numParts)) 646 APInt::tcSetBit(significand, QNaNBit - 1); 647 } else { 648 // We always have to set the QNaN bit to make it a QNaN. 649 APInt::tcSetBit(significand, QNaNBit); 650 } 651 652 // For x87 extended precision, we want to make a NaN, not a 653 // pseudo-NaN. Maybe we should expose the ability to make 654 // pseudo-NaNs? 655 if (semantics == &APFloat::x87DoubleExtended) 656 APInt::tcSetBit(significand, QNaNBit + 1); 657 } 658 659 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, 660 const APInt *fill) { 661 APFloat value(Sem, uninitialized); 662 value.makeNaN(SNaN, Negative, fill); 663 return value; 664 } 665 666 APFloat & 667 APFloat::operator=(const APFloat &rhs) 668 { 669 if (this != &rhs) { 670 if (semantics != rhs.semantics) { 671 freeSignificand(); 672 initialize(rhs.semantics); 673 } 674 assign(rhs); 675 } 676 677 return *this; 678 } 679 680 bool 681 APFloat::isDenormal() const { 682 return isNormal() && (exponent == semantics->minExponent) && 683 (APInt::tcExtractBit(significandParts(), 684 semantics->precision - 1) == 0); 685 } 686 687 bool 688 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 689 if (this == &rhs) 690 return true; 691 if (semantics != rhs.semantics || 692 category != rhs.category || 693 sign != rhs.sign) 694 return false; 695 if (category==fcZero || category==fcInfinity) 696 return true; 697 else if (category==fcNormal && exponent!=rhs.exponent) 698 return false; 699 else { 700 int i= partCount(); 701 const integerPart* p=significandParts(); 702 const integerPart* q=rhs.significandParts(); 703 for (; i>0; i--, p++, q++) { 704 if (*p != *q) 705 return false; 706 } 707 return true; 708 } 709 } 710 711 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { 712 initialize(&ourSemantics); 713 sign = 0; 714 zeroSignificand(); 715 exponent = ourSemantics.precision - 1; 716 significandParts()[0] = value; 717 normalize(rmNearestTiesToEven, lfExactlyZero); 718 } 719 720 APFloat::APFloat(const fltSemantics &ourSemantics) { 721 initialize(&ourSemantics); 722 category = fcZero; 723 sign = false; 724 } 725 726 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { 727 // Allocates storage if necessary but does not initialize it. 728 initialize(&ourSemantics); 729 } 730 731 APFloat::APFloat(const fltSemantics &ourSemantics, 732 fltCategory ourCategory, bool negative) { 733 initialize(&ourSemantics); 734 category = ourCategory; 735 sign = negative; 736 if (category == fcNormal) 737 category = fcZero; 738 else if (ourCategory == fcNaN) 739 makeNaN(); 740 } 741 742 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { 743 initialize(&ourSemantics); 744 convertFromString(text, rmNearestTiesToEven); 745 } 746 747 APFloat::APFloat(const APFloat &rhs) { 748 initialize(rhs.semantics); 749 assign(rhs); 750 } 751 752 APFloat::~APFloat() 753 { 754 freeSignificand(); 755 } 756 757 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 758 void APFloat::Profile(FoldingSetNodeID& ID) const { 759 ID.Add(bitcastToAPInt()); 760 } 761 762 unsigned int 763 APFloat::partCount() const 764 { 765 return partCountForBits(semantics->precision + 1); 766 } 767 768 unsigned int 769 APFloat::semanticsPrecision(const fltSemantics &semantics) 770 { 771 return semantics.precision; 772 } 773 774 const integerPart * 775 APFloat::significandParts() const 776 { 777 return const_cast<APFloat *>(this)->significandParts(); 778 } 779 780 integerPart * 781 APFloat::significandParts() 782 { 783 assert(category == fcNormal || category == fcNaN); 784 785 if (partCount() > 1) 786 return significand.parts; 787 else 788 return &significand.part; 789 } 790 791 void 792 APFloat::zeroSignificand() 793 { 794 category = fcNormal; 795 APInt::tcSet(significandParts(), 0, partCount()); 796 } 797 798 /* Increment an fcNormal floating point number's significand. */ 799 void 800 APFloat::incrementSignificand() 801 { 802 integerPart carry; 803 804 carry = APInt::tcIncrement(significandParts(), partCount()); 805 806 /* Our callers should never cause us to overflow. */ 807 assert(carry == 0); 808 (void)carry; 809 } 810 811 /* Add the significand of the RHS. Returns the carry flag. */ 812 integerPart 813 APFloat::addSignificand(const APFloat &rhs) 814 { 815 integerPart *parts; 816 817 parts = significandParts(); 818 819 assert(semantics == rhs.semantics); 820 assert(exponent == rhs.exponent); 821 822 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 823 } 824 825 /* Subtract the significand of the RHS with a borrow flag. Returns 826 the borrow flag. */ 827 integerPart 828 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 829 { 830 integerPart *parts; 831 832 parts = significandParts(); 833 834 assert(semantics == rhs.semantics); 835 assert(exponent == rhs.exponent); 836 837 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 838 partCount()); 839 } 840 841 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 842 on to the full-precision result of the multiplication. Returns the 843 lost fraction. */ 844 lostFraction 845 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 846 { 847 unsigned int omsb; // One, not zero, based MSB. 848 unsigned int partsCount, newPartsCount, precision; 849 integerPart *lhsSignificand; 850 integerPart scratch[4]; 851 integerPart *fullSignificand; 852 lostFraction lost_fraction; 853 bool ignored; 854 855 assert(semantics == rhs.semantics); 856 857 precision = semantics->precision; 858 newPartsCount = partCountForBits(precision * 2); 859 860 if (newPartsCount > 4) 861 fullSignificand = new integerPart[newPartsCount]; 862 else 863 fullSignificand = scratch; 864 865 lhsSignificand = significandParts(); 866 partsCount = partCount(); 867 868 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 869 rhs.significandParts(), partsCount, partsCount); 870 871 lost_fraction = lfExactlyZero; 872 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 873 exponent += rhs.exponent; 874 875 if (addend) { 876 Significand savedSignificand = significand; 877 const fltSemantics *savedSemantics = semantics; 878 fltSemantics extendedSemantics; 879 opStatus status; 880 unsigned int extendedPrecision; 881 882 /* Normalize our MSB. */ 883 extendedPrecision = precision + precision - 1; 884 if (omsb != extendedPrecision) { 885 APInt::tcShiftLeft(fullSignificand, newPartsCount, 886 extendedPrecision - omsb); 887 exponent -= extendedPrecision - omsb; 888 } 889 890 /* Create new semantics. */ 891 extendedSemantics = *semantics; 892 extendedSemantics.precision = extendedPrecision; 893 894 if (newPartsCount == 1) 895 significand.part = fullSignificand[0]; 896 else 897 significand.parts = fullSignificand; 898 semantics = &extendedSemantics; 899 900 APFloat extendedAddend(*addend); 901 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 902 assert(status == opOK); 903 (void)status; 904 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 905 906 /* Restore our state. */ 907 if (newPartsCount == 1) 908 fullSignificand[0] = significand.part; 909 significand = savedSignificand; 910 semantics = savedSemantics; 911 912 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 913 } 914 915 exponent -= (precision - 1); 916 917 if (omsb > precision) { 918 unsigned int bits, significantParts; 919 lostFraction lf; 920 921 bits = omsb - precision; 922 significantParts = partCountForBits(omsb); 923 lf = shiftRight(fullSignificand, significantParts, bits); 924 lost_fraction = combineLostFractions(lf, lost_fraction); 925 exponent += bits; 926 } 927 928 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 929 930 if (newPartsCount > 4) 931 delete [] fullSignificand; 932 933 return lost_fraction; 934 } 935 936 /* Multiply the significands of LHS and RHS to DST. */ 937 lostFraction 938 APFloat::divideSignificand(const APFloat &rhs) 939 { 940 unsigned int bit, i, partsCount; 941 const integerPart *rhsSignificand; 942 integerPart *lhsSignificand, *dividend, *divisor; 943 integerPart scratch[4]; 944 lostFraction lost_fraction; 945 946 assert(semantics == rhs.semantics); 947 948 lhsSignificand = significandParts(); 949 rhsSignificand = rhs.significandParts(); 950 partsCount = partCount(); 951 952 if (partsCount > 2) 953 dividend = new integerPart[partsCount * 2]; 954 else 955 dividend = scratch; 956 957 divisor = dividend + partsCount; 958 959 /* Copy the dividend and divisor as they will be modified in-place. */ 960 for (i = 0; i < partsCount; i++) { 961 dividend[i] = lhsSignificand[i]; 962 divisor[i] = rhsSignificand[i]; 963 lhsSignificand[i] = 0; 964 } 965 966 exponent -= rhs.exponent; 967 968 unsigned int precision = semantics->precision; 969 970 /* Normalize the divisor. */ 971 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 972 if (bit) { 973 exponent += bit; 974 APInt::tcShiftLeft(divisor, partsCount, bit); 975 } 976 977 /* Normalize the dividend. */ 978 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 979 if (bit) { 980 exponent -= bit; 981 APInt::tcShiftLeft(dividend, partsCount, bit); 982 } 983 984 /* Ensure the dividend >= divisor initially for the loop below. 985 Incidentally, this means that the division loop below is 986 guaranteed to set the integer bit to one. */ 987 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { 988 exponent--; 989 APInt::tcShiftLeft(dividend, partsCount, 1); 990 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 991 } 992 993 /* Long division. */ 994 for (bit = precision; bit; bit -= 1) { 995 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 996 APInt::tcSubtract(dividend, divisor, 0, partsCount); 997 APInt::tcSetBit(lhsSignificand, bit - 1); 998 } 999 1000 APInt::tcShiftLeft(dividend, partsCount, 1); 1001 } 1002 1003 /* Figure out the lost fraction. */ 1004 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 1005 1006 if (cmp > 0) 1007 lost_fraction = lfMoreThanHalf; 1008 else if (cmp == 0) 1009 lost_fraction = lfExactlyHalf; 1010 else if (APInt::tcIsZero(dividend, partsCount)) 1011 lost_fraction = lfExactlyZero; 1012 else 1013 lost_fraction = lfLessThanHalf; 1014 1015 if (partsCount > 2) 1016 delete [] dividend; 1017 1018 return lost_fraction; 1019 } 1020 1021 unsigned int 1022 APFloat::significandMSB() const 1023 { 1024 return APInt::tcMSB(significandParts(), partCount()); 1025 } 1026 1027 unsigned int 1028 APFloat::significandLSB() const 1029 { 1030 return APInt::tcLSB(significandParts(), partCount()); 1031 } 1032 1033 /* Note that a zero result is NOT normalized to fcZero. */ 1034 lostFraction 1035 APFloat::shiftSignificandRight(unsigned int bits) 1036 { 1037 /* Our exponent should not overflow. */ 1038 assert((exponent_t) (exponent + bits) >= exponent); 1039 1040 exponent += bits; 1041 1042 return shiftRight(significandParts(), partCount(), bits); 1043 } 1044 1045 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 1046 void 1047 APFloat::shiftSignificandLeft(unsigned int bits) 1048 { 1049 assert(bits < semantics->precision); 1050 1051 if (bits) { 1052 unsigned int partsCount = partCount(); 1053 1054 APInt::tcShiftLeft(significandParts(), partsCount, bits); 1055 exponent -= bits; 1056 1057 assert(!APInt::tcIsZero(significandParts(), partsCount)); 1058 } 1059 } 1060 1061 APFloat::cmpResult 1062 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1063 { 1064 int compare; 1065 1066 assert(semantics == rhs.semantics); 1067 assert(category == fcNormal); 1068 assert(rhs.category == fcNormal); 1069 1070 compare = exponent - rhs.exponent; 1071 1072 /* If exponents are equal, do an unsigned bignum comparison of the 1073 significands. */ 1074 if (compare == 0) 1075 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1076 partCount()); 1077 1078 if (compare > 0) 1079 return cmpGreaterThan; 1080 else if (compare < 0) 1081 return cmpLessThan; 1082 else 1083 return cmpEqual; 1084 } 1085 1086 /* Handle overflow. Sign is preserved. We either become infinity or 1087 the largest finite number. */ 1088 APFloat::opStatus 1089 APFloat::handleOverflow(roundingMode rounding_mode) 1090 { 1091 /* Infinity? */ 1092 if (rounding_mode == rmNearestTiesToEven || 1093 rounding_mode == rmNearestTiesToAway || 1094 (rounding_mode == rmTowardPositive && !sign) || 1095 (rounding_mode == rmTowardNegative && sign)) { 1096 category = fcInfinity; 1097 return (opStatus) (opOverflow | opInexact); 1098 } 1099 1100 /* Otherwise we become the largest finite number. */ 1101 category = fcNormal; 1102 exponent = semantics->maxExponent; 1103 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1104 semantics->precision); 1105 1106 return opInexact; 1107 } 1108 1109 /* Returns TRUE if, when truncating the current number, with BIT the 1110 new LSB, with the given lost fraction and rounding mode, the result 1111 would need to be rounded away from zero (i.e., by increasing the 1112 signficand). This routine must work for fcZero of both signs, and 1113 fcNormal numbers. */ 1114 bool 1115 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1116 lostFraction lost_fraction, 1117 unsigned int bit) const 1118 { 1119 /* NaNs and infinities should not have lost fractions. */ 1120 assert(category == fcNormal || category == fcZero); 1121 1122 /* Current callers never pass this so we don't handle it. */ 1123 assert(lost_fraction != lfExactlyZero); 1124 1125 switch (rounding_mode) { 1126 case rmNearestTiesToAway: 1127 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1128 1129 case rmNearestTiesToEven: 1130 if (lost_fraction == lfMoreThanHalf) 1131 return true; 1132 1133 /* Our zeroes don't have a significand to test. */ 1134 if (lost_fraction == lfExactlyHalf && category != fcZero) 1135 return APInt::tcExtractBit(significandParts(), bit); 1136 1137 return false; 1138 1139 case rmTowardZero: 1140 return false; 1141 1142 case rmTowardPositive: 1143 return sign == false; 1144 1145 case rmTowardNegative: 1146 return sign == true; 1147 } 1148 llvm_unreachable("Invalid rounding mode found"); 1149 } 1150 1151 APFloat::opStatus 1152 APFloat::normalize(roundingMode rounding_mode, 1153 lostFraction lost_fraction) 1154 { 1155 unsigned int omsb; /* One, not zero, based MSB. */ 1156 int exponentChange; 1157 1158 if (category != fcNormal) 1159 return opOK; 1160 1161 /* Before rounding normalize the exponent of fcNormal numbers. */ 1162 omsb = significandMSB() + 1; 1163 1164 if (omsb) { 1165 /* OMSB is numbered from 1. We want to place it in the integer 1166 bit numbered PRECISION if possible, with a compensating change in 1167 the exponent. */ 1168 exponentChange = omsb - semantics->precision; 1169 1170 /* If the resulting exponent is too high, overflow according to 1171 the rounding mode. */ 1172 if (exponent + exponentChange > semantics->maxExponent) 1173 return handleOverflow(rounding_mode); 1174 1175 /* Subnormal numbers have exponent minExponent, and their MSB 1176 is forced based on that. */ 1177 if (exponent + exponentChange < semantics->minExponent) 1178 exponentChange = semantics->minExponent - exponent; 1179 1180 /* Shifting left is easy as we don't lose precision. */ 1181 if (exponentChange < 0) { 1182 assert(lost_fraction == lfExactlyZero); 1183 1184 shiftSignificandLeft(-exponentChange); 1185 1186 return opOK; 1187 } 1188 1189 if (exponentChange > 0) { 1190 lostFraction lf; 1191 1192 /* Shift right and capture any new lost fraction. */ 1193 lf = shiftSignificandRight(exponentChange); 1194 1195 lost_fraction = combineLostFractions(lf, lost_fraction); 1196 1197 /* Keep OMSB up-to-date. */ 1198 if (omsb > (unsigned) exponentChange) 1199 omsb -= exponentChange; 1200 else 1201 omsb = 0; 1202 } 1203 } 1204 1205 /* Now round the number according to rounding_mode given the lost 1206 fraction. */ 1207 1208 /* As specified in IEEE 754, since we do not trap we do not report 1209 underflow for exact results. */ 1210 if (lost_fraction == lfExactlyZero) { 1211 /* Canonicalize zeroes. */ 1212 if (omsb == 0) 1213 category = fcZero; 1214 1215 return opOK; 1216 } 1217 1218 /* Increment the significand if we're rounding away from zero. */ 1219 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1220 if (omsb == 0) 1221 exponent = semantics->minExponent; 1222 1223 incrementSignificand(); 1224 omsb = significandMSB() + 1; 1225 1226 /* Did the significand increment overflow? */ 1227 if (omsb == (unsigned) semantics->precision + 1) { 1228 /* Renormalize by incrementing the exponent and shifting our 1229 significand right one. However if we already have the 1230 maximum exponent we overflow to infinity. */ 1231 if (exponent == semantics->maxExponent) { 1232 category = fcInfinity; 1233 1234 return (opStatus) (opOverflow | opInexact); 1235 } 1236 1237 shiftSignificandRight(1); 1238 1239 return opInexact; 1240 } 1241 } 1242 1243 /* The normal case - we were and are not denormal, and any 1244 significand increment above didn't overflow. */ 1245 if (omsb == semantics->precision) 1246 return opInexact; 1247 1248 /* We have a non-zero denormal. */ 1249 assert(omsb < semantics->precision); 1250 1251 /* Canonicalize zeroes. */ 1252 if (omsb == 0) 1253 category = fcZero; 1254 1255 /* The fcZero case is a denormal that underflowed to zero. */ 1256 return (opStatus) (opUnderflow | opInexact); 1257 } 1258 1259 APFloat::opStatus 1260 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1261 { 1262 switch (convolve(category, rhs.category)) { 1263 default: 1264 llvm_unreachable(0); 1265 1266 case convolve(fcNaN, fcZero): 1267 case convolve(fcNaN, fcNormal): 1268 case convolve(fcNaN, fcInfinity): 1269 case convolve(fcNaN, fcNaN): 1270 case convolve(fcNormal, fcZero): 1271 case convolve(fcInfinity, fcNormal): 1272 case convolve(fcInfinity, fcZero): 1273 return opOK; 1274 1275 case convolve(fcZero, fcNaN): 1276 case convolve(fcNormal, fcNaN): 1277 case convolve(fcInfinity, fcNaN): 1278 category = fcNaN; 1279 copySignificand(rhs); 1280 return opOK; 1281 1282 case convolve(fcNormal, fcInfinity): 1283 case convolve(fcZero, fcInfinity): 1284 category = fcInfinity; 1285 sign = rhs.sign ^ subtract; 1286 return opOK; 1287 1288 case convolve(fcZero, fcNormal): 1289 assign(rhs); 1290 sign = rhs.sign ^ subtract; 1291 return opOK; 1292 1293 case convolve(fcZero, fcZero): 1294 /* Sign depends on rounding mode; handled by caller. */ 1295 return opOK; 1296 1297 case convolve(fcInfinity, fcInfinity): 1298 /* Differently signed infinities can only be validly 1299 subtracted. */ 1300 if (((sign ^ rhs.sign)!=0) != subtract) { 1301 makeNaN(); 1302 return opInvalidOp; 1303 } 1304 1305 return opOK; 1306 1307 case convolve(fcNormal, fcNormal): 1308 return opDivByZero; 1309 } 1310 } 1311 1312 /* Add or subtract two normal numbers. */ 1313 lostFraction 1314 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1315 { 1316 integerPart carry; 1317 lostFraction lost_fraction; 1318 int bits; 1319 1320 /* Determine if the operation on the absolute values is effectively 1321 an addition or subtraction. */ 1322 subtract ^= (sign ^ rhs.sign) ? true : false; 1323 1324 /* Are we bigger exponent-wise than the RHS? */ 1325 bits = exponent - rhs.exponent; 1326 1327 /* Subtraction is more subtle than one might naively expect. */ 1328 if (subtract) { 1329 APFloat temp_rhs(rhs); 1330 bool reverse; 1331 1332 if (bits == 0) { 1333 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1334 lost_fraction = lfExactlyZero; 1335 } else if (bits > 0) { 1336 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1337 shiftSignificandLeft(1); 1338 reverse = false; 1339 } else { 1340 lost_fraction = shiftSignificandRight(-bits - 1); 1341 temp_rhs.shiftSignificandLeft(1); 1342 reverse = true; 1343 } 1344 1345 if (reverse) { 1346 carry = temp_rhs.subtractSignificand 1347 (*this, lost_fraction != lfExactlyZero); 1348 copySignificand(temp_rhs); 1349 sign = !sign; 1350 } else { 1351 carry = subtractSignificand 1352 (temp_rhs, lost_fraction != lfExactlyZero); 1353 } 1354 1355 /* Invert the lost fraction - it was on the RHS and 1356 subtracted. */ 1357 if (lost_fraction == lfLessThanHalf) 1358 lost_fraction = lfMoreThanHalf; 1359 else if (lost_fraction == lfMoreThanHalf) 1360 lost_fraction = lfLessThanHalf; 1361 1362 /* The code above is intended to ensure that no borrow is 1363 necessary. */ 1364 assert(!carry); 1365 (void)carry; 1366 } else { 1367 if (bits > 0) { 1368 APFloat temp_rhs(rhs); 1369 1370 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1371 carry = addSignificand(temp_rhs); 1372 } else { 1373 lost_fraction = shiftSignificandRight(-bits); 1374 carry = addSignificand(rhs); 1375 } 1376 1377 /* We have a guard bit; generating a carry cannot happen. */ 1378 assert(!carry); 1379 (void)carry; 1380 } 1381 1382 return lost_fraction; 1383 } 1384 1385 APFloat::opStatus 1386 APFloat::multiplySpecials(const APFloat &rhs) 1387 { 1388 switch (convolve(category, rhs.category)) { 1389 default: 1390 llvm_unreachable(0); 1391 1392 case convolve(fcNaN, fcZero): 1393 case convolve(fcNaN, fcNormal): 1394 case convolve(fcNaN, fcInfinity): 1395 case convolve(fcNaN, fcNaN): 1396 return opOK; 1397 1398 case convolve(fcZero, fcNaN): 1399 case convolve(fcNormal, fcNaN): 1400 case convolve(fcInfinity, fcNaN): 1401 category = fcNaN; 1402 copySignificand(rhs); 1403 return opOK; 1404 1405 case convolve(fcNormal, fcInfinity): 1406 case convolve(fcInfinity, fcNormal): 1407 case convolve(fcInfinity, fcInfinity): 1408 category = fcInfinity; 1409 return opOK; 1410 1411 case convolve(fcZero, fcNormal): 1412 case convolve(fcNormal, fcZero): 1413 case convolve(fcZero, fcZero): 1414 category = fcZero; 1415 return opOK; 1416 1417 case convolve(fcZero, fcInfinity): 1418 case convolve(fcInfinity, fcZero): 1419 makeNaN(); 1420 return opInvalidOp; 1421 1422 case convolve(fcNormal, fcNormal): 1423 return opOK; 1424 } 1425 } 1426 1427 APFloat::opStatus 1428 APFloat::divideSpecials(const APFloat &rhs) 1429 { 1430 switch (convolve(category, rhs.category)) { 1431 default: 1432 llvm_unreachable(0); 1433 1434 case convolve(fcNaN, fcZero): 1435 case convolve(fcNaN, fcNormal): 1436 case convolve(fcNaN, fcInfinity): 1437 case convolve(fcNaN, fcNaN): 1438 case convolve(fcInfinity, fcZero): 1439 case convolve(fcInfinity, fcNormal): 1440 case convolve(fcZero, fcInfinity): 1441 case convolve(fcZero, fcNormal): 1442 return opOK; 1443 1444 case convolve(fcZero, fcNaN): 1445 case convolve(fcNormal, fcNaN): 1446 case convolve(fcInfinity, fcNaN): 1447 category = fcNaN; 1448 copySignificand(rhs); 1449 return opOK; 1450 1451 case convolve(fcNormal, fcInfinity): 1452 category = fcZero; 1453 return opOK; 1454 1455 case convolve(fcNormal, fcZero): 1456 category = fcInfinity; 1457 return opDivByZero; 1458 1459 case convolve(fcInfinity, fcInfinity): 1460 case convolve(fcZero, fcZero): 1461 makeNaN(); 1462 return opInvalidOp; 1463 1464 case convolve(fcNormal, fcNormal): 1465 return opOK; 1466 } 1467 } 1468 1469 APFloat::opStatus 1470 APFloat::modSpecials(const APFloat &rhs) 1471 { 1472 switch (convolve(category, rhs.category)) { 1473 default: 1474 llvm_unreachable(0); 1475 1476 case convolve(fcNaN, fcZero): 1477 case convolve(fcNaN, fcNormal): 1478 case convolve(fcNaN, fcInfinity): 1479 case convolve(fcNaN, fcNaN): 1480 case convolve(fcZero, fcInfinity): 1481 case convolve(fcZero, fcNormal): 1482 case convolve(fcNormal, fcInfinity): 1483 return opOK; 1484 1485 case convolve(fcZero, fcNaN): 1486 case convolve(fcNormal, fcNaN): 1487 case convolve(fcInfinity, fcNaN): 1488 category = fcNaN; 1489 copySignificand(rhs); 1490 return opOK; 1491 1492 case convolve(fcNormal, fcZero): 1493 case convolve(fcInfinity, fcZero): 1494 case convolve(fcInfinity, fcNormal): 1495 case convolve(fcInfinity, fcInfinity): 1496 case convolve(fcZero, fcZero): 1497 makeNaN(); 1498 return opInvalidOp; 1499 1500 case convolve(fcNormal, fcNormal): 1501 return opOK; 1502 } 1503 } 1504 1505 /* Change sign. */ 1506 void 1507 APFloat::changeSign() 1508 { 1509 /* Look mummy, this one's easy. */ 1510 sign = !sign; 1511 } 1512 1513 void 1514 APFloat::clearSign() 1515 { 1516 /* So is this one. */ 1517 sign = 0; 1518 } 1519 1520 void 1521 APFloat::copySign(const APFloat &rhs) 1522 { 1523 /* And this one. */ 1524 sign = rhs.sign; 1525 } 1526 1527 /* Normalized addition or subtraction. */ 1528 APFloat::opStatus 1529 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1530 bool subtract) 1531 { 1532 opStatus fs; 1533 1534 fs = addOrSubtractSpecials(rhs, subtract); 1535 1536 /* This return code means it was not a simple case. */ 1537 if (fs == opDivByZero) { 1538 lostFraction lost_fraction; 1539 1540 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1541 fs = normalize(rounding_mode, lost_fraction); 1542 1543 /* Can only be zero if we lost no fraction. */ 1544 assert(category != fcZero || lost_fraction == lfExactlyZero); 1545 } 1546 1547 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1548 positive zero unless rounding to minus infinity, except that 1549 adding two like-signed zeroes gives that zero. */ 1550 if (category == fcZero) { 1551 if (rhs.category != fcZero || (sign == rhs.sign) == subtract) 1552 sign = (rounding_mode == rmTowardNegative); 1553 } 1554 1555 return fs; 1556 } 1557 1558 /* Normalized addition. */ 1559 APFloat::opStatus 1560 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1561 { 1562 return addOrSubtract(rhs, rounding_mode, false); 1563 } 1564 1565 /* Normalized subtraction. */ 1566 APFloat::opStatus 1567 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1568 { 1569 return addOrSubtract(rhs, rounding_mode, true); 1570 } 1571 1572 /* Normalized multiply. */ 1573 APFloat::opStatus 1574 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1575 { 1576 opStatus fs; 1577 1578 sign ^= rhs.sign; 1579 fs = multiplySpecials(rhs); 1580 1581 if (category == fcNormal) { 1582 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1583 fs = normalize(rounding_mode, lost_fraction); 1584 if (lost_fraction != lfExactlyZero) 1585 fs = (opStatus) (fs | opInexact); 1586 } 1587 1588 return fs; 1589 } 1590 1591 /* Normalized divide. */ 1592 APFloat::opStatus 1593 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1594 { 1595 opStatus fs; 1596 1597 sign ^= rhs.sign; 1598 fs = divideSpecials(rhs); 1599 1600 if (category == fcNormal) { 1601 lostFraction lost_fraction = divideSignificand(rhs); 1602 fs = normalize(rounding_mode, lost_fraction); 1603 if (lost_fraction != lfExactlyZero) 1604 fs = (opStatus) (fs | opInexact); 1605 } 1606 1607 return fs; 1608 } 1609 1610 /* Normalized remainder. This is not currently correct in all cases. */ 1611 APFloat::opStatus 1612 APFloat::remainder(const APFloat &rhs) 1613 { 1614 opStatus fs; 1615 APFloat V = *this; 1616 unsigned int origSign = sign; 1617 1618 fs = V.divide(rhs, rmNearestTiesToEven); 1619 if (fs == opDivByZero) 1620 return fs; 1621 1622 int parts = partCount(); 1623 integerPart *x = new integerPart[parts]; 1624 bool ignored; 1625 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1626 rmNearestTiesToEven, &ignored); 1627 if (fs==opInvalidOp) 1628 return fs; 1629 1630 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1631 rmNearestTiesToEven); 1632 assert(fs==opOK); // should always work 1633 1634 fs = V.multiply(rhs, rmNearestTiesToEven); 1635 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1636 1637 fs = subtract(V, rmNearestTiesToEven); 1638 assert(fs==opOK || fs==opInexact); // likewise 1639 1640 if (isZero()) 1641 sign = origSign; // IEEE754 requires this 1642 delete[] x; 1643 return fs; 1644 } 1645 1646 /* Normalized llvm frem (C fmod). 1647 This is not currently correct in all cases. */ 1648 APFloat::opStatus 1649 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1650 { 1651 opStatus fs; 1652 fs = modSpecials(rhs); 1653 1654 if (category == fcNormal && rhs.category == fcNormal) { 1655 APFloat V = *this; 1656 unsigned int origSign = sign; 1657 1658 fs = V.divide(rhs, rmNearestTiesToEven); 1659 if (fs == opDivByZero) 1660 return fs; 1661 1662 int parts = partCount(); 1663 integerPart *x = new integerPart[parts]; 1664 bool ignored; 1665 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1666 rmTowardZero, &ignored); 1667 if (fs==opInvalidOp) 1668 return fs; 1669 1670 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1671 rmNearestTiesToEven); 1672 assert(fs==opOK); // should always work 1673 1674 fs = V.multiply(rhs, rounding_mode); 1675 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1676 1677 fs = subtract(V, rounding_mode); 1678 assert(fs==opOK || fs==opInexact); // likewise 1679 1680 if (isZero()) 1681 sign = origSign; // IEEE754 requires this 1682 delete[] x; 1683 } 1684 return fs; 1685 } 1686 1687 /* Normalized fused-multiply-add. */ 1688 APFloat::opStatus 1689 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1690 const APFloat &addend, 1691 roundingMode rounding_mode) 1692 { 1693 opStatus fs; 1694 1695 /* Post-multiplication sign, before addition. */ 1696 sign ^= multiplicand.sign; 1697 1698 /* If and only if all arguments are normal do we need to do an 1699 extended-precision calculation. */ 1700 if (category == fcNormal && 1701 multiplicand.category == fcNormal && 1702 addend.category == fcNormal) { 1703 lostFraction lost_fraction; 1704 1705 lost_fraction = multiplySignificand(multiplicand, &addend); 1706 fs = normalize(rounding_mode, lost_fraction); 1707 if (lost_fraction != lfExactlyZero) 1708 fs = (opStatus) (fs | opInexact); 1709 1710 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1711 positive zero unless rounding to minus infinity, except that 1712 adding two like-signed zeroes gives that zero. */ 1713 if (category == fcZero && sign != addend.sign) 1714 sign = (rounding_mode == rmTowardNegative); 1715 } else { 1716 fs = multiplySpecials(multiplicand); 1717 1718 /* FS can only be opOK or opInvalidOp. There is no more work 1719 to do in the latter case. The IEEE-754R standard says it is 1720 implementation-defined in this case whether, if ADDEND is a 1721 quiet NaN, we raise invalid op; this implementation does so. 1722 1723 If we need to do the addition we can do so with normal 1724 precision. */ 1725 if (fs == opOK) 1726 fs = addOrSubtract(addend, rounding_mode, false); 1727 } 1728 1729 return fs; 1730 } 1731 1732 /* Rounding-mode corrrect round to integral value. */ 1733 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { 1734 opStatus fs; 1735 1736 // If the exponent is large enough, we know that this value is already 1737 // integral, and the arithmetic below would potentially cause it to saturate 1738 // to +/-Inf. Bail out early instead. 1739 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics)) 1740 return opOK; 1741 1742 // The algorithm here is quite simple: we add 2^(p-1), where p is the 1743 // precision of our format, and then subtract it back off again. The choice 1744 // of rounding modes for the addition/subtraction determines the rounding mode 1745 // for our integral rounding as well. 1746 // NOTE: When the input value is negative, we do subtraction followed by 1747 // addition instead. 1748 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); 1749 IntegerConstant <<= semanticsPrecision(*semantics)-1; 1750 APFloat MagicConstant(*semantics); 1751 fs = MagicConstant.convertFromAPInt(IntegerConstant, false, 1752 rmNearestTiesToEven); 1753 MagicConstant.copySign(*this); 1754 1755 if (fs != opOK) 1756 return fs; 1757 1758 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. 1759 bool inputSign = isNegative(); 1760 1761 fs = add(MagicConstant, rounding_mode); 1762 if (fs != opOK && fs != opInexact) 1763 return fs; 1764 1765 fs = subtract(MagicConstant, rounding_mode); 1766 1767 // Restore the input sign. 1768 if (inputSign != isNegative()) 1769 changeSign(); 1770 1771 return fs; 1772 } 1773 1774 1775 /* Comparison requires normalized numbers. */ 1776 APFloat::cmpResult 1777 APFloat::compare(const APFloat &rhs) const 1778 { 1779 cmpResult result; 1780 1781 assert(semantics == rhs.semantics); 1782 1783 switch (convolve(category, rhs.category)) { 1784 default: 1785 llvm_unreachable(0); 1786 1787 case convolve(fcNaN, fcZero): 1788 case convolve(fcNaN, fcNormal): 1789 case convolve(fcNaN, fcInfinity): 1790 case convolve(fcNaN, fcNaN): 1791 case convolve(fcZero, fcNaN): 1792 case convolve(fcNormal, fcNaN): 1793 case convolve(fcInfinity, fcNaN): 1794 return cmpUnordered; 1795 1796 case convolve(fcInfinity, fcNormal): 1797 case convolve(fcInfinity, fcZero): 1798 case convolve(fcNormal, fcZero): 1799 if (sign) 1800 return cmpLessThan; 1801 else 1802 return cmpGreaterThan; 1803 1804 case convolve(fcNormal, fcInfinity): 1805 case convolve(fcZero, fcInfinity): 1806 case convolve(fcZero, fcNormal): 1807 if (rhs.sign) 1808 return cmpGreaterThan; 1809 else 1810 return cmpLessThan; 1811 1812 case convolve(fcInfinity, fcInfinity): 1813 if (sign == rhs.sign) 1814 return cmpEqual; 1815 else if (sign) 1816 return cmpLessThan; 1817 else 1818 return cmpGreaterThan; 1819 1820 case convolve(fcZero, fcZero): 1821 return cmpEqual; 1822 1823 case convolve(fcNormal, fcNormal): 1824 break; 1825 } 1826 1827 /* Two normal numbers. Do they have the same sign? */ 1828 if (sign != rhs.sign) { 1829 if (sign) 1830 result = cmpLessThan; 1831 else 1832 result = cmpGreaterThan; 1833 } else { 1834 /* Compare absolute values; invert result if negative. */ 1835 result = compareAbsoluteValue(rhs); 1836 1837 if (sign) { 1838 if (result == cmpLessThan) 1839 result = cmpGreaterThan; 1840 else if (result == cmpGreaterThan) 1841 result = cmpLessThan; 1842 } 1843 } 1844 1845 return result; 1846 } 1847 1848 /// APFloat::convert - convert a value of one floating point type to another. 1849 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1850 /// records whether the transformation lost information, i.e. whether 1851 /// converting the result back to the original type will produce the 1852 /// original value (this is almost the same as return value==fsOK, but there 1853 /// are edge cases where this is not so). 1854 1855 APFloat::opStatus 1856 APFloat::convert(const fltSemantics &toSemantics, 1857 roundingMode rounding_mode, bool *losesInfo) 1858 { 1859 lostFraction lostFraction; 1860 unsigned int newPartCount, oldPartCount; 1861 opStatus fs; 1862 int shift; 1863 const fltSemantics &fromSemantics = *semantics; 1864 1865 lostFraction = lfExactlyZero; 1866 newPartCount = partCountForBits(toSemantics.precision + 1); 1867 oldPartCount = partCount(); 1868 shift = toSemantics.precision - fromSemantics.precision; 1869 1870 bool X86SpecialNan = false; 1871 if (&fromSemantics == &APFloat::x87DoubleExtended && 1872 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && 1873 (!(*significandParts() & 0x8000000000000000ULL) || 1874 !(*significandParts() & 0x4000000000000000ULL))) { 1875 // x86 has some unusual NaNs which cannot be represented in any other 1876 // format; note them here. 1877 X86SpecialNan = true; 1878 } 1879 1880 // If this is a truncation, perform the shift before we narrow the storage. 1881 if (shift < 0 && (category==fcNormal || category==fcNaN)) 1882 lostFraction = shiftRight(significandParts(), oldPartCount, -shift); 1883 1884 // Fix the storage so it can hold to new value. 1885 if (newPartCount > oldPartCount) { 1886 // The new type requires more storage; make it available. 1887 integerPart *newParts; 1888 newParts = new integerPart[newPartCount]; 1889 APInt::tcSet(newParts, 0, newPartCount); 1890 if (category==fcNormal || category==fcNaN) 1891 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1892 freeSignificand(); 1893 significand.parts = newParts; 1894 } else if (newPartCount == 1 && oldPartCount != 1) { 1895 // Switch to built-in storage for a single part. 1896 integerPart newPart = 0; 1897 if (category==fcNormal || category==fcNaN) 1898 newPart = significandParts()[0]; 1899 freeSignificand(); 1900 significand.part = newPart; 1901 } 1902 1903 // Now that we have the right storage, switch the semantics. 1904 semantics = &toSemantics; 1905 1906 // If this is an extension, perform the shift now that the storage is 1907 // available. 1908 if (shift > 0 && (category==fcNormal || category==fcNaN)) 1909 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1910 1911 if (category == fcNormal) { 1912 fs = normalize(rounding_mode, lostFraction); 1913 *losesInfo = (fs != opOK); 1914 } else if (category == fcNaN) { 1915 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; 1916 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1917 // does not give you back the same bits. This is dubious, and we 1918 // don't currently do it. You're really supposed to get 1919 // an invalid operation signal at runtime, but nobody does that. 1920 fs = opOK; 1921 } else { 1922 *losesInfo = false; 1923 fs = opOK; 1924 } 1925 1926 return fs; 1927 } 1928 1929 /* Convert a floating point number to an integer according to the 1930 rounding mode. If the rounded integer value is out of range this 1931 returns an invalid operation exception and the contents of the 1932 destination parts are unspecified. If the rounded value is in 1933 range but the floating point number is not the exact integer, the C 1934 standard doesn't require an inexact exception to be raised. IEEE 1935 854 does require it so we do that. 1936 1937 Note that for conversions to integer type the C standard requires 1938 round-to-zero to always be used. */ 1939 APFloat::opStatus 1940 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 1941 bool isSigned, 1942 roundingMode rounding_mode, 1943 bool *isExact) const 1944 { 1945 lostFraction lost_fraction; 1946 const integerPart *src; 1947 unsigned int dstPartsCount, truncatedBits; 1948 1949 *isExact = false; 1950 1951 /* Handle the three special cases first. */ 1952 if (category == fcInfinity || category == fcNaN) 1953 return opInvalidOp; 1954 1955 dstPartsCount = partCountForBits(width); 1956 1957 if (category == fcZero) { 1958 APInt::tcSet(parts, 0, dstPartsCount); 1959 // Negative zero can't be represented as an int. 1960 *isExact = !sign; 1961 return opOK; 1962 } 1963 1964 src = significandParts(); 1965 1966 /* Step 1: place our absolute value, with any fraction truncated, in 1967 the destination. */ 1968 if (exponent < 0) { 1969 /* Our absolute value is less than one; truncate everything. */ 1970 APInt::tcSet(parts, 0, dstPartsCount); 1971 /* For exponent -1 the integer bit represents .5, look at that. 1972 For smaller exponents leftmost truncated bit is 0. */ 1973 truncatedBits = semantics->precision -1U - exponent; 1974 } else { 1975 /* We want the most significant (exponent + 1) bits; the rest are 1976 truncated. */ 1977 unsigned int bits = exponent + 1U; 1978 1979 /* Hopelessly large in magnitude? */ 1980 if (bits > width) 1981 return opInvalidOp; 1982 1983 if (bits < semantics->precision) { 1984 /* We truncate (semantics->precision - bits) bits. */ 1985 truncatedBits = semantics->precision - bits; 1986 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 1987 } else { 1988 /* We want at least as many bits as are available. */ 1989 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 1990 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 1991 truncatedBits = 0; 1992 } 1993 } 1994 1995 /* Step 2: work out any lost fraction, and increment the absolute 1996 value if we would round away from zero. */ 1997 if (truncatedBits) { 1998 lost_fraction = lostFractionThroughTruncation(src, partCount(), 1999 truncatedBits); 2000 if (lost_fraction != lfExactlyZero && 2001 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 2002 if (APInt::tcIncrement(parts, dstPartsCount)) 2003 return opInvalidOp; /* Overflow. */ 2004 } 2005 } else { 2006 lost_fraction = lfExactlyZero; 2007 } 2008 2009 /* Step 3: check if we fit in the destination. */ 2010 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 2011 2012 if (sign) { 2013 if (!isSigned) { 2014 /* Negative numbers cannot be represented as unsigned. */ 2015 if (omsb != 0) 2016 return opInvalidOp; 2017 } else { 2018 /* It takes omsb bits to represent the unsigned integer value. 2019 We lose a bit for the sign, but care is needed as the 2020 maximally negative integer is a special case. */ 2021 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 2022 return opInvalidOp; 2023 2024 /* This case can happen because of rounding. */ 2025 if (omsb > width) 2026 return opInvalidOp; 2027 } 2028 2029 APInt::tcNegate (parts, dstPartsCount); 2030 } else { 2031 if (omsb >= width + !isSigned) 2032 return opInvalidOp; 2033 } 2034 2035 if (lost_fraction == lfExactlyZero) { 2036 *isExact = true; 2037 return opOK; 2038 } else 2039 return opInexact; 2040 } 2041 2042 /* Same as convertToSignExtendedInteger, except we provide 2043 deterministic values in case of an invalid operation exception, 2044 namely zero for NaNs and the minimal or maximal value respectively 2045 for underflow or overflow. 2046 The *isExact output tells whether the result is exact, in the sense 2047 that converting it back to the original floating point type produces 2048 the original value. This is almost equivalent to result==opOK, 2049 except for negative zeroes. 2050 */ 2051 APFloat::opStatus 2052 APFloat::convertToInteger(integerPart *parts, unsigned int width, 2053 bool isSigned, 2054 roundingMode rounding_mode, bool *isExact) const 2055 { 2056 opStatus fs; 2057 2058 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 2059 isExact); 2060 2061 if (fs == opInvalidOp) { 2062 unsigned int bits, dstPartsCount; 2063 2064 dstPartsCount = partCountForBits(width); 2065 2066 if (category == fcNaN) 2067 bits = 0; 2068 else if (sign) 2069 bits = isSigned; 2070 else 2071 bits = width - isSigned; 2072 2073 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 2074 if (sign && isSigned) 2075 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 2076 } 2077 2078 return fs; 2079 } 2080 2081 /* Same as convertToInteger(integerPart*, ...), except the result is returned in 2082 an APSInt, whose initial bit-width and signed-ness are used to determine the 2083 precision of the conversion. 2084 */ 2085 APFloat::opStatus 2086 APFloat::convertToInteger(APSInt &result, 2087 roundingMode rounding_mode, bool *isExact) const 2088 { 2089 unsigned bitWidth = result.getBitWidth(); 2090 SmallVector<uint64_t, 4> parts(result.getNumWords()); 2091 opStatus status = convertToInteger( 2092 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); 2093 // Keeps the original signed-ness. 2094 result = APInt(bitWidth, parts); 2095 return status; 2096 } 2097 2098 /* Convert an unsigned integer SRC to a floating point number, 2099 rounding according to ROUNDING_MODE. The sign of the floating 2100 point number is not modified. */ 2101 APFloat::opStatus 2102 APFloat::convertFromUnsignedParts(const integerPart *src, 2103 unsigned int srcCount, 2104 roundingMode rounding_mode) 2105 { 2106 unsigned int omsb, precision, dstCount; 2107 integerPart *dst; 2108 lostFraction lost_fraction; 2109 2110 category = fcNormal; 2111 omsb = APInt::tcMSB(src, srcCount) + 1; 2112 dst = significandParts(); 2113 dstCount = partCount(); 2114 precision = semantics->precision; 2115 2116 /* We want the most significant PRECISION bits of SRC. There may not 2117 be that many; extract what we can. */ 2118 if (precision <= omsb) { 2119 exponent = omsb - 1; 2120 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2121 omsb - precision); 2122 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2123 } else { 2124 exponent = precision - 1; 2125 lost_fraction = lfExactlyZero; 2126 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2127 } 2128 2129 return normalize(rounding_mode, lost_fraction); 2130 } 2131 2132 APFloat::opStatus 2133 APFloat::convertFromAPInt(const APInt &Val, 2134 bool isSigned, 2135 roundingMode rounding_mode) 2136 { 2137 unsigned int partCount = Val.getNumWords(); 2138 APInt api = Val; 2139 2140 sign = false; 2141 if (isSigned && api.isNegative()) { 2142 sign = true; 2143 api = -api; 2144 } 2145 2146 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2147 } 2148 2149 /* Convert a two's complement integer SRC to a floating point number, 2150 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2151 integer is signed, in which case it must be sign-extended. */ 2152 APFloat::opStatus 2153 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2154 unsigned int srcCount, 2155 bool isSigned, 2156 roundingMode rounding_mode) 2157 { 2158 opStatus status; 2159 2160 if (isSigned && 2161 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2162 integerPart *copy; 2163 2164 /* If we're signed and negative negate a copy. */ 2165 sign = true; 2166 copy = new integerPart[srcCount]; 2167 APInt::tcAssign(copy, src, srcCount); 2168 APInt::tcNegate(copy, srcCount); 2169 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2170 delete [] copy; 2171 } else { 2172 sign = false; 2173 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2174 } 2175 2176 return status; 2177 } 2178 2179 /* FIXME: should this just take a const APInt reference? */ 2180 APFloat::opStatus 2181 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2182 unsigned int width, bool isSigned, 2183 roundingMode rounding_mode) 2184 { 2185 unsigned int partCount = partCountForBits(width); 2186 APInt api = APInt(width, makeArrayRef(parts, partCount)); 2187 2188 sign = false; 2189 if (isSigned && APInt::tcExtractBit(parts, width - 1)) { 2190 sign = true; 2191 api = -api; 2192 } 2193 2194 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2195 } 2196 2197 APFloat::opStatus 2198 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) 2199 { 2200 lostFraction lost_fraction = lfExactlyZero; 2201 integerPart *significand; 2202 unsigned int bitPos, partsCount; 2203 StringRef::iterator dot, firstSignificantDigit; 2204 2205 zeroSignificand(); 2206 exponent = 0; 2207 category = fcNormal; 2208 2209 significand = significandParts(); 2210 partsCount = partCount(); 2211 bitPos = partsCount * integerPartWidth; 2212 2213 /* Skip leading zeroes and any (hexa)decimal point. */ 2214 StringRef::iterator begin = s.begin(); 2215 StringRef::iterator end = s.end(); 2216 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); 2217 firstSignificantDigit = p; 2218 2219 for (; p != end;) { 2220 integerPart hex_value; 2221 2222 if (*p == '.') { 2223 assert(dot == end && "String contains multiple dots"); 2224 dot = p++; 2225 if (p == end) { 2226 break; 2227 } 2228 } 2229 2230 hex_value = hexDigitValue(*p); 2231 if (hex_value == -1U) { 2232 break; 2233 } 2234 2235 p++; 2236 2237 if (p == end) { 2238 break; 2239 } else { 2240 /* Store the number whilst 4-bit nibbles remain. */ 2241 if (bitPos) { 2242 bitPos -= 4; 2243 hex_value <<= bitPos % integerPartWidth; 2244 significand[bitPos / integerPartWidth] |= hex_value; 2245 } else { 2246 lost_fraction = trailingHexadecimalFraction(p, end, hex_value); 2247 while (p != end && hexDigitValue(*p) != -1U) 2248 p++; 2249 break; 2250 } 2251 } 2252 } 2253 2254 /* Hex floats require an exponent but not a hexadecimal point. */ 2255 assert(p != end && "Hex strings require an exponent"); 2256 assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); 2257 assert(p != begin && "Significand has no digits"); 2258 assert((dot == end || p - begin != 1) && "Significand has no digits"); 2259 2260 /* Ignore the exponent if we are zero. */ 2261 if (p != firstSignificantDigit) { 2262 int expAdjustment; 2263 2264 /* Implicit hexadecimal point? */ 2265 if (dot == end) 2266 dot = p; 2267 2268 /* Calculate the exponent adjustment implicit in the number of 2269 significant digits. */ 2270 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2271 if (expAdjustment < 0) 2272 expAdjustment++; 2273 expAdjustment = expAdjustment * 4 - 1; 2274 2275 /* Adjust for writing the significand starting at the most 2276 significant nibble. */ 2277 expAdjustment += semantics->precision; 2278 expAdjustment -= partsCount * integerPartWidth; 2279 2280 /* Adjust for the given exponent. */ 2281 exponent = totalExponent(p + 1, end, expAdjustment); 2282 } 2283 2284 return normalize(rounding_mode, lost_fraction); 2285 } 2286 2287 APFloat::opStatus 2288 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2289 unsigned sigPartCount, int exp, 2290 roundingMode rounding_mode) 2291 { 2292 unsigned int parts, pow5PartCount; 2293 fltSemantics calcSemantics = { 32767, -32767, 0 }; 2294 integerPart pow5Parts[maxPowerOfFiveParts]; 2295 bool isNearest; 2296 2297 isNearest = (rounding_mode == rmNearestTiesToEven || 2298 rounding_mode == rmNearestTiesToAway); 2299 2300 parts = partCountForBits(semantics->precision + 11); 2301 2302 /* Calculate pow(5, abs(exp)). */ 2303 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2304 2305 for (;; parts *= 2) { 2306 opStatus sigStatus, powStatus; 2307 unsigned int excessPrecision, truncatedBits; 2308 2309 calcSemantics.precision = parts * integerPartWidth - 1; 2310 excessPrecision = calcSemantics.precision - semantics->precision; 2311 truncatedBits = excessPrecision; 2312 2313 APFloat decSig(calcSemantics, fcZero, sign); 2314 APFloat pow5(calcSemantics, fcZero, false); 2315 2316 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2317 rmNearestTiesToEven); 2318 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2319 rmNearestTiesToEven); 2320 /* Add exp, as 10^n = 5^n * 2^n. */ 2321 decSig.exponent += exp; 2322 2323 lostFraction calcLostFraction; 2324 integerPart HUerr, HUdistance; 2325 unsigned int powHUerr; 2326 2327 if (exp >= 0) { 2328 /* multiplySignificand leaves the precision-th bit set to 1. */ 2329 calcLostFraction = decSig.multiplySignificand(pow5, NULL); 2330 powHUerr = powStatus != opOK; 2331 } else { 2332 calcLostFraction = decSig.divideSignificand(pow5); 2333 /* Denormal numbers have less precision. */ 2334 if (decSig.exponent < semantics->minExponent) { 2335 excessPrecision += (semantics->minExponent - decSig.exponent); 2336 truncatedBits = excessPrecision; 2337 if (excessPrecision > calcSemantics.precision) 2338 excessPrecision = calcSemantics.precision; 2339 } 2340 /* Extra half-ulp lost in reciprocal of exponent. */ 2341 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2342 } 2343 2344 /* Both multiplySignificand and divideSignificand return the 2345 result with the integer bit set. */ 2346 assert(APInt::tcExtractBit 2347 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2348 2349 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2350 powHUerr); 2351 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2352 excessPrecision, isNearest); 2353 2354 /* Are we guaranteed to round correctly if we truncate? */ 2355 if (HUdistance >= HUerr) { 2356 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2357 calcSemantics.precision - excessPrecision, 2358 excessPrecision); 2359 /* Take the exponent of decSig. If we tcExtract-ed less bits 2360 above we must adjust our exponent to compensate for the 2361 implicit right shift. */ 2362 exponent = (decSig.exponent + semantics->precision 2363 - (calcSemantics.precision - excessPrecision)); 2364 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2365 decSig.partCount(), 2366 truncatedBits); 2367 return normalize(rounding_mode, calcLostFraction); 2368 } 2369 } 2370 } 2371 2372 APFloat::opStatus 2373 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 2374 { 2375 decimalInfo D; 2376 opStatus fs; 2377 2378 /* Scan the text. */ 2379 StringRef::iterator p = str.begin(); 2380 interpretDecimal(p, str.end(), &D); 2381 2382 /* Handle the quick cases. First the case of no significant digits, 2383 i.e. zero, and then exponents that are obviously too large or too 2384 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2385 definitely overflows if 2386 2387 (exp - 1) * L >= maxExponent 2388 2389 and definitely underflows to zero where 2390 2391 (exp + 1) * L <= minExponent - precision 2392 2393 With integer arithmetic the tightest bounds for L are 2394 2395 93/28 < L < 196/59 [ numerator <= 256 ] 2396 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2397 */ 2398 2399 if (decDigitValue(*D.firstSigDigit) >= 10U) { 2400 category = fcZero; 2401 fs = opOK; 2402 2403 /* Check whether the normalized exponent is high enough to overflow 2404 max during the log-rebasing in the max-exponent check below. */ 2405 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { 2406 fs = handleOverflow(rounding_mode); 2407 2408 /* If it wasn't, then it also wasn't high enough to overflow max 2409 during the log-rebasing in the min-exponent check. Check that it 2410 won't overflow min in either check, then perform the min-exponent 2411 check. */ 2412 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || 2413 (D.normalizedExponent + 1) * 28738 <= 2414 8651 * (semantics->minExponent - (int) semantics->precision)) { 2415 /* Underflow to zero and round. */ 2416 zeroSignificand(); 2417 fs = normalize(rounding_mode, lfLessThanHalf); 2418 2419 /* We can finally safely perform the max-exponent check. */ 2420 } else if ((D.normalizedExponent - 1) * 42039 2421 >= 12655 * semantics->maxExponent) { 2422 /* Overflow and round. */ 2423 fs = handleOverflow(rounding_mode); 2424 } else { 2425 integerPart *decSignificand; 2426 unsigned int partCount; 2427 2428 /* A tight upper bound on number of bits required to hold an 2429 N-digit decimal integer is N * 196 / 59. Allocate enough space 2430 to hold the full significand, and an extra part required by 2431 tcMultiplyPart. */ 2432 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2433 partCount = partCountForBits(1 + 196 * partCount / 59); 2434 decSignificand = new integerPart[partCount + 1]; 2435 partCount = 0; 2436 2437 /* Convert to binary efficiently - we do almost all multiplication 2438 in an integerPart. When this would overflow do we do a single 2439 bignum multiplication, and then revert again to multiplication 2440 in an integerPart. */ 2441 do { 2442 integerPart decValue, val, multiplier; 2443 2444 val = 0; 2445 multiplier = 1; 2446 2447 do { 2448 if (*p == '.') { 2449 p++; 2450 if (p == str.end()) { 2451 break; 2452 } 2453 } 2454 decValue = decDigitValue(*p++); 2455 assert(decValue < 10U && "Invalid character in significand"); 2456 multiplier *= 10; 2457 val = val * 10 + decValue; 2458 /* The maximum number that can be multiplied by ten with any 2459 digit added without overflowing an integerPart. */ 2460 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2461 2462 /* Multiply out the current part. */ 2463 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2464 partCount, partCount + 1, false); 2465 2466 /* If we used another part (likely but not guaranteed), increase 2467 the count. */ 2468 if (decSignificand[partCount]) 2469 partCount++; 2470 } while (p <= D.lastSigDigit); 2471 2472 category = fcNormal; 2473 fs = roundSignificandWithExponent(decSignificand, partCount, 2474 D.exponent, rounding_mode); 2475 2476 delete [] decSignificand; 2477 } 2478 2479 return fs; 2480 } 2481 2482 APFloat::opStatus 2483 APFloat::convertFromString(StringRef str, roundingMode rounding_mode) 2484 { 2485 assert(!str.empty() && "Invalid string length"); 2486 2487 /* Handle a leading minus sign. */ 2488 StringRef::iterator p = str.begin(); 2489 size_t slen = str.size(); 2490 sign = *p == '-' ? 1 : 0; 2491 if (*p == '-' || *p == '+') { 2492 p++; 2493 slen--; 2494 assert(slen && "String has no digits"); 2495 } 2496 2497 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2498 assert(slen - 2 && "Invalid string"); 2499 return convertFromHexadecimalString(StringRef(p + 2, slen - 2), 2500 rounding_mode); 2501 } 2502 2503 return convertFromDecimalString(StringRef(p, slen), rounding_mode); 2504 } 2505 2506 /* Write out a hexadecimal representation of the floating point value 2507 to DST, which must be of sufficient size, in the C99 form 2508 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2509 excluding the terminating NUL. 2510 2511 If UPPERCASE, the output is in upper case, otherwise in lower case. 2512 2513 HEXDIGITS digits appear altogether, rounding the value if 2514 necessary. If HEXDIGITS is 0, the minimal precision to display the 2515 number precisely is used instead. If nothing would appear after 2516 the decimal point it is suppressed. 2517 2518 The decimal exponent is always printed and has at least one digit. 2519 Zero values display an exponent of zero. Infinities and NaNs 2520 appear as "infinity" or "nan" respectively. 2521 2522 The above rules are as specified by C99. There is ambiguity about 2523 what the leading hexadecimal digit should be. This implementation 2524 uses whatever is necessary so that the exponent is displayed as 2525 stored. This implies the exponent will fall within the IEEE format 2526 range, and the leading hexadecimal digit will be 0 (for denormals), 2527 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2528 any other digits zero). 2529 */ 2530 unsigned int 2531 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2532 bool upperCase, roundingMode rounding_mode) const 2533 { 2534 char *p; 2535 2536 p = dst; 2537 if (sign) 2538 *dst++ = '-'; 2539 2540 switch (category) { 2541 case fcInfinity: 2542 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2543 dst += sizeof infinityL - 1; 2544 break; 2545 2546 case fcNaN: 2547 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2548 dst += sizeof NaNU - 1; 2549 break; 2550 2551 case fcZero: 2552 *dst++ = '0'; 2553 *dst++ = upperCase ? 'X': 'x'; 2554 *dst++ = '0'; 2555 if (hexDigits > 1) { 2556 *dst++ = '.'; 2557 memset (dst, '0', hexDigits - 1); 2558 dst += hexDigits - 1; 2559 } 2560 *dst++ = upperCase ? 'P': 'p'; 2561 *dst++ = '0'; 2562 break; 2563 2564 case fcNormal: 2565 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2566 break; 2567 } 2568 2569 *dst = 0; 2570 2571 return static_cast<unsigned int>(dst - p); 2572 } 2573 2574 /* Does the hard work of outputting the correctly rounded hexadecimal 2575 form of a normal floating point number with the specified number of 2576 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2577 digits necessary to print the value precisely is output. */ 2578 char * 2579 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2580 bool upperCase, 2581 roundingMode rounding_mode) const 2582 { 2583 unsigned int count, valueBits, shift, partsCount, outputDigits; 2584 const char *hexDigitChars; 2585 const integerPart *significand; 2586 char *p; 2587 bool roundUp; 2588 2589 *dst++ = '0'; 2590 *dst++ = upperCase ? 'X': 'x'; 2591 2592 roundUp = false; 2593 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2594 2595 significand = significandParts(); 2596 partsCount = partCount(); 2597 2598 /* +3 because the first digit only uses the single integer bit, so 2599 we have 3 virtual zero most-significant-bits. */ 2600 valueBits = semantics->precision + 3; 2601 shift = integerPartWidth - valueBits % integerPartWidth; 2602 2603 /* The natural number of digits required ignoring trailing 2604 insignificant zeroes. */ 2605 outputDigits = (valueBits - significandLSB () + 3) / 4; 2606 2607 /* hexDigits of zero means use the required number for the 2608 precision. Otherwise, see if we are truncating. If we are, 2609 find out if we need to round away from zero. */ 2610 if (hexDigits) { 2611 if (hexDigits < outputDigits) { 2612 /* We are dropping non-zero bits, so need to check how to round. 2613 "bits" is the number of dropped bits. */ 2614 unsigned int bits; 2615 lostFraction fraction; 2616 2617 bits = valueBits - hexDigits * 4; 2618 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2619 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2620 } 2621 outputDigits = hexDigits; 2622 } 2623 2624 /* Write the digits consecutively, and start writing in the location 2625 of the hexadecimal point. We move the most significant digit 2626 left and add the hexadecimal point later. */ 2627 p = ++dst; 2628 2629 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2630 2631 while (outputDigits && count) { 2632 integerPart part; 2633 2634 /* Put the most significant integerPartWidth bits in "part". */ 2635 if (--count == partsCount) 2636 part = 0; /* An imaginary higher zero part. */ 2637 else 2638 part = significand[count] << shift; 2639 2640 if (count && shift) 2641 part |= significand[count - 1] >> (integerPartWidth - shift); 2642 2643 /* Convert as much of "part" to hexdigits as we can. */ 2644 unsigned int curDigits = integerPartWidth / 4; 2645 2646 if (curDigits > outputDigits) 2647 curDigits = outputDigits; 2648 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2649 outputDigits -= curDigits; 2650 } 2651 2652 if (roundUp) { 2653 char *q = dst; 2654 2655 /* Note that hexDigitChars has a trailing '0'. */ 2656 do { 2657 q--; 2658 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2659 } while (*q == '0'); 2660 assert(q >= p); 2661 } else { 2662 /* Add trailing zeroes. */ 2663 memset (dst, '0', outputDigits); 2664 dst += outputDigits; 2665 } 2666 2667 /* Move the most significant digit to before the point, and if there 2668 is something after the decimal point add it. This must come 2669 after rounding above. */ 2670 p[-1] = p[0]; 2671 if (dst -1 == p) 2672 dst--; 2673 else 2674 p[0] = '.'; 2675 2676 /* Finally output the exponent. */ 2677 *dst++ = upperCase ? 'P': 'p'; 2678 2679 return writeSignedDecimal (dst, exponent); 2680 } 2681 2682 hash_code llvm::hash_value(const APFloat &Arg) { 2683 if (Arg.category != APFloat::fcNormal) 2684 return hash_combine((uint8_t)Arg.category, 2685 // NaN has no sign, fix it at zero. 2686 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, 2687 Arg.semantics->precision); 2688 2689 // Normal floats need their exponent and significand hashed. 2690 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, 2691 Arg.semantics->precision, Arg.exponent, 2692 hash_combine_range( 2693 Arg.significandParts(), 2694 Arg.significandParts() + Arg.partCount())); 2695 } 2696 2697 // Conversion from APFloat to/from host float/double. It may eventually be 2698 // possible to eliminate these and have everybody deal with APFloats, but that 2699 // will take a while. This approach will not easily extend to long double. 2700 // Current implementation requires integerPartWidth==64, which is correct at 2701 // the moment but could be made more general. 2702 2703 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2704 // the actual IEEE respresentations. We compensate for that here. 2705 2706 APInt 2707 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2708 { 2709 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2710 assert(partCount()==2); 2711 2712 uint64_t myexponent, mysignificand; 2713 2714 if (category==fcNormal) { 2715 myexponent = exponent+16383; //bias 2716 mysignificand = significandParts()[0]; 2717 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2718 myexponent = 0; // denormal 2719 } else if (category==fcZero) { 2720 myexponent = 0; 2721 mysignificand = 0; 2722 } else if (category==fcInfinity) { 2723 myexponent = 0x7fff; 2724 mysignificand = 0x8000000000000000ULL; 2725 } else { 2726 assert(category == fcNaN && "Unknown category"); 2727 myexponent = 0x7fff; 2728 mysignificand = significandParts()[0]; 2729 } 2730 2731 uint64_t words[2]; 2732 words[0] = mysignificand; 2733 words[1] = ((uint64_t)(sign & 1) << 15) | 2734 (myexponent & 0x7fffLL); 2735 return APInt(80, words); 2736 } 2737 2738 APInt 2739 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2740 { 2741 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2742 assert(partCount()==2); 2743 2744 uint64_t words[2]; 2745 opStatus fs; 2746 bool losesInfo; 2747 2748 // Convert number to double. To avoid spurious underflows, we re- 2749 // normalize against the "double" minExponent first, and only *then* 2750 // truncate the mantissa. The result of that second conversion 2751 // may be inexact, but should never underflow. 2752 // Declare fltSemantics before APFloat that uses it (and 2753 // saves pointer to it) to ensure correct destruction order. 2754 fltSemantics extendedSemantics = *semantics; 2755 extendedSemantics.minExponent = IEEEdouble.minExponent; 2756 APFloat extended(*this); 2757 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2758 assert(fs == opOK && !losesInfo); 2759 (void)fs; 2760 2761 APFloat u(extended); 2762 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2763 assert(fs == opOK || fs == opInexact); 2764 (void)fs; 2765 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); 2766 2767 // If conversion was exact or resulted in a special case, we're done; 2768 // just set the second double to zero. Otherwise, re-convert back to 2769 // the extended format and compute the difference. This now should 2770 // convert exactly to double. 2771 if (u.category == fcNormal && losesInfo) { 2772 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); 2773 assert(fs == opOK && !losesInfo); 2774 (void)fs; 2775 2776 APFloat v(extended); 2777 v.subtract(u, rmNearestTiesToEven); 2778 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); 2779 assert(fs == opOK && !losesInfo); 2780 (void)fs; 2781 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); 2782 } else { 2783 words[1] = 0; 2784 } 2785 2786 return APInt(128, words); 2787 } 2788 2789 APInt 2790 APFloat::convertQuadrupleAPFloatToAPInt() const 2791 { 2792 assert(semantics == (const llvm::fltSemantics*)&IEEEquad); 2793 assert(partCount()==2); 2794 2795 uint64_t myexponent, mysignificand, mysignificand2; 2796 2797 if (category==fcNormal) { 2798 myexponent = exponent+16383; //bias 2799 mysignificand = significandParts()[0]; 2800 mysignificand2 = significandParts()[1]; 2801 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) 2802 myexponent = 0; // denormal 2803 } else if (category==fcZero) { 2804 myexponent = 0; 2805 mysignificand = mysignificand2 = 0; 2806 } else if (category==fcInfinity) { 2807 myexponent = 0x7fff; 2808 mysignificand = mysignificand2 = 0; 2809 } else { 2810 assert(category == fcNaN && "Unknown category!"); 2811 myexponent = 0x7fff; 2812 mysignificand = significandParts()[0]; 2813 mysignificand2 = significandParts()[1]; 2814 } 2815 2816 uint64_t words[2]; 2817 words[0] = mysignificand; 2818 words[1] = ((uint64_t)(sign & 1) << 63) | 2819 ((myexponent & 0x7fff) << 48) | 2820 (mysignificand2 & 0xffffffffffffLL); 2821 2822 return APInt(128, words); 2823 } 2824 2825 APInt 2826 APFloat::convertDoubleAPFloatToAPInt() const 2827 { 2828 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2829 assert(partCount()==1); 2830 2831 uint64_t myexponent, mysignificand; 2832 2833 if (category==fcNormal) { 2834 myexponent = exponent+1023; //bias 2835 mysignificand = *significandParts(); 2836 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2837 myexponent = 0; // denormal 2838 } else if (category==fcZero) { 2839 myexponent = 0; 2840 mysignificand = 0; 2841 } else if (category==fcInfinity) { 2842 myexponent = 0x7ff; 2843 mysignificand = 0; 2844 } else { 2845 assert(category == fcNaN && "Unknown category!"); 2846 myexponent = 0x7ff; 2847 mysignificand = *significandParts(); 2848 } 2849 2850 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 2851 ((myexponent & 0x7ff) << 52) | 2852 (mysignificand & 0xfffffffffffffLL)))); 2853 } 2854 2855 APInt 2856 APFloat::convertFloatAPFloatToAPInt() const 2857 { 2858 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2859 assert(partCount()==1); 2860 2861 uint32_t myexponent, mysignificand; 2862 2863 if (category==fcNormal) { 2864 myexponent = exponent+127; //bias 2865 mysignificand = (uint32_t)*significandParts(); 2866 if (myexponent == 1 && !(mysignificand & 0x800000)) 2867 myexponent = 0; // denormal 2868 } else if (category==fcZero) { 2869 myexponent = 0; 2870 mysignificand = 0; 2871 } else if (category==fcInfinity) { 2872 myexponent = 0xff; 2873 mysignificand = 0; 2874 } else { 2875 assert(category == fcNaN && "Unknown category!"); 2876 myexponent = 0xff; 2877 mysignificand = (uint32_t)*significandParts(); 2878 } 2879 2880 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 2881 (mysignificand & 0x7fffff))); 2882 } 2883 2884 APInt 2885 APFloat::convertHalfAPFloatToAPInt() const 2886 { 2887 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); 2888 assert(partCount()==1); 2889 2890 uint32_t myexponent, mysignificand; 2891 2892 if (category==fcNormal) { 2893 myexponent = exponent+15; //bias 2894 mysignificand = (uint32_t)*significandParts(); 2895 if (myexponent == 1 && !(mysignificand & 0x400)) 2896 myexponent = 0; // denormal 2897 } else if (category==fcZero) { 2898 myexponent = 0; 2899 mysignificand = 0; 2900 } else if (category==fcInfinity) { 2901 myexponent = 0x1f; 2902 mysignificand = 0; 2903 } else { 2904 assert(category == fcNaN && "Unknown category!"); 2905 myexponent = 0x1f; 2906 mysignificand = (uint32_t)*significandParts(); 2907 } 2908 2909 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | 2910 (mysignificand & 0x3ff))); 2911 } 2912 2913 // This function creates an APInt that is just a bit map of the floating 2914 // point constant as it would appear in memory. It is not a conversion, 2915 // and treating the result as a normal integer is unlikely to be useful. 2916 2917 APInt 2918 APFloat::bitcastToAPInt() const 2919 { 2920 if (semantics == (const llvm::fltSemantics*)&IEEEhalf) 2921 return convertHalfAPFloatToAPInt(); 2922 2923 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 2924 return convertFloatAPFloatToAPInt(); 2925 2926 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 2927 return convertDoubleAPFloatToAPInt(); 2928 2929 if (semantics == (const llvm::fltSemantics*)&IEEEquad) 2930 return convertQuadrupleAPFloatToAPInt(); 2931 2932 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 2933 return convertPPCDoubleDoubleAPFloatToAPInt(); 2934 2935 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 2936 "unknown format!"); 2937 return convertF80LongDoubleAPFloatToAPInt(); 2938 } 2939 2940 float 2941 APFloat::convertToFloat() const 2942 { 2943 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && 2944 "Float semantics are not IEEEsingle"); 2945 APInt api = bitcastToAPInt(); 2946 return api.bitsToFloat(); 2947 } 2948 2949 double 2950 APFloat::convertToDouble() const 2951 { 2952 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && 2953 "Float semantics are not IEEEdouble"); 2954 APInt api = bitcastToAPInt(); 2955 return api.bitsToDouble(); 2956 } 2957 2958 /// Integer bit is explicit in this format. Intel hardware (387 and later) 2959 /// does not support these bit patterns: 2960 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 2961 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 2962 /// exponent = 0, integer bit 1 ("pseudodenormal") 2963 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 2964 /// At the moment, the first two are treated as NaNs, the second two as Normal. 2965 void 2966 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 2967 { 2968 assert(api.getBitWidth()==80); 2969 uint64_t i1 = api.getRawData()[0]; 2970 uint64_t i2 = api.getRawData()[1]; 2971 uint64_t myexponent = (i2 & 0x7fff); 2972 uint64_t mysignificand = i1; 2973 2974 initialize(&APFloat::x87DoubleExtended); 2975 assert(partCount()==2); 2976 2977 sign = static_cast<unsigned int>(i2>>15); 2978 if (myexponent==0 && mysignificand==0) { 2979 // exponent, significand meaningless 2980 category = fcZero; 2981 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 2982 // exponent, significand meaningless 2983 category = fcInfinity; 2984 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 2985 // exponent meaningless 2986 category = fcNaN; 2987 significandParts()[0] = mysignificand; 2988 significandParts()[1] = 0; 2989 } else { 2990 category = fcNormal; 2991 exponent = myexponent - 16383; 2992 significandParts()[0] = mysignificand; 2993 significandParts()[1] = 0; 2994 if (myexponent==0) // denormal 2995 exponent = -16382; 2996 } 2997 } 2998 2999 void 3000 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 3001 { 3002 assert(api.getBitWidth()==128); 3003 uint64_t i1 = api.getRawData()[0]; 3004 uint64_t i2 = api.getRawData()[1]; 3005 opStatus fs; 3006 bool losesInfo; 3007 3008 // Get the first double and convert to our format. 3009 initFromDoubleAPInt(APInt(64, i1)); 3010 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3011 assert(fs == opOK && !losesInfo); 3012 (void)fs; 3013 3014 // Unless we have a special case, add in second double. 3015 if (category == fcNormal) { 3016 APFloat v(IEEEdouble, APInt(64, i2)); 3017 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); 3018 assert(fs == opOK && !losesInfo); 3019 (void)fs; 3020 3021 add(v, rmNearestTiesToEven); 3022 } 3023 } 3024 3025 void 3026 APFloat::initFromQuadrupleAPInt(const APInt &api) 3027 { 3028 assert(api.getBitWidth()==128); 3029 uint64_t i1 = api.getRawData()[0]; 3030 uint64_t i2 = api.getRawData()[1]; 3031 uint64_t myexponent = (i2 >> 48) & 0x7fff; 3032 uint64_t mysignificand = i1; 3033 uint64_t mysignificand2 = i2 & 0xffffffffffffLL; 3034 3035 initialize(&APFloat::IEEEquad); 3036 assert(partCount()==2); 3037 3038 sign = static_cast<unsigned int>(i2>>63); 3039 if (myexponent==0 && 3040 (mysignificand==0 && mysignificand2==0)) { 3041 // exponent, significand meaningless 3042 category = fcZero; 3043 } else if (myexponent==0x7fff && 3044 (mysignificand==0 && mysignificand2==0)) { 3045 // exponent, significand meaningless 3046 category = fcInfinity; 3047 } else if (myexponent==0x7fff && 3048 (mysignificand!=0 || mysignificand2 !=0)) { 3049 // exponent meaningless 3050 category = fcNaN; 3051 significandParts()[0] = mysignificand; 3052 significandParts()[1] = mysignificand2; 3053 } else { 3054 category = fcNormal; 3055 exponent = myexponent - 16383; 3056 significandParts()[0] = mysignificand; 3057 significandParts()[1] = mysignificand2; 3058 if (myexponent==0) // denormal 3059 exponent = -16382; 3060 else 3061 significandParts()[1] |= 0x1000000000000LL; // integer bit 3062 } 3063 } 3064 3065 void 3066 APFloat::initFromDoubleAPInt(const APInt &api) 3067 { 3068 assert(api.getBitWidth()==64); 3069 uint64_t i = *api.getRawData(); 3070 uint64_t myexponent = (i >> 52) & 0x7ff; 3071 uint64_t mysignificand = i & 0xfffffffffffffLL; 3072 3073 initialize(&APFloat::IEEEdouble); 3074 assert(partCount()==1); 3075 3076 sign = static_cast<unsigned int>(i>>63); 3077 if (myexponent==0 && mysignificand==0) { 3078 // exponent, significand meaningless 3079 category = fcZero; 3080 } else if (myexponent==0x7ff && mysignificand==0) { 3081 // exponent, significand meaningless 3082 category = fcInfinity; 3083 } else if (myexponent==0x7ff && mysignificand!=0) { 3084 // exponent meaningless 3085 category = fcNaN; 3086 *significandParts() = mysignificand; 3087 } else { 3088 category = fcNormal; 3089 exponent = myexponent - 1023; 3090 *significandParts() = mysignificand; 3091 if (myexponent==0) // denormal 3092 exponent = -1022; 3093 else 3094 *significandParts() |= 0x10000000000000LL; // integer bit 3095 } 3096 } 3097 3098 void 3099 APFloat::initFromFloatAPInt(const APInt & api) 3100 { 3101 assert(api.getBitWidth()==32); 3102 uint32_t i = (uint32_t)*api.getRawData(); 3103 uint32_t myexponent = (i >> 23) & 0xff; 3104 uint32_t mysignificand = i & 0x7fffff; 3105 3106 initialize(&APFloat::IEEEsingle); 3107 assert(partCount()==1); 3108 3109 sign = i >> 31; 3110 if (myexponent==0 && mysignificand==0) { 3111 // exponent, significand meaningless 3112 category = fcZero; 3113 } else if (myexponent==0xff && mysignificand==0) { 3114 // exponent, significand meaningless 3115 category = fcInfinity; 3116 } else if (myexponent==0xff && mysignificand!=0) { 3117 // sign, exponent, significand meaningless 3118 category = fcNaN; 3119 *significandParts() = mysignificand; 3120 } else { 3121 category = fcNormal; 3122 exponent = myexponent - 127; //bias 3123 *significandParts() = mysignificand; 3124 if (myexponent==0) // denormal 3125 exponent = -126; 3126 else 3127 *significandParts() |= 0x800000; // integer bit 3128 } 3129 } 3130 3131 void 3132 APFloat::initFromHalfAPInt(const APInt & api) 3133 { 3134 assert(api.getBitWidth()==16); 3135 uint32_t i = (uint32_t)*api.getRawData(); 3136 uint32_t myexponent = (i >> 10) & 0x1f; 3137 uint32_t mysignificand = i & 0x3ff; 3138 3139 initialize(&APFloat::IEEEhalf); 3140 assert(partCount()==1); 3141 3142 sign = i >> 15; 3143 if (myexponent==0 && mysignificand==0) { 3144 // exponent, significand meaningless 3145 category = fcZero; 3146 } else if (myexponent==0x1f && mysignificand==0) { 3147 // exponent, significand meaningless 3148 category = fcInfinity; 3149 } else if (myexponent==0x1f && mysignificand!=0) { 3150 // sign, exponent, significand meaningless 3151 category = fcNaN; 3152 *significandParts() = mysignificand; 3153 } else { 3154 category = fcNormal; 3155 exponent = myexponent - 15; //bias 3156 *significandParts() = mysignificand; 3157 if (myexponent==0) // denormal 3158 exponent = -14; 3159 else 3160 *significandParts() |= 0x400; // integer bit 3161 } 3162 } 3163 3164 /// Treat api as containing the bits of a floating point number. Currently 3165 /// we infer the floating point type from the size of the APInt. The 3166 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 3167 /// when the size is anything else). 3168 void 3169 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) 3170 { 3171 if (Sem == &IEEEhalf) 3172 return initFromHalfAPInt(api); 3173 if (Sem == &IEEEsingle) 3174 return initFromFloatAPInt(api); 3175 if (Sem == &IEEEdouble) 3176 return initFromDoubleAPInt(api); 3177 if (Sem == &x87DoubleExtended) 3178 return initFromF80LongDoubleAPInt(api); 3179 if (Sem == &IEEEquad) 3180 return initFromQuadrupleAPInt(api); 3181 if (Sem == &PPCDoubleDouble) 3182 return initFromPPCDoubleDoubleAPInt(api); 3183 3184 llvm_unreachable(0); 3185 } 3186 3187 APFloat 3188 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) 3189 { 3190 switch (BitWidth) { 3191 case 16: 3192 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); 3193 case 32: 3194 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); 3195 case 64: 3196 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); 3197 case 80: 3198 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); 3199 case 128: 3200 if (isIEEE) 3201 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); 3202 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); 3203 default: 3204 llvm_unreachable("Unknown floating bit width"); 3205 } 3206 } 3207 3208 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { 3209 APFloat Val(Sem, fcNormal, Negative); 3210 3211 // We want (in interchange format): 3212 // sign = {Negative} 3213 // exponent = 1..10 3214 // significand = 1..1 3215 3216 Val.exponent = Sem.maxExponent; // unbiased 3217 3218 // 1-initialize all bits.... 3219 Val.zeroSignificand(); 3220 integerPart *significand = Val.significandParts(); 3221 unsigned N = partCountForBits(Sem.precision); 3222 for (unsigned i = 0; i != N; ++i) 3223 significand[i] = ~((integerPart) 0); 3224 3225 // ...and then clear the top bits for internal consistency. 3226 if (Sem.precision % integerPartWidth != 0) 3227 significand[N-1] &= 3228 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1; 3229 3230 return Val; 3231 } 3232 3233 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { 3234 APFloat Val(Sem, fcNormal, Negative); 3235 3236 // We want (in interchange format): 3237 // sign = {Negative} 3238 // exponent = 0..0 3239 // significand = 0..01 3240 3241 Val.exponent = Sem.minExponent; // unbiased 3242 Val.zeroSignificand(); 3243 Val.significandParts()[0] = 1; 3244 return Val; 3245 } 3246 3247 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { 3248 APFloat Val(Sem, fcNormal, Negative); 3249 3250 // We want (in interchange format): 3251 // sign = {Negative} 3252 // exponent = 0..0 3253 // significand = 10..0 3254 3255 Val.exponent = Sem.minExponent; 3256 Val.zeroSignificand(); 3257 Val.significandParts()[partCountForBits(Sem.precision)-1] |= 3258 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); 3259 3260 return Val; 3261 } 3262 3263 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { 3264 initFromAPInt(&Sem, API); 3265 } 3266 3267 APFloat::APFloat(float f) { 3268 initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); 3269 } 3270 3271 APFloat::APFloat(double d) { 3272 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); 3273 } 3274 3275 namespace { 3276 void append(SmallVectorImpl<char> &Buffer, StringRef Str) { 3277 Buffer.append(Str.begin(), Str.end()); 3278 } 3279 3280 /// Removes data from the given significand until it is no more 3281 /// precise than is required for the desired precision. 3282 void AdjustToPrecision(APInt &significand, 3283 int &exp, unsigned FormatPrecision) { 3284 unsigned bits = significand.getActiveBits(); 3285 3286 // 196/59 is a very slight overestimate of lg_2(10). 3287 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; 3288 3289 if (bits <= bitsRequired) return; 3290 3291 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; 3292 if (!tensRemovable) return; 3293 3294 exp += tensRemovable; 3295 3296 APInt divisor(significand.getBitWidth(), 1); 3297 APInt powten(significand.getBitWidth(), 10); 3298 while (true) { 3299 if (tensRemovable & 1) 3300 divisor *= powten; 3301 tensRemovable >>= 1; 3302 if (!tensRemovable) break; 3303 powten *= powten; 3304 } 3305 3306 significand = significand.udiv(divisor); 3307 3308 // Truncate the significand down to its active bit count, but 3309 // don't try to drop below 32. 3310 unsigned newPrecision = std::max(32U, significand.getActiveBits()); 3311 significand = significand.trunc(newPrecision); 3312 } 3313 3314 3315 void AdjustToPrecision(SmallVectorImpl<char> &buffer, 3316 int &exp, unsigned FormatPrecision) { 3317 unsigned N = buffer.size(); 3318 if (N <= FormatPrecision) return; 3319 3320 // The most significant figures are the last ones in the buffer. 3321 unsigned FirstSignificant = N - FormatPrecision; 3322 3323 // Round. 3324 // FIXME: this probably shouldn't use 'round half up'. 3325 3326 // Rounding down is just a truncation, except we also want to drop 3327 // trailing zeros from the new result. 3328 if (buffer[FirstSignificant - 1] < '5') { 3329 while (FirstSignificant < N && buffer[FirstSignificant] == '0') 3330 FirstSignificant++; 3331 3332 exp += FirstSignificant; 3333 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3334 return; 3335 } 3336 3337 // Rounding up requires a decimal add-with-carry. If we continue 3338 // the carry, the newly-introduced zeros will just be truncated. 3339 for (unsigned I = FirstSignificant; I != N; ++I) { 3340 if (buffer[I] == '9') { 3341 FirstSignificant++; 3342 } else { 3343 buffer[I]++; 3344 break; 3345 } 3346 } 3347 3348 // If we carried through, we have exactly one digit of precision. 3349 if (FirstSignificant == N) { 3350 exp += FirstSignificant; 3351 buffer.clear(); 3352 buffer.push_back('1'); 3353 return; 3354 } 3355 3356 exp += FirstSignificant; 3357 buffer.erase(&buffer[0], &buffer[FirstSignificant]); 3358 } 3359 } 3360 3361 void APFloat::toString(SmallVectorImpl<char> &Str, 3362 unsigned FormatPrecision, 3363 unsigned FormatMaxPadding) const { 3364 switch (category) { 3365 case fcInfinity: 3366 if (isNegative()) 3367 return append(Str, "-Inf"); 3368 else 3369 return append(Str, "+Inf"); 3370 3371 case fcNaN: return append(Str, "NaN"); 3372 3373 case fcZero: 3374 if (isNegative()) 3375 Str.push_back('-'); 3376 3377 if (!FormatMaxPadding) 3378 append(Str, "0.0E+0"); 3379 else 3380 Str.push_back('0'); 3381 return; 3382 3383 case fcNormal: 3384 break; 3385 } 3386 3387 if (isNegative()) 3388 Str.push_back('-'); 3389 3390 // Decompose the number into an APInt and an exponent. 3391 int exp = exponent - ((int) semantics->precision - 1); 3392 APInt significand(semantics->precision, 3393 makeArrayRef(significandParts(), 3394 partCountForBits(semantics->precision))); 3395 3396 // Set FormatPrecision if zero. We want to do this before we 3397 // truncate trailing zeros, as those are part of the precision. 3398 if (!FormatPrecision) { 3399 // It's an interesting question whether to use the nominal 3400 // precision or the active precision here for denormals. 3401 3402 // FormatPrecision = ceil(significandBits / lg_2(10)) 3403 FormatPrecision = (semantics->precision * 59 + 195) / 196; 3404 } 3405 3406 // Ignore trailing binary zeros. 3407 int trailingZeros = significand.countTrailingZeros(); 3408 exp += trailingZeros; 3409 significand = significand.lshr(trailingZeros); 3410 3411 // Change the exponent from 2^e to 10^e. 3412 if (exp == 0) { 3413 // Nothing to do. 3414 } else if (exp > 0) { 3415 // Just shift left. 3416 significand = significand.zext(semantics->precision + exp); 3417 significand <<= exp; 3418 exp = 0; 3419 } else { /* exp < 0 */ 3420 int texp = -exp; 3421 3422 // We transform this using the identity: 3423 // (N)(2^-e) == (N)(5^e)(10^-e) 3424 // This means we have to multiply N (the significand) by 5^e. 3425 // To avoid overflow, we have to operate on numbers large 3426 // enough to store N * 5^e: 3427 // log2(N * 5^e) == log2(N) + e * log2(5) 3428 // <= semantics->precision + e * 137 / 59 3429 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) 3430 3431 unsigned precision = semantics->precision + (137 * texp + 136) / 59; 3432 3433 // Multiply significand by 5^e. 3434 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) 3435 significand = significand.zext(precision); 3436 APInt five_to_the_i(precision, 5); 3437 while (true) { 3438 if (texp & 1) significand *= five_to_the_i; 3439 3440 texp >>= 1; 3441 if (!texp) break; 3442 five_to_the_i *= five_to_the_i; 3443 } 3444 } 3445 3446 AdjustToPrecision(significand, exp, FormatPrecision); 3447 3448 SmallVector<char, 256> buffer; 3449 3450 // Fill the buffer. 3451 unsigned precision = significand.getBitWidth(); 3452 APInt ten(precision, 10); 3453 APInt digit(precision, 0); 3454 3455 bool inTrail = true; 3456 while (significand != 0) { 3457 // digit <- significand % 10 3458 // significand <- significand / 10 3459 APInt::udivrem(significand, ten, significand, digit); 3460 3461 unsigned d = digit.getZExtValue(); 3462 3463 // Drop trailing zeros. 3464 if (inTrail && !d) exp++; 3465 else { 3466 buffer.push_back((char) ('0' + d)); 3467 inTrail = false; 3468 } 3469 } 3470 3471 assert(!buffer.empty() && "no characters in buffer!"); 3472 3473 // Drop down to FormatPrecision. 3474 // TODO: don't do more precise calculations above than are required. 3475 AdjustToPrecision(buffer, exp, FormatPrecision); 3476 3477 unsigned NDigits = buffer.size(); 3478 3479 // Check whether we should use scientific notation. 3480 bool FormatScientific; 3481 if (!FormatMaxPadding) 3482 FormatScientific = true; 3483 else { 3484 if (exp >= 0) { 3485 // 765e3 --> 765000 3486 // ^^^ 3487 // But we shouldn't make the number look more precise than it is. 3488 FormatScientific = ((unsigned) exp > FormatMaxPadding || 3489 NDigits + (unsigned) exp > FormatPrecision); 3490 } else { 3491 // Power of the most significant digit. 3492 int MSD = exp + (int) (NDigits - 1); 3493 if (MSD >= 0) { 3494 // 765e-2 == 7.65 3495 FormatScientific = false; 3496 } else { 3497 // 765e-5 == 0.00765 3498 // ^ ^^ 3499 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; 3500 } 3501 } 3502 } 3503 3504 // Scientific formatting is pretty straightforward. 3505 if (FormatScientific) { 3506 exp += (NDigits - 1); 3507 3508 Str.push_back(buffer[NDigits-1]); 3509 Str.push_back('.'); 3510 if (NDigits == 1) 3511 Str.push_back('0'); 3512 else 3513 for (unsigned I = 1; I != NDigits; ++I) 3514 Str.push_back(buffer[NDigits-1-I]); 3515 Str.push_back('E'); 3516 3517 Str.push_back(exp >= 0 ? '+' : '-'); 3518 if (exp < 0) exp = -exp; 3519 SmallVector<char, 6> expbuf; 3520 do { 3521 expbuf.push_back((char) ('0' + (exp % 10))); 3522 exp /= 10; 3523 } while (exp); 3524 for (unsigned I = 0, E = expbuf.size(); I != E; ++I) 3525 Str.push_back(expbuf[E-1-I]); 3526 return; 3527 } 3528 3529 // Non-scientific, positive exponents. 3530 if (exp >= 0) { 3531 for (unsigned I = 0; I != NDigits; ++I) 3532 Str.push_back(buffer[NDigits-1-I]); 3533 for (unsigned I = 0; I != (unsigned) exp; ++I) 3534 Str.push_back('0'); 3535 return; 3536 } 3537 3538 // Non-scientific, negative exponents. 3539 3540 // The number of digits to the left of the decimal point. 3541 int NWholeDigits = exp + (int) NDigits; 3542 3543 unsigned I = 0; 3544 if (NWholeDigits > 0) { 3545 for (; I != (unsigned) NWholeDigits; ++I) 3546 Str.push_back(buffer[NDigits-I-1]); 3547 Str.push_back('.'); 3548 } else { 3549 unsigned NZeros = 1 + (unsigned) -NWholeDigits; 3550 3551 Str.push_back('0'); 3552 Str.push_back('.'); 3553 for (unsigned Z = 1; Z != NZeros; ++Z) 3554 Str.push_back('0'); 3555 } 3556 3557 for (; I != NDigits; ++I) 3558 Str.push_back(buffer[NDigits-I-1]); 3559 } 3560 3561 bool APFloat::getExactInverse(APFloat *inv) const { 3562 // Special floats and denormals have no exact inverse. 3563 if (category != fcNormal) 3564 return false; 3565 3566 // Check that the number is a power of two by making sure that only the 3567 // integer bit is set in the significand. 3568 if (significandLSB() != semantics->precision - 1) 3569 return false; 3570 3571 // Get the inverse. 3572 APFloat reciprocal(*semantics, 1ULL); 3573 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) 3574 return false; 3575 3576 // Avoid multiplication with a denormal, it is not safe on all platforms and 3577 // may be slower than a normal division. 3578 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision) 3579 return false; 3580 3581 assert(reciprocal.category == fcNormal && 3582 reciprocal.significandLSB() == reciprocal.semantics->precision - 1); 3583 3584 if (inv) 3585 *inv = reciprocal; 3586 3587 return true; 3588 } 3589