1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a class to represent arbitrary precision floating 11 // point values and provide a variety of arithmetic operations on them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/FoldingSet.h" 17 #include "llvm/Support/MathExtras.h" 18 #include <cstring> 19 20 using namespace llvm; 21 22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) 23 24 /* Assumed in hexadecimal significand parsing, and conversion to 25 hexadecimal strings. */ 26 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] 27 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); 28 29 namespace llvm { 30 31 /* Represents floating point arithmetic semantics. */ 32 struct fltSemantics { 33 /* The largest E such that 2^E is representable; this matches the 34 definition of IEEE 754. */ 35 exponent_t maxExponent; 36 37 /* The smallest E such that 2^E is a normalized number; this 38 matches the definition of IEEE 754. */ 39 exponent_t minExponent; 40 41 /* Number of bits in the significand. This includes the integer 42 bit. */ 43 unsigned int precision; 44 45 /* True if arithmetic is supported. */ 46 unsigned int arithmeticOK; 47 }; 48 49 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; 50 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; 51 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; 52 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; 53 const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; 54 55 // The PowerPC format consists of two doubles. It does not map cleanly 56 // onto the usual format above. For now only storage of constants of 57 // this type is supported, no arithmetic. 58 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; 59 60 /* A tight upper bound on number of parts required to hold the value 61 pow(5, power) is 62 63 power * 815 / (351 * integerPartWidth) + 1 64 65 However, whilst the result may require only this many parts, 66 because we are multiplying two values to get it, the 67 multiplication may require an extra part with the excess part 68 being zero (consider the trivial case of 1 * 1, tcFullMultiply 69 requires two parts to hold the single-part result). So we add an 70 extra one to guarantee enough space whilst multiplying. */ 71 const unsigned int maxExponent = 16383; 72 const unsigned int maxPrecision = 113; 73 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; 74 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) 75 / (351 * integerPartWidth)); 76 } 77 78 /* Put a bunch of private, handy routines in an anonymous namespace. */ 79 namespace { 80 81 static inline unsigned int 82 partCountForBits(unsigned int bits) 83 { 84 return ((bits) + integerPartWidth - 1) / integerPartWidth; 85 } 86 87 /* Returns 0U-9U. Return values >= 10U are not digits. */ 88 static inline unsigned int 89 decDigitValue(unsigned int c) 90 { 91 return c - '0'; 92 } 93 94 static unsigned int 95 hexDigitValue(unsigned int c) 96 { 97 unsigned int r; 98 99 r = c - '0'; 100 if(r <= 9) 101 return r; 102 103 r = c - 'A'; 104 if(r <= 5) 105 return r + 10; 106 107 r = c - 'a'; 108 if(r <= 5) 109 return r + 10; 110 111 return -1U; 112 } 113 114 static inline void 115 assertArithmeticOK(const llvm::fltSemantics &semantics) { 116 assert(semantics.arithmeticOK 117 && "Compile-time arithmetic does not support these semantics"); 118 } 119 120 /* Return the value of a decimal exponent of the form 121 [+-]ddddddd. 122 123 If the exponent overflows, returns a large exponent with the 124 appropriate sign. */ 125 static int 126 readExponent(const char *p) 127 { 128 bool isNegative; 129 unsigned int absExponent; 130 const unsigned int overlargeExponent = 24000; /* FIXME. */ 131 132 isNegative = (*p == '-'); 133 if (*p == '-' || *p == '+') 134 p++; 135 136 absExponent = decDigitValue(*p++); 137 assert (absExponent < 10U); 138 139 for (;;) { 140 unsigned int value; 141 142 value = decDigitValue(*p); 143 if (value >= 10U) 144 break; 145 146 p++; 147 value += absExponent * 10; 148 if (absExponent >= overlargeExponent) { 149 absExponent = overlargeExponent; 150 break; 151 } 152 absExponent = value; 153 } 154 155 if (isNegative) 156 return -(int) absExponent; 157 else 158 return (int) absExponent; 159 } 160 161 /* This is ugly and needs cleaning up, but I don't immediately see 162 how whilst remaining safe. */ 163 static int 164 totalExponent(const char *p, int exponentAdjustment) 165 { 166 int unsignedExponent; 167 bool negative, overflow; 168 int exponent; 169 170 /* Move past the exponent letter and sign to the digits. */ 171 p++; 172 negative = *p == '-'; 173 if(*p == '-' || *p == '+') 174 p++; 175 176 unsignedExponent = 0; 177 overflow = false; 178 for(;;) { 179 unsigned int value; 180 181 value = decDigitValue(*p); 182 if(value >= 10U) 183 break; 184 185 p++; 186 unsignedExponent = unsignedExponent * 10 + value; 187 if(unsignedExponent > 65535) 188 overflow = true; 189 } 190 191 if(exponentAdjustment > 65535 || exponentAdjustment < -65536) 192 overflow = true; 193 194 if(!overflow) { 195 exponent = unsignedExponent; 196 if(negative) 197 exponent = -exponent; 198 exponent += exponentAdjustment; 199 if(exponent > 65535 || exponent < -65536) 200 overflow = true; 201 } 202 203 if(overflow) 204 exponent = negative ? -65536: 65535; 205 206 return exponent; 207 } 208 209 static const char * 210 skipLeadingZeroesAndAnyDot(const char *p, const char **dot) 211 { 212 *dot = 0; 213 while(*p == '0') 214 p++; 215 216 if(*p == '.') { 217 *dot = p++; 218 while(*p == '0') 219 p++; 220 } 221 222 return p; 223 } 224 225 /* Given a normal decimal floating point number of the form 226 227 dddd.dddd[eE][+-]ddd 228 229 where the decimal point and exponent are optional, fill out the 230 structure D. Exponent is appropriate if the significand is 231 treated as an integer, and normalizedExponent if the significand 232 is taken to have the decimal point after a single leading 233 non-zero digit. 234 235 If the value is zero, V->firstSigDigit points to a non-digit, and 236 the return exponent is zero. 237 */ 238 struct decimalInfo { 239 const char *firstSigDigit; 240 const char *lastSigDigit; 241 int exponent; 242 int normalizedExponent; 243 }; 244 245 static void 246 interpretDecimal(const char *p, decimalInfo *D) 247 { 248 const char *dot; 249 250 p = skipLeadingZeroesAndAnyDot (p, &dot); 251 252 D->firstSigDigit = p; 253 D->exponent = 0; 254 D->normalizedExponent = 0; 255 256 for (;;) { 257 if (*p == '.') { 258 assert(dot == 0); 259 dot = p++; 260 } 261 if (decDigitValue(*p) >= 10U) 262 break; 263 p++; 264 } 265 266 /* If number is all zerooes accept any exponent. */ 267 if (p != D->firstSigDigit) { 268 if (*p == 'e' || *p == 'E') 269 D->exponent = readExponent(p + 1); 270 271 /* Implied decimal point? */ 272 if (!dot) 273 dot = p; 274 275 /* Drop insignificant trailing zeroes. */ 276 do 277 do 278 p--; 279 while (*p == '0'); 280 while (*p == '.'); 281 282 /* Adjust the exponents for any decimal point. */ 283 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p)); 284 D->normalizedExponent = (D->exponent + 285 static_cast<exponent_t>((p - D->firstSigDigit) 286 - (dot > D->firstSigDigit && dot < p))); 287 } 288 289 D->lastSigDigit = p; 290 } 291 292 /* Return the trailing fraction of a hexadecimal number. 293 DIGITVALUE is the first hex digit of the fraction, P points to 294 the next digit. */ 295 static lostFraction 296 trailingHexadecimalFraction(const char *p, unsigned int digitValue) 297 { 298 unsigned int hexDigit; 299 300 /* If the first trailing digit isn't 0 or 8 we can work out the 301 fraction immediately. */ 302 if(digitValue > 8) 303 return lfMoreThanHalf; 304 else if(digitValue < 8 && digitValue > 0) 305 return lfLessThanHalf; 306 307 /* Otherwise we need to find the first non-zero digit. */ 308 while(*p == '0') 309 p++; 310 311 hexDigit = hexDigitValue(*p); 312 313 /* If we ran off the end it is exactly zero or one-half, otherwise 314 a little more. */ 315 if(hexDigit == -1U) 316 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; 317 else 318 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; 319 } 320 321 /* Return the fraction lost were a bignum truncated losing the least 322 significant BITS bits. */ 323 static lostFraction 324 lostFractionThroughTruncation(const integerPart *parts, 325 unsigned int partCount, 326 unsigned int bits) 327 { 328 unsigned int lsb; 329 330 lsb = APInt::tcLSB(parts, partCount); 331 332 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ 333 if(bits <= lsb) 334 return lfExactlyZero; 335 if(bits == lsb + 1) 336 return lfExactlyHalf; 337 if(bits <= partCount * integerPartWidth 338 && APInt::tcExtractBit(parts, bits - 1)) 339 return lfMoreThanHalf; 340 341 return lfLessThanHalf; 342 } 343 344 /* Shift DST right BITS bits noting lost fraction. */ 345 static lostFraction 346 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) 347 { 348 lostFraction lost_fraction; 349 350 lost_fraction = lostFractionThroughTruncation(dst, parts, bits); 351 352 APInt::tcShiftRight(dst, parts, bits); 353 354 return lost_fraction; 355 } 356 357 /* Combine the effect of two lost fractions. */ 358 static lostFraction 359 combineLostFractions(lostFraction moreSignificant, 360 lostFraction lessSignificant) 361 { 362 if(lessSignificant != lfExactlyZero) { 363 if(moreSignificant == lfExactlyZero) 364 moreSignificant = lfLessThanHalf; 365 else if(moreSignificant == lfExactlyHalf) 366 moreSignificant = lfMoreThanHalf; 367 } 368 369 return moreSignificant; 370 } 371 372 /* The error from the true value, in half-ulps, on multiplying two 373 floating point numbers, which differ from the value they 374 approximate by at most HUE1 and HUE2 half-ulps, is strictly less 375 than the returned value. 376 377 See "How to Read Floating Point Numbers Accurately" by William D 378 Clinger. */ 379 static unsigned int 380 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) 381 { 382 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); 383 384 if (HUerr1 + HUerr2 == 0) 385 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ 386 else 387 return inexactMultiply + 2 * (HUerr1 + HUerr2); 388 } 389 390 /* The number of ulps from the boundary (zero, or half if ISNEAREST) 391 when the least significant BITS are truncated. BITS cannot be 392 zero. */ 393 static integerPart 394 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) 395 { 396 unsigned int count, partBits; 397 integerPart part, boundary; 398 399 assert (bits != 0); 400 401 bits--; 402 count = bits / integerPartWidth; 403 partBits = bits % integerPartWidth + 1; 404 405 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); 406 407 if (isNearest) 408 boundary = (integerPart) 1 << (partBits - 1); 409 else 410 boundary = 0; 411 412 if (count == 0) { 413 if (part - boundary <= boundary - part) 414 return part - boundary; 415 else 416 return boundary - part; 417 } 418 419 if (part == boundary) { 420 while (--count) 421 if (parts[count]) 422 return ~(integerPart) 0; /* A lot. */ 423 424 return parts[0]; 425 } else if (part == boundary - 1) { 426 while (--count) 427 if (~parts[count]) 428 return ~(integerPart) 0; /* A lot. */ 429 430 return -parts[0]; 431 } 432 433 return ~(integerPart) 0; /* A lot. */ 434 } 435 436 /* Place pow(5, power) in DST, and return the number of parts used. 437 DST must be at least one part larger than size of the answer. */ 438 static unsigned int 439 powerOf5(integerPart *dst, unsigned int power) 440 { 441 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 442 15625, 78125 }; 443 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 }; 444 static unsigned int partsCount[16] = { 1 }; 445 446 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; 447 unsigned int result; 448 449 assert(power <= maxExponent); 450 451 p1 = dst; 452 p2 = scratch; 453 454 *p1 = firstEightPowers[power & 7]; 455 power >>= 3; 456 457 result = 1; 458 pow5 = pow5s; 459 460 for (unsigned int n = 0; power; power >>= 1, n++) { 461 unsigned int pc; 462 463 pc = partsCount[n]; 464 465 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ 466 if (pc == 0) { 467 pc = partsCount[n - 1]; 468 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); 469 pc *= 2; 470 if (pow5[pc - 1] == 0) 471 pc--; 472 partsCount[n] = pc; 473 } 474 475 if (power & 1) { 476 integerPart *tmp; 477 478 APInt::tcFullMultiply(p2, p1, pow5, result, pc); 479 result += pc; 480 if (p2[result - 1] == 0) 481 result--; 482 483 /* Now result is in p1 with partsCount parts and p2 is scratch 484 space. */ 485 tmp = p1, p1 = p2, p2 = tmp; 486 } 487 488 pow5 += pc; 489 } 490 491 if (p1 != dst) 492 APInt::tcAssign(dst, p1, result); 493 494 return result; 495 } 496 497 /* Zero at the end to avoid modular arithmetic when adding one; used 498 when rounding up during hexadecimal output. */ 499 static const char hexDigitsLower[] = "0123456789abcdef0"; 500 static const char hexDigitsUpper[] = "0123456789ABCDEF0"; 501 static const char infinityL[] = "infinity"; 502 static const char infinityU[] = "INFINITY"; 503 static const char NaNL[] = "nan"; 504 static const char NaNU[] = "NAN"; 505 506 /* Write out an integerPart in hexadecimal, starting with the most 507 significant nibble. Write out exactly COUNT hexdigits, return 508 COUNT. */ 509 static unsigned int 510 partAsHex (char *dst, integerPart part, unsigned int count, 511 const char *hexDigitChars) 512 { 513 unsigned int result = count; 514 515 assert (count != 0 && count <= integerPartWidth / 4); 516 517 part >>= (integerPartWidth - 4 * count); 518 while (count--) { 519 dst[count] = hexDigitChars[part & 0xf]; 520 part >>= 4; 521 } 522 523 return result; 524 } 525 526 /* Write out an unsigned decimal integer. */ 527 static char * 528 writeUnsignedDecimal (char *dst, unsigned int n) 529 { 530 char buff[40], *p; 531 532 p = buff; 533 do 534 *p++ = '0' + n % 10; 535 while (n /= 10); 536 537 do 538 *dst++ = *--p; 539 while (p != buff); 540 541 return dst; 542 } 543 544 /* Write out a signed decimal integer. */ 545 static char * 546 writeSignedDecimal (char *dst, int value) 547 { 548 if (value < 0) { 549 *dst++ = '-'; 550 dst = writeUnsignedDecimal(dst, -(unsigned) value); 551 } else 552 dst = writeUnsignedDecimal(dst, value); 553 554 return dst; 555 } 556 } 557 558 /* Constructors. */ 559 void 560 APFloat::initialize(const fltSemantics *ourSemantics) 561 { 562 unsigned int count; 563 564 semantics = ourSemantics; 565 count = partCount(); 566 if(count > 1) 567 significand.parts = new integerPart[count]; 568 } 569 570 void 571 APFloat::freeSignificand() 572 { 573 if(partCount() > 1) 574 delete [] significand.parts; 575 } 576 577 void 578 APFloat::assign(const APFloat &rhs) 579 { 580 assert(semantics == rhs.semantics); 581 582 sign = rhs.sign; 583 category = rhs.category; 584 exponent = rhs.exponent; 585 sign2 = rhs.sign2; 586 exponent2 = rhs.exponent2; 587 if(category == fcNormal || category == fcNaN) 588 copySignificand(rhs); 589 } 590 591 void 592 APFloat::copySignificand(const APFloat &rhs) 593 { 594 assert(category == fcNormal || category == fcNaN); 595 assert(rhs.partCount() >= partCount()); 596 597 APInt::tcAssign(significandParts(), rhs.significandParts(), 598 partCount()); 599 } 600 601 /* Make this number a NaN, with an arbitrary but deterministic value 602 for the significand. If double or longer, this is a signalling NaN, 603 which may not be ideal. */ 604 void 605 APFloat::makeNaN(void) 606 { 607 category = fcNaN; 608 APInt::tcSet(significandParts(), ~0U, partCount()); 609 } 610 611 APFloat & 612 APFloat::operator=(const APFloat &rhs) 613 { 614 if(this != &rhs) { 615 if(semantics != rhs.semantics) { 616 freeSignificand(); 617 initialize(rhs.semantics); 618 } 619 assign(rhs); 620 } 621 622 return *this; 623 } 624 625 bool 626 APFloat::bitwiseIsEqual(const APFloat &rhs) const { 627 if (this == &rhs) 628 return true; 629 if (semantics != rhs.semantics || 630 category != rhs.category || 631 sign != rhs.sign) 632 return false; 633 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && 634 sign2 != rhs.sign2) 635 return false; 636 if (category==fcZero || category==fcInfinity) 637 return true; 638 else if (category==fcNormal && exponent!=rhs.exponent) 639 return false; 640 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && 641 exponent2!=rhs.exponent2) 642 return false; 643 else { 644 int i= partCount(); 645 const integerPart* p=significandParts(); 646 const integerPart* q=rhs.significandParts(); 647 for (; i>0; i--, p++, q++) { 648 if (*p != *q) 649 return false; 650 } 651 return true; 652 } 653 } 654 655 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) 656 { 657 assertArithmeticOK(ourSemantics); 658 initialize(&ourSemantics); 659 sign = 0; 660 zeroSignificand(); 661 exponent = ourSemantics.precision - 1; 662 significandParts()[0] = value; 663 normalize(rmNearestTiesToEven, lfExactlyZero); 664 } 665 666 APFloat::APFloat(const fltSemantics &ourSemantics, 667 fltCategory ourCategory, bool negative) 668 { 669 assertArithmeticOK(ourSemantics); 670 initialize(&ourSemantics); 671 category = ourCategory; 672 sign = negative; 673 if(category == fcNormal) 674 category = fcZero; 675 else if (ourCategory == fcNaN) 676 makeNaN(); 677 } 678 679 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) 680 { 681 assertArithmeticOK(ourSemantics); 682 initialize(&ourSemantics); 683 convertFromString(text, rmNearestTiesToEven); 684 } 685 686 APFloat::APFloat(const APFloat &rhs) 687 { 688 initialize(rhs.semantics); 689 assign(rhs); 690 } 691 692 APFloat::~APFloat() 693 { 694 freeSignificand(); 695 } 696 697 // Profile - This method 'profiles' an APFloat for use with FoldingSet. 698 void APFloat::Profile(FoldingSetNodeID& ID) const { 699 ID.Add(bitcastToAPInt()); 700 } 701 702 unsigned int 703 APFloat::partCount() const 704 { 705 return partCountForBits(semantics->precision + 1); 706 } 707 708 unsigned int 709 APFloat::semanticsPrecision(const fltSemantics &semantics) 710 { 711 return semantics.precision; 712 } 713 714 const integerPart * 715 APFloat::significandParts() const 716 { 717 return const_cast<APFloat *>(this)->significandParts(); 718 } 719 720 integerPart * 721 APFloat::significandParts() 722 { 723 assert(category == fcNormal || category == fcNaN); 724 725 if(partCount() > 1) 726 return significand.parts; 727 else 728 return &significand.part; 729 } 730 731 void 732 APFloat::zeroSignificand() 733 { 734 category = fcNormal; 735 APInt::tcSet(significandParts(), 0, partCount()); 736 } 737 738 /* Increment an fcNormal floating point number's significand. */ 739 void 740 APFloat::incrementSignificand() 741 { 742 integerPart carry; 743 744 carry = APInt::tcIncrement(significandParts(), partCount()); 745 746 /* Our callers should never cause us to overflow. */ 747 assert(carry == 0); 748 } 749 750 /* Add the significand of the RHS. Returns the carry flag. */ 751 integerPart 752 APFloat::addSignificand(const APFloat &rhs) 753 { 754 integerPart *parts; 755 756 parts = significandParts(); 757 758 assert(semantics == rhs.semantics); 759 assert(exponent == rhs.exponent); 760 761 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount()); 762 } 763 764 /* Subtract the significand of the RHS with a borrow flag. Returns 765 the borrow flag. */ 766 integerPart 767 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) 768 { 769 integerPart *parts; 770 771 parts = significandParts(); 772 773 assert(semantics == rhs.semantics); 774 assert(exponent == rhs.exponent); 775 776 return APInt::tcSubtract(parts, rhs.significandParts(), borrow, 777 partCount()); 778 } 779 780 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it 781 on to the full-precision result of the multiplication. Returns the 782 lost fraction. */ 783 lostFraction 784 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) 785 { 786 unsigned int omsb; // One, not zero, based MSB. 787 unsigned int partsCount, newPartsCount, precision; 788 integerPart *lhsSignificand; 789 integerPart scratch[4]; 790 integerPart *fullSignificand; 791 lostFraction lost_fraction; 792 bool ignored; 793 794 assert(semantics == rhs.semantics); 795 796 precision = semantics->precision; 797 newPartsCount = partCountForBits(precision * 2); 798 799 if(newPartsCount > 4) 800 fullSignificand = new integerPart[newPartsCount]; 801 else 802 fullSignificand = scratch; 803 804 lhsSignificand = significandParts(); 805 partsCount = partCount(); 806 807 APInt::tcFullMultiply(fullSignificand, lhsSignificand, 808 rhs.significandParts(), partsCount, partsCount); 809 810 lost_fraction = lfExactlyZero; 811 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 812 exponent += rhs.exponent; 813 814 if(addend) { 815 Significand savedSignificand = significand; 816 const fltSemantics *savedSemantics = semantics; 817 fltSemantics extendedSemantics; 818 opStatus status; 819 unsigned int extendedPrecision; 820 821 /* Normalize our MSB. */ 822 extendedPrecision = precision + precision - 1; 823 if(omsb != extendedPrecision) 824 { 825 APInt::tcShiftLeft(fullSignificand, newPartsCount, 826 extendedPrecision - omsb); 827 exponent -= extendedPrecision - omsb; 828 } 829 830 /* Create new semantics. */ 831 extendedSemantics = *semantics; 832 extendedSemantics.precision = extendedPrecision; 833 834 if(newPartsCount == 1) 835 significand.part = fullSignificand[0]; 836 else 837 significand.parts = fullSignificand; 838 semantics = &extendedSemantics; 839 840 APFloat extendedAddend(*addend); 841 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); 842 assert(status == opOK); 843 lost_fraction = addOrSubtractSignificand(extendedAddend, false); 844 845 /* Restore our state. */ 846 if(newPartsCount == 1) 847 fullSignificand[0] = significand.part; 848 significand = savedSignificand; 849 semantics = savedSemantics; 850 851 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; 852 } 853 854 exponent -= (precision - 1); 855 856 if(omsb > precision) { 857 unsigned int bits, significantParts; 858 lostFraction lf; 859 860 bits = omsb - precision; 861 significantParts = partCountForBits(omsb); 862 lf = shiftRight(fullSignificand, significantParts, bits); 863 lost_fraction = combineLostFractions(lf, lost_fraction); 864 exponent += bits; 865 } 866 867 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); 868 869 if(newPartsCount > 4) 870 delete [] fullSignificand; 871 872 return lost_fraction; 873 } 874 875 /* Multiply the significands of LHS and RHS to DST. */ 876 lostFraction 877 APFloat::divideSignificand(const APFloat &rhs) 878 { 879 unsigned int bit, i, partsCount; 880 const integerPart *rhsSignificand; 881 integerPart *lhsSignificand, *dividend, *divisor; 882 integerPart scratch[4]; 883 lostFraction lost_fraction; 884 885 assert(semantics == rhs.semantics); 886 887 lhsSignificand = significandParts(); 888 rhsSignificand = rhs.significandParts(); 889 partsCount = partCount(); 890 891 if(partsCount > 2) 892 dividend = new integerPart[partsCount * 2]; 893 else 894 dividend = scratch; 895 896 divisor = dividend + partsCount; 897 898 /* Copy the dividend and divisor as they will be modified in-place. */ 899 for(i = 0; i < partsCount; i++) { 900 dividend[i] = lhsSignificand[i]; 901 divisor[i] = rhsSignificand[i]; 902 lhsSignificand[i] = 0; 903 } 904 905 exponent -= rhs.exponent; 906 907 unsigned int precision = semantics->precision; 908 909 /* Normalize the divisor. */ 910 bit = precision - APInt::tcMSB(divisor, partsCount) - 1; 911 if(bit) { 912 exponent += bit; 913 APInt::tcShiftLeft(divisor, partsCount, bit); 914 } 915 916 /* Normalize the dividend. */ 917 bit = precision - APInt::tcMSB(dividend, partsCount) - 1; 918 if(bit) { 919 exponent -= bit; 920 APInt::tcShiftLeft(dividend, partsCount, bit); 921 } 922 923 /* Ensure the dividend >= divisor initially for the loop below. 924 Incidentally, this means that the division loop below is 925 guaranteed to set the integer bit to one. */ 926 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { 927 exponent--; 928 APInt::tcShiftLeft(dividend, partsCount, 1); 929 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); 930 } 931 932 /* Long division. */ 933 for(bit = precision; bit; bit -= 1) { 934 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { 935 APInt::tcSubtract(dividend, divisor, 0, partsCount); 936 APInt::tcSetBit(lhsSignificand, bit - 1); 937 } 938 939 APInt::tcShiftLeft(dividend, partsCount, 1); 940 } 941 942 /* Figure out the lost fraction. */ 943 int cmp = APInt::tcCompare(dividend, divisor, partsCount); 944 945 if(cmp > 0) 946 lost_fraction = lfMoreThanHalf; 947 else if(cmp == 0) 948 lost_fraction = lfExactlyHalf; 949 else if(APInt::tcIsZero(dividend, partsCount)) 950 lost_fraction = lfExactlyZero; 951 else 952 lost_fraction = lfLessThanHalf; 953 954 if(partsCount > 2) 955 delete [] dividend; 956 957 return lost_fraction; 958 } 959 960 unsigned int 961 APFloat::significandMSB() const 962 { 963 return APInt::tcMSB(significandParts(), partCount()); 964 } 965 966 unsigned int 967 APFloat::significandLSB() const 968 { 969 return APInt::tcLSB(significandParts(), partCount()); 970 } 971 972 /* Note that a zero result is NOT normalized to fcZero. */ 973 lostFraction 974 APFloat::shiftSignificandRight(unsigned int bits) 975 { 976 /* Our exponent should not overflow. */ 977 assert((exponent_t) (exponent + bits) >= exponent); 978 979 exponent += bits; 980 981 return shiftRight(significandParts(), partCount(), bits); 982 } 983 984 /* Shift the significand left BITS bits, subtract BITS from its exponent. */ 985 void 986 APFloat::shiftSignificandLeft(unsigned int bits) 987 { 988 assert(bits < semantics->precision); 989 990 if(bits) { 991 unsigned int partsCount = partCount(); 992 993 APInt::tcShiftLeft(significandParts(), partsCount, bits); 994 exponent -= bits; 995 996 assert(!APInt::tcIsZero(significandParts(), partsCount)); 997 } 998 } 999 1000 APFloat::cmpResult 1001 APFloat::compareAbsoluteValue(const APFloat &rhs) const 1002 { 1003 int compare; 1004 1005 assert(semantics == rhs.semantics); 1006 assert(category == fcNormal); 1007 assert(rhs.category == fcNormal); 1008 1009 compare = exponent - rhs.exponent; 1010 1011 /* If exponents are equal, do an unsigned bignum comparison of the 1012 significands. */ 1013 if(compare == 0) 1014 compare = APInt::tcCompare(significandParts(), rhs.significandParts(), 1015 partCount()); 1016 1017 if(compare > 0) 1018 return cmpGreaterThan; 1019 else if(compare < 0) 1020 return cmpLessThan; 1021 else 1022 return cmpEqual; 1023 } 1024 1025 /* Handle overflow. Sign is preserved. We either become infinity or 1026 the largest finite number. */ 1027 APFloat::opStatus 1028 APFloat::handleOverflow(roundingMode rounding_mode) 1029 { 1030 /* Infinity? */ 1031 if(rounding_mode == rmNearestTiesToEven 1032 || rounding_mode == rmNearestTiesToAway 1033 || (rounding_mode == rmTowardPositive && !sign) 1034 || (rounding_mode == rmTowardNegative && sign)) 1035 { 1036 category = fcInfinity; 1037 return (opStatus) (opOverflow | opInexact); 1038 } 1039 1040 /* Otherwise we become the largest finite number. */ 1041 category = fcNormal; 1042 exponent = semantics->maxExponent; 1043 APInt::tcSetLeastSignificantBits(significandParts(), partCount(), 1044 semantics->precision); 1045 1046 return opInexact; 1047 } 1048 1049 /* Returns TRUE if, when truncating the current number, with BIT the 1050 new LSB, with the given lost fraction and rounding mode, the result 1051 would need to be rounded away from zero (i.e., by increasing the 1052 signficand). This routine must work for fcZero of both signs, and 1053 fcNormal numbers. */ 1054 bool 1055 APFloat::roundAwayFromZero(roundingMode rounding_mode, 1056 lostFraction lost_fraction, 1057 unsigned int bit) const 1058 { 1059 /* NaNs and infinities should not have lost fractions. */ 1060 assert(category == fcNormal || category == fcZero); 1061 1062 /* Current callers never pass this so we don't handle it. */ 1063 assert(lost_fraction != lfExactlyZero); 1064 1065 switch(rounding_mode) { 1066 default: 1067 assert(0); 1068 1069 case rmNearestTiesToAway: 1070 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; 1071 1072 case rmNearestTiesToEven: 1073 if(lost_fraction == lfMoreThanHalf) 1074 return true; 1075 1076 /* Our zeroes don't have a significand to test. */ 1077 if(lost_fraction == lfExactlyHalf && category != fcZero) 1078 return APInt::tcExtractBit(significandParts(), bit); 1079 1080 return false; 1081 1082 case rmTowardZero: 1083 return false; 1084 1085 case rmTowardPositive: 1086 return sign == false; 1087 1088 case rmTowardNegative: 1089 return sign == true; 1090 } 1091 } 1092 1093 APFloat::opStatus 1094 APFloat::normalize(roundingMode rounding_mode, 1095 lostFraction lost_fraction) 1096 { 1097 unsigned int omsb; /* One, not zero, based MSB. */ 1098 int exponentChange; 1099 1100 if(category != fcNormal) 1101 return opOK; 1102 1103 /* Before rounding normalize the exponent of fcNormal numbers. */ 1104 omsb = significandMSB() + 1; 1105 1106 if(omsb) { 1107 /* OMSB is numbered from 1. We want to place it in the integer 1108 bit numbered PRECISON if possible, with a compensating change in 1109 the exponent. */ 1110 exponentChange = omsb - semantics->precision; 1111 1112 /* If the resulting exponent is too high, overflow according to 1113 the rounding mode. */ 1114 if(exponent + exponentChange > semantics->maxExponent) 1115 return handleOverflow(rounding_mode); 1116 1117 /* Subnormal numbers have exponent minExponent, and their MSB 1118 is forced based on that. */ 1119 if(exponent + exponentChange < semantics->minExponent) 1120 exponentChange = semantics->minExponent - exponent; 1121 1122 /* Shifting left is easy as we don't lose precision. */ 1123 if(exponentChange < 0) { 1124 assert(lost_fraction == lfExactlyZero); 1125 1126 shiftSignificandLeft(-exponentChange); 1127 1128 return opOK; 1129 } 1130 1131 if(exponentChange > 0) { 1132 lostFraction lf; 1133 1134 /* Shift right and capture any new lost fraction. */ 1135 lf = shiftSignificandRight(exponentChange); 1136 1137 lost_fraction = combineLostFractions(lf, lost_fraction); 1138 1139 /* Keep OMSB up-to-date. */ 1140 if(omsb > (unsigned) exponentChange) 1141 omsb -= exponentChange; 1142 else 1143 omsb = 0; 1144 } 1145 } 1146 1147 /* Now round the number according to rounding_mode given the lost 1148 fraction. */ 1149 1150 /* As specified in IEEE 754, since we do not trap we do not report 1151 underflow for exact results. */ 1152 if(lost_fraction == lfExactlyZero) { 1153 /* Canonicalize zeroes. */ 1154 if(omsb == 0) 1155 category = fcZero; 1156 1157 return opOK; 1158 } 1159 1160 /* Increment the significand if we're rounding away from zero. */ 1161 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { 1162 if(omsb == 0) 1163 exponent = semantics->minExponent; 1164 1165 incrementSignificand(); 1166 omsb = significandMSB() + 1; 1167 1168 /* Did the significand increment overflow? */ 1169 if(omsb == (unsigned) semantics->precision + 1) { 1170 /* Renormalize by incrementing the exponent and shifting our 1171 significand right one. However if we already have the 1172 maximum exponent we overflow to infinity. */ 1173 if(exponent == semantics->maxExponent) { 1174 category = fcInfinity; 1175 1176 return (opStatus) (opOverflow | opInexact); 1177 } 1178 1179 shiftSignificandRight(1); 1180 1181 return opInexact; 1182 } 1183 } 1184 1185 /* The normal case - we were and are not denormal, and any 1186 significand increment above didn't overflow. */ 1187 if(omsb == semantics->precision) 1188 return opInexact; 1189 1190 /* We have a non-zero denormal. */ 1191 assert(omsb < semantics->precision); 1192 1193 /* Canonicalize zeroes. */ 1194 if(omsb == 0) 1195 category = fcZero; 1196 1197 /* The fcZero case is a denormal that underflowed to zero. */ 1198 return (opStatus) (opUnderflow | opInexact); 1199 } 1200 1201 APFloat::opStatus 1202 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) 1203 { 1204 switch(convolve(category, rhs.category)) { 1205 default: 1206 assert(0); 1207 1208 case convolve(fcNaN, fcZero): 1209 case convolve(fcNaN, fcNormal): 1210 case convolve(fcNaN, fcInfinity): 1211 case convolve(fcNaN, fcNaN): 1212 case convolve(fcNormal, fcZero): 1213 case convolve(fcInfinity, fcNormal): 1214 case convolve(fcInfinity, fcZero): 1215 return opOK; 1216 1217 case convolve(fcZero, fcNaN): 1218 case convolve(fcNormal, fcNaN): 1219 case convolve(fcInfinity, fcNaN): 1220 category = fcNaN; 1221 copySignificand(rhs); 1222 return opOK; 1223 1224 case convolve(fcNormal, fcInfinity): 1225 case convolve(fcZero, fcInfinity): 1226 category = fcInfinity; 1227 sign = rhs.sign ^ subtract; 1228 return opOK; 1229 1230 case convolve(fcZero, fcNormal): 1231 assign(rhs); 1232 sign = rhs.sign ^ subtract; 1233 return opOK; 1234 1235 case convolve(fcZero, fcZero): 1236 /* Sign depends on rounding mode; handled by caller. */ 1237 return opOK; 1238 1239 case convolve(fcInfinity, fcInfinity): 1240 /* Differently signed infinities can only be validly 1241 subtracted. */ 1242 if(((sign ^ rhs.sign)!=0) != subtract) { 1243 makeNaN(); 1244 return opInvalidOp; 1245 } 1246 1247 return opOK; 1248 1249 case convolve(fcNormal, fcNormal): 1250 return opDivByZero; 1251 } 1252 } 1253 1254 /* Add or subtract two normal numbers. */ 1255 lostFraction 1256 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) 1257 { 1258 integerPart carry; 1259 lostFraction lost_fraction; 1260 int bits; 1261 1262 /* Determine if the operation on the absolute values is effectively 1263 an addition or subtraction. */ 1264 subtract ^= (sign ^ rhs.sign) ? true : false; 1265 1266 /* Are we bigger exponent-wise than the RHS? */ 1267 bits = exponent - rhs.exponent; 1268 1269 /* Subtraction is more subtle than one might naively expect. */ 1270 if(subtract) { 1271 APFloat temp_rhs(rhs); 1272 bool reverse; 1273 1274 if (bits == 0) { 1275 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; 1276 lost_fraction = lfExactlyZero; 1277 } else if (bits > 0) { 1278 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); 1279 shiftSignificandLeft(1); 1280 reverse = false; 1281 } else { 1282 lost_fraction = shiftSignificandRight(-bits - 1); 1283 temp_rhs.shiftSignificandLeft(1); 1284 reverse = true; 1285 } 1286 1287 if (reverse) { 1288 carry = temp_rhs.subtractSignificand 1289 (*this, lost_fraction != lfExactlyZero); 1290 copySignificand(temp_rhs); 1291 sign = !sign; 1292 } else { 1293 carry = subtractSignificand 1294 (temp_rhs, lost_fraction != lfExactlyZero); 1295 } 1296 1297 /* Invert the lost fraction - it was on the RHS and 1298 subtracted. */ 1299 if(lost_fraction == lfLessThanHalf) 1300 lost_fraction = lfMoreThanHalf; 1301 else if(lost_fraction == lfMoreThanHalf) 1302 lost_fraction = lfLessThanHalf; 1303 1304 /* The code above is intended to ensure that no borrow is 1305 necessary. */ 1306 assert(!carry); 1307 } else { 1308 if(bits > 0) { 1309 APFloat temp_rhs(rhs); 1310 1311 lost_fraction = temp_rhs.shiftSignificandRight(bits); 1312 carry = addSignificand(temp_rhs); 1313 } else { 1314 lost_fraction = shiftSignificandRight(-bits); 1315 carry = addSignificand(rhs); 1316 } 1317 1318 /* We have a guard bit; generating a carry cannot happen. */ 1319 assert(!carry); 1320 } 1321 1322 return lost_fraction; 1323 } 1324 1325 APFloat::opStatus 1326 APFloat::multiplySpecials(const APFloat &rhs) 1327 { 1328 switch(convolve(category, rhs.category)) { 1329 default: 1330 assert(0); 1331 1332 case convolve(fcNaN, fcZero): 1333 case convolve(fcNaN, fcNormal): 1334 case convolve(fcNaN, fcInfinity): 1335 case convolve(fcNaN, fcNaN): 1336 return opOK; 1337 1338 case convolve(fcZero, fcNaN): 1339 case convolve(fcNormal, fcNaN): 1340 case convolve(fcInfinity, fcNaN): 1341 category = fcNaN; 1342 copySignificand(rhs); 1343 return opOK; 1344 1345 case convolve(fcNormal, fcInfinity): 1346 case convolve(fcInfinity, fcNormal): 1347 case convolve(fcInfinity, fcInfinity): 1348 category = fcInfinity; 1349 return opOK; 1350 1351 case convolve(fcZero, fcNormal): 1352 case convolve(fcNormal, fcZero): 1353 case convolve(fcZero, fcZero): 1354 category = fcZero; 1355 return opOK; 1356 1357 case convolve(fcZero, fcInfinity): 1358 case convolve(fcInfinity, fcZero): 1359 makeNaN(); 1360 return opInvalidOp; 1361 1362 case convolve(fcNormal, fcNormal): 1363 return opOK; 1364 } 1365 } 1366 1367 APFloat::opStatus 1368 APFloat::divideSpecials(const APFloat &rhs) 1369 { 1370 switch(convolve(category, rhs.category)) { 1371 default: 1372 assert(0); 1373 1374 case convolve(fcNaN, fcZero): 1375 case convolve(fcNaN, fcNormal): 1376 case convolve(fcNaN, fcInfinity): 1377 case convolve(fcNaN, fcNaN): 1378 case convolve(fcInfinity, fcZero): 1379 case convolve(fcInfinity, fcNormal): 1380 case convolve(fcZero, fcInfinity): 1381 case convolve(fcZero, fcNormal): 1382 return opOK; 1383 1384 case convolve(fcZero, fcNaN): 1385 case convolve(fcNormal, fcNaN): 1386 case convolve(fcInfinity, fcNaN): 1387 category = fcNaN; 1388 copySignificand(rhs); 1389 return opOK; 1390 1391 case convolve(fcNormal, fcInfinity): 1392 category = fcZero; 1393 return opOK; 1394 1395 case convolve(fcNormal, fcZero): 1396 category = fcInfinity; 1397 return opDivByZero; 1398 1399 case convolve(fcInfinity, fcInfinity): 1400 case convolve(fcZero, fcZero): 1401 makeNaN(); 1402 return opInvalidOp; 1403 1404 case convolve(fcNormal, fcNormal): 1405 return opOK; 1406 } 1407 } 1408 1409 APFloat::opStatus 1410 APFloat::modSpecials(const APFloat &rhs) 1411 { 1412 switch(convolve(category, rhs.category)) { 1413 default: 1414 assert(0); 1415 1416 case convolve(fcNaN, fcZero): 1417 case convolve(fcNaN, fcNormal): 1418 case convolve(fcNaN, fcInfinity): 1419 case convolve(fcNaN, fcNaN): 1420 case convolve(fcZero, fcInfinity): 1421 case convolve(fcZero, fcNormal): 1422 case convolve(fcNormal, fcInfinity): 1423 return opOK; 1424 1425 case convolve(fcZero, fcNaN): 1426 case convolve(fcNormal, fcNaN): 1427 case convolve(fcInfinity, fcNaN): 1428 category = fcNaN; 1429 copySignificand(rhs); 1430 return opOK; 1431 1432 case convolve(fcNormal, fcZero): 1433 case convolve(fcInfinity, fcZero): 1434 case convolve(fcInfinity, fcNormal): 1435 case convolve(fcInfinity, fcInfinity): 1436 case convolve(fcZero, fcZero): 1437 makeNaN(); 1438 return opInvalidOp; 1439 1440 case convolve(fcNormal, fcNormal): 1441 return opOK; 1442 } 1443 } 1444 1445 /* Change sign. */ 1446 void 1447 APFloat::changeSign() 1448 { 1449 /* Look mummy, this one's easy. */ 1450 sign = !sign; 1451 } 1452 1453 void 1454 APFloat::clearSign() 1455 { 1456 /* So is this one. */ 1457 sign = 0; 1458 } 1459 1460 void 1461 APFloat::copySign(const APFloat &rhs) 1462 { 1463 /* And this one. */ 1464 sign = rhs.sign; 1465 } 1466 1467 /* Normalized addition or subtraction. */ 1468 APFloat::opStatus 1469 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, 1470 bool subtract) 1471 { 1472 opStatus fs; 1473 1474 assertArithmeticOK(*semantics); 1475 1476 fs = addOrSubtractSpecials(rhs, subtract); 1477 1478 /* This return code means it was not a simple case. */ 1479 if(fs == opDivByZero) { 1480 lostFraction lost_fraction; 1481 1482 lost_fraction = addOrSubtractSignificand(rhs, subtract); 1483 fs = normalize(rounding_mode, lost_fraction); 1484 1485 /* Can only be zero if we lost no fraction. */ 1486 assert(category != fcZero || lost_fraction == lfExactlyZero); 1487 } 1488 1489 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1490 positive zero unless rounding to minus infinity, except that 1491 adding two like-signed zeroes gives that zero. */ 1492 if(category == fcZero) { 1493 if(rhs.category != fcZero || (sign == rhs.sign) == subtract) 1494 sign = (rounding_mode == rmTowardNegative); 1495 } 1496 1497 return fs; 1498 } 1499 1500 /* Normalized addition. */ 1501 APFloat::opStatus 1502 APFloat::add(const APFloat &rhs, roundingMode rounding_mode) 1503 { 1504 return addOrSubtract(rhs, rounding_mode, false); 1505 } 1506 1507 /* Normalized subtraction. */ 1508 APFloat::opStatus 1509 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) 1510 { 1511 return addOrSubtract(rhs, rounding_mode, true); 1512 } 1513 1514 /* Normalized multiply. */ 1515 APFloat::opStatus 1516 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) 1517 { 1518 opStatus fs; 1519 1520 assertArithmeticOK(*semantics); 1521 sign ^= rhs.sign; 1522 fs = multiplySpecials(rhs); 1523 1524 if(category == fcNormal) { 1525 lostFraction lost_fraction = multiplySignificand(rhs, 0); 1526 fs = normalize(rounding_mode, lost_fraction); 1527 if(lost_fraction != lfExactlyZero) 1528 fs = (opStatus) (fs | opInexact); 1529 } 1530 1531 return fs; 1532 } 1533 1534 /* Normalized divide. */ 1535 APFloat::opStatus 1536 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) 1537 { 1538 opStatus fs; 1539 1540 assertArithmeticOK(*semantics); 1541 sign ^= rhs.sign; 1542 fs = divideSpecials(rhs); 1543 1544 if(category == fcNormal) { 1545 lostFraction lost_fraction = divideSignificand(rhs); 1546 fs = normalize(rounding_mode, lost_fraction); 1547 if(lost_fraction != lfExactlyZero) 1548 fs = (opStatus) (fs | opInexact); 1549 } 1550 1551 return fs; 1552 } 1553 1554 /* Normalized remainder. This is not currently correct in all cases. */ 1555 APFloat::opStatus 1556 APFloat::remainder(const APFloat &rhs) 1557 { 1558 opStatus fs; 1559 APFloat V = *this; 1560 unsigned int origSign = sign; 1561 1562 assertArithmeticOK(*semantics); 1563 fs = V.divide(rhs, rmNearestTiesToEven); 1564 if (fs == opDivByZero) 1565 return fs; 1566 1567 int parts = partCount(); 1568 integerPart *x = new integerPart[parts]; 1569 bool ignored; 1570 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1571 rmNearestTiesToEven, &ignored); 1572 if (fs==opInvalidOp) 1573 return fs; 1574 1575 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1576 rmNearestTiesToEven); 1577 assert(fs==opOK); // should always work 1578 1579 fs = V.multiply(rhs, rmNearestTiesToEven); 1580 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1581 1582 fs = subtract(V, rmNearestTiesToEven); 1583 assert(fs==opOK || fs==opInexact); // likewise 1584 1585 if (isZero()) 1586 sign = origSign; // IEEE754 requires this 1587 delete[] x; 1588 return fs; 1589 } 1590 1591 /* Normalized llvm frem (C fmod). 1592 This is not currently correct in all cases. */ 1593 APFloat::opStatus 1594 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) 1595 { 1596 opStatus fs; 1597 assertArithmeticOK(*semantics); 1598 fs = modSpecials(rhs); 1599 1600 if (category == fcNormal && rhs.category == fcNormal) { 1601 APFloat V = *this; 1602 unsigned int origSign = sign; 1603 1604 fs = V.divide(rhs, rmNearestTiesToEven); 1605 if (fs == opDivByZero) 1606 return fs; 1607 1608 int parts = partCount(); 1609 integerPart *x = new integerPart[parts]; 1610 bool ignored; 1611 fs = V.convertToInteger(x, parts * integerPartWidth, true, 1612 rmTowardZero, &ignored); 1613 if (fs==opInvalidOp) 1614 return fs; 1615 1616 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, 1617 rmNearestTiesToEven); 1618 assert(fs==opOK); // should always work 1619 1620 fs = V.multiply(rhs, rounding_mode); 1621 assert(fs==opOK || fs==opInexact); // should not overflow or underflow 1622 1623 fs = subtract(V, rounding_mode); 1624 assert(fs==opOK || fs==opInexact); // likewise 1625 1626 if (isZero()) 1627 sign = origSign; // IEEE754 requires this 1628 delete[] x; 1629 } 1630 return fs; 1631 } 1632 1633 /* Normalized fused-multiply-add. */ 1634 APFloat::opStatus 1635 APFloat::fusedMultiplyAdd(const APFloat &multiplicand, 1636 const APFloat &addend, 1637 roundingMode rounding_mode) 1638 { 1639 opStatus fs; 1640 1641 assertArithmeticOK(*semantics); 1642 1643 /* Post-multiplication sign, before addition. */ 1644 sign ^= multiplicand.sign; 1645 1646 /* If and only if all arguments are normal do we need to do an 1647 extended-precision calculation. */ 1648 if(category == fcNormal 1649 && multiplicand.category == fcNormal 1650 && addend.category == fcNormal) { 1651 lostFraction lost_fraction; 1652 1653 lost_fraction = multiplySignificand(multiplicand, &addend); 1654 fs = normalize(rounding_mode, lost_fraction); 1655 if(lost_fraction != lfExactlyZero) 1656 fs = (opStatus) (fs | opInexact); 1657 1658 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a 1659 positive zero unless rounding to minus infinity, except that 1660 adding two like-signed zeroes gives that zero. */ 1661 if(category == fcZero && sign != addend.sign) 1662 sign = (rounding_mode == rmTowardNegative); 1663 } else { 1664 fs = multiplySpecials(multiplicand); 1665 1666 /* FS can only be opOK or opInvalidOp. There is no more work 1667 to do in the latter case. The IEEE-754R standard says it is 1668 implementation-defined in this case whether, if ADDEND is a 1669 quiet NaN, we raise invalid op; this implementation does so. 1670 1671 If we need to do the addition we can do so with normal 1672 precision. */ 1673 if(fs == opOK) 1674 fs = addOrSubtract(addend, rounding_mode, false); 1675 } 1676 1677 return fs; 1678 } 1679 1680 /* Comparison requires normalized numbers. */ 1681 APFloat::cmpResult 1682 APFloat::compare(const APFloat &rhs) const 1683 { 1684 cmpResult result; 1685 1686 assertArithmeticOK(*semantics); 1687 assert(semantics == rhs.semantics); 1688 1689 switch(convolve(category, rhs.category)) { 1690 default: 1691 assert(0); 1692 1693 case convolve(fcNaN, fcZero): 1694 case convolve(fcNaN, fcNormal): 1695 case convolve(fcNaN, fcInfinity): 1696 case convolve(fcNaN, fcNaN): 1697 case convolve(fcZero, fcNaN): 1698 case convolve(fcNormal, fcNaN): 1699 case convolve(fcInfinity, fcNaN): 1700 return cmpUnordered; 1701 1702 case convolve(fcInfinity, fcNormal): 1703 case convolve(fcInfinity, fcZero): 1704 case convolve(fcNormal, fcZero): 1705 if(sign) 1706 return cmpLessThan; 1707 else 1708 return cmpGreaterThan; 1709 1710 case convolve(fcNormal, fcInfinity): 1711 case convolve(fcZero, fcInfinity): 1712 case convolve(fcZero, fcNormal): 1713 if(rhs.sign) 1714 return cmpGreaterThan; 1715 else 1716 return cmpLessThan; 1717 1718 case convolve(fcInfinity, fcInfinity): 1719 if(sign == rhs.sign) 1720 return cmpEqual; 1721 else if(sign) 1722 return cmpLessThan; 1723 else 1724 return cmpGreaterThan; 1725 1726 case convolve(fcZero, fcZero): 1727 return cmpEqual; 1728 1729 case convolve(fcNormal, fcNormal): 1730 break; 1731 } 1732 1733 /* Two normal numbers. Do they have the same sign? */ 1734 if(sign != rhs.sign) { 1735 if(sign) 1736 result = cmpLessThan; 1737 else 1738 result = cmpGreaterThan; 1739 } else { 1740 /* Compare absolute values; invert result if negative. */ 1741 result = compareAbsoluteValue(rhs); 1742 1743 if(sign) { 1744 if(result == cmpLessThan) 1745 result = cmpGreaterThan; 1746 else if(result == cmpGreaterThan) 1747 result = cmpLessThan; 1748 } 1749 } 1750 1751 return result; 1752 } 1753 1754 /// APFloat::convert - convert a value of one floating point type to another. 1755 /// The return value corresponds to the IEEE754 exceptions. *losesInfo 1756 /// records whether the transformation lost information, i.e. whether 1757 /// converting the result back to the original type will produce the 1758 /// original value (this is almost the same as return value==fsOK, but there 1759 /// are edge cases where this is not so). 1760 1761 APFloat::opStatus 1762 APFloat::convert(const fltSemantics &toSemantics, 1763 roundingMode rounding_mode, bool *losesInfo) 1764 { 1765 lostFraction lostFraction; 1766 unsigned int newPartCount, oldPartCount; 1767 opStatus fs; 1768 1769 assertArithmeticOK(*semantics); 1770 assertArithmeticOK(toSemantics); 1771 lostFraction = lfExactlyZero; 1772 newPartCount = partCountForBits(toSemantics.precision + 1); 1773 oldPartCount = partCount(); 1774 1775 /* Handle storage complications. If our new form is wider, 1776 re-allocate our bit pattern into wider storage. If it is 1777 narrower, we ignore the excess parts, but if narrowing to a 1778 single part we need to free the old storage. 1779 Be careful not to reference significandParts for zeroes 1780 and infinities, since it aborts. */ 1781 if (newPartCount > oldPartCount) { 1782 integerPart *newParts; 1783 newParts = new integerPart[newPartCount]; 1784 APInt::tcSet(newParts, 0, newPartCount); 1785 if (category==fcNormal || category==fcNaN) 1786 APInt::tcAssign(newParts, significandParts(), oldPartCount); 1787 freeSignificand(); 1788 significand.parts = newParts; 1789 } else if (newPartCount < oldPartCount) { 1790 /* Capture any lost fraction through truncation of parts so we get 1791 correct rounding whilst normalizing. */ 1792 if (category==fcNormal) 1793 lostFraction = lostFractionThroughTruncation 1794 (significandParts(), oldPartCount, toSemantics.precision); 1795 if (newPartCount == 1) { 1796 integerPart newPart = 0; 1797 if (category==fcNormal || category==fcNaN) 1798 newPart = significandParts()[0]; 1799 freeSignificand(); 1800 significand.part = newPart; 1801 } 1802 } 1803 1804 if(category == fcNormal) { 1805 /* Re-interpret our bit-pattern. */ 1806 exponent += toSemantics.precision - semantics->precision; 1807 semantics = &toSemantics; 1808 fs = normalize(rounding_mode, lostFraction); 1809 *losesInfo = (fs != opOK); 1810 } else if (category == fcNaN) { 1811 int shift = toSemantics.precision - semantics->precision; 1812 // Do this now so significandParts gets the right answer 1813 const fltSemantics *oldSemantics = semantics; 1814 semantics = &toSemantics; 1815 *losesInfo = false; 1816 // No normalization here, just truncate 1817 if (shift>0) 1818 APInt::tcShiftLeft(significandParts(), newPartCount, shift); 1819 else if (shift < 0) { 1820 unsigned ushift = -shift; 1821 // Figure out if we are losing information. This happens 1822 // if are shifting out something other than 0s, or if the x87 long 1823 // double input did not have its integer bit set (pseudo-NaN), or if the 1824 // x87 long double input did not have its QNan bit set (because the x87 1825 // hardware sets this bit when converting a lower-precision NaN to 1826 // x87 long double). 1827 if (APInt::tcLSB(significandParts(), newPartCount) < ushift) 1828 *losesInfo = true; 1829 if (oldSemantics == &APFloat::x87DoubleExtended && 1830 (!(*significandParts() & 0x8000000000000000ULL) || 1831 !(*significandParts() & 0x4000000000000000ULL))) 1832 *losesInfo = true; 1833 APInt::tcShiftRight(significandParts(), newPartCount, ushift); 1834 } 1835 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) 1836 // does not give you back the same bits. This is dubious, and we 1837 // don't currently do it. You're really supposed to get 1838 // an invalid operation signal at runtime, but nobody does that. 1839 fs = opOK; 1840 } else { 1841 semantics = &toSemantics; 1842 fs = opOK; 1843 *losesInfo = false; 1844 } 1845 1846 return fs; 1847 } 1848 1849 /* Convert a floating point number to an integer according to the 1850 rounding mode. If the rounded integer value is out of range this 1851 returns an invalid operation exception and the contents of the 1852 destination parts are unspecified. If the rounded value is in 1853 range but the floating point number is not the exact integer, the C 1854 standard doesn't require an inexact exception to be raised. IEEE 1855 854 does require it so we do that. 1856 1857 Note that for conversions to integer type the C standard requires 1858 round-to-zero to always be used. */ 1859 APFloat::opStatus 1860 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, 1861 bool isSigned, 1862 roundingMode rounding_mode, 1863 bool *isExact) const 1864 { 1865 lostFraction lost_fraction; 1866 const integerPart *src; 1867 unsigned int dstPartsCount, truncatedBits; 1868 1869 assertArithmeticOK(*semantics); 1870 1871 *isExact = false; 1872 1873 /* Handle the three special cases first. */ 1874 if(category == fcInfinity || category == fcNaN) 1875 return opInvalidOp; 1876 1877 dstPartsCount = partCountForBits(width); 1878 1879 if(category == fcZero) { 1880 APInt::tcSet(parts, 0, dstPartsCount); 1881 // Negative zero can't be represented as an int. 1882 *isExact = !sign; 1883 return opOK; 1884 } 1885 1886 src = significandParts(); 1887 1888 /* Step 1: place our absolute value, with any fraction truncated, in 1889 the destination. */ 1890 if (exponent < 0) { 1891 /* Our absolute value is less than one; truncate everything. */ 1892 APInt::tcSet(parts, 0, dstPartsCount); 1893 /* For exponent -1 the integer bit represents .5, look at that. 1894 For smaller exponents leftmost truncated bit is 0. */ 1895 truncatedBits = semantics->precision -1U - exponent; 1896 } else { 1897 /* We want the most significant (exponent + 1) bits; the rest are 1898 truncated. */ 1899 unsigned int bits = exponent + 1U; 1900 1901 /* Hopelessly large in magnitude? */ 1902 if (bits > width) 1903 return opInvalidOp; 1904 1905 if (bits < semantics->precision) { 1906 /* We truncate (semantics->precision - bits) bits. */ 1907 truncatedBits = semantics->precision - bits; 1908 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); 1909 } else { 1910 /* We want at least as many bits as are available. */ 1911 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); 1912 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); 1913 truncatedBits = 0; 1914 } 1915 } 1916 1917 /* Step 2: work out any lost fraction, and increment the absolute 1918 value if we would round away from zero. */ 1919 if (truncatedBits) { 1920 lost_fraction = lostFractionThroughTruncation(src, partCount(), 1921 truncatedBits); 1922 if (lost_fraction != lfExactlyZero 1923 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { 1924 if (APInt::tcIncrement(parts, dstPartsCount)) 1925 return opInvalidOp; /* Overflow. */ 1926 } 1927 } else { 1928 lost_fraction = lfExactlyZero; 1929 } 1930 1931 /* Step 3: check if we fit in the destination. */ 1932 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; 1933 1934 if (sign) { 1935 if (!isSigned) { 1936 /* Negative numbers cannot be represented as unsigned. */ 1937 if (omsb != 0) 1938 return opInvalidOp; 1939 } else { 1940 /* It takes omsb bits to represent the unsigned integer value. 1941 We lose a bit for the sign, but care is needed as the 1942 maximally negative integer is a special case. */ 1943 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) 1944 return opInvalidOp; 1945 1946 /* This case can happen because of rounding. */ 1947 if (omsb > width) 1948 return opInvalidOp; 1949 } 1950 1951 APInt::tcNegate (parts, dstPartsCount); 1952 } else { 1953 if (omsb >= width + !isSigned) 1954 return opInvalidOp; 1955 } 1956 1957 if (lost_fraction == lfExactlyZero) { 1958 *isExact = true; 1959 return opOK; 1960 } else 1961 return opInexact; 1962 } 1963 1964 /* Same as convertToSignExtendedInteger, except we provide 1965 deterministic values in case of an invalid operation exception, 1966 namely zero for NaNs and the minimal or maximal value respectively 1967 for underflow or overflow. 1968 The *isExact output tells whether the result is exact, in the sense 1969 that converting it back to the original floating point type produces 1970 the original value. This is almost equivalent to result==opOK, 1971 except for negative zeroes. 1972 */ 1973 APFloat::opStatus 1974 APFloat::convertToInteger(integerPart *parts, unsigned int width, 1975 bool isSigned, 1976 roundingMode rounding_mode, bool *isExact) const 1977 { 1978 opStatus fs; 1979 1980 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, 1981 isExact); 1982 1983 if (fs == opInvalidOp) { 1984 unsigned int bits, dstPartsCount; 1985 1986 dstPartsCount = partCountForBits(width); 1987 1988 if (category == fcNaN) 1989 bits = 0; 1990 else if (sign) 1991 bits = isSigned; 1992 else 1993 bits = width - isSigned; 1994 1995 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); 1996 if (sign && isSigned) 1997 APInt::tcShiftLeft(parts, dstPartsCount, width - 1); 1998 } 1999 2000 return fs; 2001 } 2002 2003 /* Convert an unsigned integer SRC to a floating point number, 2004 rounding according to ROUNDING_MODE. The sign of the floating 2005 point number is not modified. */ 2006 APFloat::opStatus 2007 APFloat::convertFromUnsignedParts(const integerPart *src, 2008 unsigned int srcCount, 2009 roundingMode rounding_mode) 2010 { 2011 unsigned int omsb, precision, dstCount; 2012 integerPart *dst; 2013 lostFraction lost_fraction; 2014 2015 assertArithmeticOK(*semantics); 2016 category = fcNormal; 2017 omsb = APInt::tcMSB(src, srcCount) + 1; 2018 dst = significandParts(); 2019 dstCount = partCount(); 2020 precision = semantics->precision; 2021 2022 /* We want the most significant PRECISON bits of SRC. There may not 2023 be that many; extract what we can. */ 2024 if (precision <= omsb) { 2025 exponent = omsb - 1; 2026 lost_fraction = lostFractionThroughTruncation(src, srcCount, 2027 omsb - precision); 2028 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); 2029 } else { 2030 exponent = precision - 1; 2031 lost_fraction = lfExactlyZero; 2032 APInt::tcExtract(dst, dstCount, src, omsb, 0); 2033 } 2034 2035 return normalize(rounding_mode, lost_fraction); 2036 } 2037 2038 APFloat::opStatus 2039 APFloat::convertFromAPInt(const APInt &Val, 2040 bool isSigned, 2041 roundingMode rounding_mode) 2042 { 2043 unsigned int partCount = Val.getNumWords(); 2044 APInt api = Val; 2045 2046 sign = false; 2047 if (isSigned && api.isNegative()) { 2048 sign = true; 2049 api = -api; 2050 } 2051 2052 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2053 } 2054 2055 /* Convert a two's complement integer SRC to a floating point number, 2056 rounding according to ROUNDING_MODE. ISSIGNED is true if the 2057 integer is signed, in which case it must be sign-extended. */ 2058 APFloat::opStatus 2059 APFloat::convertFromSignExtendedInteger(const integerPart *src, 2060 unsigned int srcCount, 2061 bool isSigned, 2062 roundingMode rounding_mode) 2063 { 2064 opStatus status; 2065 2066 assertArithmeticOK(*semantics); 2067 if (isSigned 2068 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { 2069 integerPart *copy; 2070 2071 /* If we're signed and negative negate a copy. */ 2072 sign = true; 2073 copy = new integerPart[srcCount]; 2074 APInt::tcAssign(copy, src, srcCount); 2075 APInt::tcNegate(copy, srcCount); 2076 status = convertFromUnsignedParts(copy, srcCount, rounding_mode); 2077 delete [] copy; 2078 } else { 2079 sign = false; 2080 status = convertFromUnsignedParts(src, srcCount, rounding_mode); 2081 } 2082 2083 return status; 2084 } 2085 2086 /* FIXME: should this just take a const APInt reference? */ 2087 APFloat::opStatus 2088 APFloat::convertFromZeroExtendedInteger(const integerPart *parts, 2089 unsigned int width, bool isSigned, 2090 roundingMode rounding_mode) 2091 { 2092 unsigned int partCount = partCountForBits(width); 2093 APInt api = APInt(width, partCount, parts); 2094 2095 sign = false; 2096 if(isSigned && APInt::tcExtractBit(parts, width - 1)) { 2097 sign = true; 2098 api = -api; 2099 } 2100 2101 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); 2102 } 2103 2104 APFloat::opStatus 2105 APFloat::convertFromHexadecimalString(const char *p, 2106 roundingMode rounding_mode) 2107 { 2108 lostFraction lost_fraction; 2109 integerPart *significand; 2110 unsigned int bitPos, partsCount; 2111 const char *dot, *firstSignificantDigit; 2112 2113 zeroSignificand(); 2114 exponent = 0; 2115 category = fcNormal; 2116 2117 significand = significandParts(); 2118 partsCount = partCount(); 2119 bitPos = partsCount * integerPartWidth; 2120 2121 /* Skip leading zeroes and any (hexa)decimal point. */ 2122 p = skipLeadingZeroesAndAnyDot(p, &dot); 2123 firstSignificantDigit = p; 2124 2125 for(;;) { 2126 integerPart hex_value; 2127 2128 if(*p == '.') { 2129 assert(dot == 0); 2130 dot = p++; 2131 } 2132 2133 hex_value = hexDigitValue(*p); 2134 if(hex_value == -1U) { 2135 lost_fraction = lfExactlyZero; 2136 break; 2137 } 2138 2139 p++; 2140 2141 /* Store the number whilst 4-bit nibbles remain. */ 2142 if(bitPos) { 2143 bitPos -= 4; 2144 hex_value <<= bitPos % integerPartWidth; 2145 significand[bitPos / integerPartWidth] |= hex_value; 2146 } else { 2147 lost_fraction = trailingHexadecimalFraction(p, hex_value); 2148 while(hexDigitValue(*p) != -1U) 2149 p++; 2150 break; 2151 } 2152 } 2153 2154 /* Hex floats require an exponent but not a hexadecimal point. */ 2155 assert(*p == 'p' || *p == 'P'); 2156 2157 /* Ignore the exponent if we are zero. */ 2158 if(p != firstSignificantDigit) { 2159 int expAdjustment; 2160 2161 /* Implicit hexadecimal point? */ 2162 if(!dot) 2163 dot = p; 2164 2165 /* Calculate the exponent adjustment implicit in the number of 2166 significant digits. */ 2167 expAdjustment = static_cast<int>(dot - firstSignificantDigit); 2168 if(expAdjustment < 0) 2169 expAdjustment++; 2170 expAdjustment = expAdjustment * 4 - 1; 2171 2172 /* Adjust for writing the significand starting at the most 2173 significant nibble. */ 2174 expAdjustment += semantics->precision; 2175 expAdjustment -= partsCount * integerPartWidth; 2176 2177 /* Adjust for the given exponent. */ 2178 exponent = totalExponent(p, expAdjustment); 2179 } 2180 2181 return normalize(rounding_mode, lost_fraction); 2182 } 2183 2184 APFloat::opStatus 2185 APFloat::roundSignificandWithExponent(const integerPart *decSigParts, 2186 unsigned sigPartCount, int exp, 2187 roundingMode rounding_mode) 2188 { 2189 unsigned int parts, pow5PartCount; 2190 fltSemantics calcSemantics = { 32767, -32767, 0, true }; 2191 integerPart pow5Parts[maxPowerOfFiveParts]; 2192 bool isNearest; 2193 2194 isNearest = (rounding_mode == rmNearestTiesToEven 2195 || rounding_mode == rmNearestTiesToAway); 2196 2197 parts = partCountForBits(semantics->precision + 11); 2198 2199 /* Calculate pow(5, abs(exp)). */ 2200 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); 2201 2202 for (;; parts *= 2) { 2203 opStatus sigStatus, powStatus; 2204 unsigned int excessPrecision, truncatedBits; 2205 2206 calcSemantics.precision = parts * integerPartWidth - 1; 2207 excessPrecision = calcSemantics.precision - semantics->precision; 2208 truncatedBits = excessPrecision; 2209 2210 APFloat decSig(calcSemantics, fcZero, sign); 2211 APFloat pow5(calcSemantics, fcZero, false); 2212 2213 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, 2214 rmNearestTiesToEven); 2215 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, 2216 rmNearestTiesToEven); 2217 /* Add exp, as 10^n = 5^n * 2^n. */ 2218 decSig.exponent += exp; 2219 2220 lostFraction calcLostFraction; 2221 integerPart HUerr, HUdistance; 2222 unsigned int powHUerr; 2223 2224 if (exp >= 0) { 2225 /* multiplySignificand leaves the precision-th bit set to 1. */ 2226 calcLostFraction = decSig.multiplySignificand(pow5, NULL); 2227 powHUerr = powStatus != opOK; 2228 } else { 2229 calcLostFraction = decSig.divideSignificand(pow5); 2230 /* Denormal numbers have less precision. */ 2231 if (decSig.exponent < semantics->minExponent) { 2232 excessPrecision += (semantics->minExponent - decSig.exponent); 2233 truncatedBits = excessPrecision; 2234 if (excessPrecision > calcSemantics.precision) 2235 excessPrecision = calcSemantics.precision; 2236 } 2237 /* Extra half-ulp lost in reciprocal of exponent. */ 2238 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; 2239 } 2240 2241 /* Both multiplySignificand and divideSignificand return the 2242 result with the integer bit set. */ 2243 assert (APInt::tcExtractBit 2244 (decSig.significandParts(), calcSemantics.precision - 1) == 1); 2245 2246 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, 2247 powHUerr); 2248 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), 2249 excessPrecision, isNearest); 2250 2251 /* Are we guaranteed to round correctly if we truncate? */ 2252 if (HUdistance >= HUerr) { 2253 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), 2254 calcSemantics.precision - excessPrecision, 2255 excessPrecision); 2256 /* Take the exponent of decSig. If we tcExtract-ed less bits 2257 above we must adjust our exponent to compensate for the 2258 implicit right shift. */ 2259 exponent = (decSig.exponent + semantics->precision 2260 - (calcSemantics.precision - excessPrecision)); 2261 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), 2262 decSig.partCount(), 2263 truncatedBits); 2264 return normalize(rounding_mode, calcLostFraction); 2265 } 2266 } 2267 } 2268 2269 APFloat::opStatus 2270 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) 2271 { 2272 decimalInfo D; 2273 opStatus fs; 2274 2275 /* Scan the text. */ 2276 interpretDecimal(p, &D); 2277 2278 /* Handle the quick cases. First the case of no significant digits, 2279 i.e. zero, and then exponents that are obviously too large or too 2280 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp 2281 definitely overflows if 2282 2283 (exp - 1) * L >= maxExponent 2284 2285 and definitely underflows to zero where 2286 2287 (exp + 1) * L <= minExponent - precision 2288 2289 With integer arithmetic the tightest bounds for L are 2290 2291 93/28 < L < 196/59 [ numerator <= 256 ] 2292 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] 2293 */ 2294 2295 if (decDigitValue(*D.firstSigDigit) >= 10U) { 2296 category = fcZero; 2297 fs = opOK; 2298 } else if ((D.normalizedExponent + 1) * 28738 2299 <= 8651 * (semantics->minExponent - (int) semantics->precision)) { 2300 /* Underflow to zero and round. */ 2301 zeroSignificand(); 2302 fs = normalize(rounding_mode, lfLessThanHalf); 2303 } else if ((D.normalizedExponent - 1) * 42039 2304 >= 12655 * semantics->maxExponent) { 2305 /* Overflow and round. */ 2306 fs = handleOverflow(rounding_mode); 2307 } else { 2308 integerPart *decSignificand; 2309 unsigned int partCount; 2310 2311 /* A tight upper bound on number of bits required to hold an 2312 N-digit decimal integer is N * 196 / 59. Allocate enough space 2313 to hold the full significand, and an extra part required by 2314 tcMultiplyPart. */ 2315 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1; 2316 partCount = partCountForBits(1 + 196 * partCount / 59); 2317 decSignificand = new integerPart[partCount + 1]; 2318 partCount = 0; 2319 2320 /* Convert to binary efficiently - we do almost all multiplication 2321 in an integerPart. When this would overflow do we do a single 2322 bignum multiplication, and then revert again to multiplication 2323 in an integerPart. */ 2324 do { 2325 integerPart decValue, val, multiplier; 2326 2327 val = 0; 2328 multiplier = 1; 2329 2330 do { 2331 if (*p == '.') 2332 p++; 2333 2334 decValue = decDigitValue(*p++); 2335 multiplier *= 10; 2336 val = val * 10 + decValue; 2337 /* The maximum number that can be multiplied by ten with any 2338 digit added without overflowing an integerPart. */ 2339 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); 2340 2341 /* Multiply out the current part. */ 2342 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, 2343 partCount, partCount + 1, false); 2344 2345 /* If we used another part (likely but not guaranteed), increase 2346 the count. */ 2347 if (decSignificand[partCount]) 2348 partCount++; 2349 } while (p <= D.lastSigDigit); 2350 2351 category = fcNormal; 2352 fs = roundSignificandWithExponent(decSignificand, partCount, 2353 D.exponent, rounding_mode); 2354 2355 delete [] decSignificand; 2356 } 2357 2358 return fs; 2359 } 2360 2361 APFloat::opStatus 2362 APFloat::convertFromString(const char *p, roundingMode rounding_mode) 2363 { 2364 assertArithmeticOK(*semantics); 2365 2366 /* Handle a leading minus sign. */ 2367 if(*p == '-') 2368 sign = 1, p++; 2369 else 2370 sign = 0; 2371 2372 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) 2373 return convertFromHexadecimalString(p + 2, rounding_mode); 2374 2375 return convertFromDecimalString(p, rounding_mode); 2376 } 2377 2378 /* Write out a hexadecimal representation of the floating point value 2379 to DST, which must be of sufficient size, in the C99 form 2380 [-]0xh.hhhhp[+-]d. Return the number of characters written, 2381 excluding the terminating NUL. 2382 2383 If UPPERCASE, the output is in upper case, otherwise in lower case. 2384 2385 HEXDIGITS digits appear altogether, rounding the value if 2386 necessary. If HEXDIGITS is 0, the minimal precision to display the 2387 number precisely is used instead. If nothing would appear after 2388 the decimal point it is suppressed. 2389 2390 The decimal exponent is always printed and has at least one digit. 2391 Zero values display an exponent of zero. Infinities and NaNs 2392 appear as "infinity" or "nan" respectively. 2393 2394 The above rules are as specified by C99. There is ambiguity about 2395 what the leading hexadecimal digit should be. This implementation 2396 uses whatever is necessary so that the exponent is displayed as 2397 stored. This implies the exponent will fall within the IEEE format 2398 range, and the leading hexadecimal digit will be 0 (for denormals), 2399 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with 2400 any other digits zero). 2401 */ 2402 unsigned int 2403 APFloat::convertToHexString(char *dst, unsigned int hexDigits, 2404 bool upperCase, roundingMode rounding_mode) const 2405 { 2406 char *p; 2407 2408 assertArithmeticOK(*semantics); 2409 2410 p = dst; 2411 if (sign) 2412 *dst++ = '-'; 2413 2414 switch (category) { 2415 case fcInfinity: 2416 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); 2417 dst += sizeof infinityL - 1; 2418 break; 2419 2420 case fcNaN: 2421 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); 2422 dst += sizeof NaNU - 1; 2423 break; 2424 2425 case fcZero: 2426 *dst++ = '0'; 2427 *dst++ = upperCase ? 'X': 'x'; 2428 *dst++ = '0'; 2429 if (hexDigits > 1) { 2430 *dst++ = '.'; 2431 memset (dst, '0', hexDigits - 1); 2432 dst += hexDigits - 1; 2433 } 2434 *dst++ = upperCase ? 'P': 'p'; 2435 *dst++ = '0'; 2436 break; 2437 2438 case fcNormal: 2439 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); 2440 break; 2441 } 2442 2443 *dst = 0; 2444 2445 return static_cast<unsigned int>(dst - p); 2446 } 2447 2448 /* Does the hard work of outputting the correctly rounded hexadecimal 2449 form of a normal floating point number with the specified number of 2450 hexadecimal digits. If HEXDIGITS is zero the minimum number of 2451 digits necessary to print the value precisely is output. */ 2452 char * 2453 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, 2454 bool upperCase, 2455 roundingMode rounding_mode) const 2456 { 2457 unsigned int count, valueBits, shift, partsCount, outputDigits; 2458 const char *hexDigitChars; 2459 const integerPart *significand; 2460 char *p; 2461 bool roundUp; 2462 2463 *dst++ = '0'; 2464 *dst++ = upperCase ? 'X': 'x'; 2465 2466 roundUp = false; 2467 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; 2468 2469 significand = significandParts(); 2470 partsCount = partCount(); 2471 2472 /* +3 because the first digit only uses the single integer bit, so 2473 we have 3 virtual zero most-significant-bits. */ 2474 valueBits = semantics->precision + 3; 2475 shift = integerPartWidth - valueBits % integerPartWidth; 2476 2477 /* The natural number of digits required ignoring trailing 2478 insignificant zeroes. */ 2479 outputDigits = (valueBits - significandLSB () + 3) / 4; 2480 2481 /* hexDigits of zero means use the required number for the 2482 precision. Otherwise, see if we are truncating. If we are, 2483 find out if we need to round away from zero. */ 2484 if (hexDigits) { 2485 if (hexDigits < outputDigits) { 2486 /* We are dropping non-zero bits, so need to check how to round. 2487 "bits" is the number of dropped bits. */ 2488 unsigned int bits; 2489 lostFraction fraction; 2490 2491 bits = valueBits - hexDigits * 4; 2492 fraction = lostFractionThroughTruncation (significand, partsCount, bits); 2493 roundUp = roundAwayFromZero(rounding_mode, fraction, bits); 2494 } 2495 outputDigits = hexDigits; 2496 } 2497 2498 /* Write the digits consecutively, and start writing in the location 2499 of the hexadecimal point. We move the most significant digit 2500 left and add the hexadecimal point later. */ 2501 p = ++dst; 2502 2503 count = (valueBits + integerPartWidth - 1) / integerPartWidth; 2504 2505 while (outputDigits && count) { 2506 integerPart part; 2507 2508 /* Put the most significant integerPartWidth bits in "part". */ 2509 if (--count == partsCount) 2510 part = 0; /* An imaginary higher zero part. */ 2511 else 2512 part = significand[count] << shift; 2513 2514 if (count && shift) 2515 part |= significand[count - 1] >> (integerPartWidth - shift); 2516 2517 /* Convert as much of "part" to hexdigits as we can. */ 2518 unsigned int curDigits = integerPartWidth / 4; 2519 2520 if (curDigits > outputDigits) 2521 curDigits = outputDigits; 2522 dst += partAsHex (dst, part, curDigits, hexDigitChars); 2523 outputDigits -= curDigits; 2524 } 2525 2526 if (roundUp) { 2527 char *q = dst; 2528 2529 /* Note that hexDigitChars has a trailing '0'. */ 2530 do { 2531 q--; 2532 *q = hexDigitChars[hexDigitValue (*q) + 1]; 2533 } while (*q == '0'); 2534 assert (q >= p); 2535 } else { 2536 /* Add trailing zeroes. */ 2537 memset (dst, '0', outputDigits); 2538 dst += outputDigits; 2539 } 2540 2541 /* Move the most significant digit to before the point, and if there 2542 is something after the decimal point add it. This must come 2543 after rounding above. */ 2544 p[-1] = p[0]; 2545 if (dst -1 == p) 2546 dst--; 2547 else 2548 p[0] = '.'; 2549 2550 /* Finally output the exponent. */ 2551 *dst++ = upperCase ? 'P': 'p'; 2552 2553 return writeSignedDecimal (dst, exponent); 2554 } 2555 2556 // For good performance it is desirable for different APFloats 2557 // to produce different integers. 2558 uint32_t 2559 APFloat::getHashValue() const 2560 { 2561 if (category==fcZero) return sign<<8 | semantics->precision ; 2562 else if (category==fcInfinity) return sign<<9 | semantics->precision; 2563 else if (category==fcNaN) return 1<<10 | semantics->precision; 2564 else { 2565 uint32_t hash = sign<<11 | semantics->precision | exponent<<12; 2566 const integerPart* p = significandParts(); 2567 for (int i=partCount(); i>0; i--, p++) 2568 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32); 2569 return hash; 2570 } 2571 } 2572 2573 // Conversion from APFloat to/from host float/double. It may eventually be 2574 // possible to eliminate these and have everybody deal with APFloats, but that 2575 // will take a while. This approach will not easily extend to long double. 2576 // Current implementation requires integerPartWidth==64, which is correct at 2577 // the moment but could be made more general. 2578 2579 // Denormals have exponent minExponent in APFloat, but minExponent-1 in 2580 // the actual IEEE respresentations. We compensate for that here. 2581 2582 APInt 2583 APFloat::convertF80LongDoubleAPFloatToAPInt() const 2584 { 2585 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); 2586 assert (partCount()==2); 2587 2588 uint64_t myexponent, mysignificand; 2589 2590 if (category==fcNormal) { 2591 myexponent = exponent+16383; //bias 2592 mysignificand = significandParts()[0]; 2593 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) 2594 myexponent = 0; // denormal 2595 } else if (category==fcZero) { 2596 myexponent = 0; 2597 mysignificand = 0; 2598 } else if (category==fcInfinity) { 2599 myexponent = 0x7fff; 2600 mysignificand = 0x8000000000000000ULL; 2601 } else { 2602 assert(category == fcNaN && "Unknown category"); 2603 myexponent = 0x7fff; 2604 mysignificand = significandParts()[0]; 2605 } 2606 2607 uint64_t words[2]; 2608 words[0] = ((uint64_t)(sign & 1) << 63) | 2609 ((myexponent & 0x7fffLL) << 48) | 2610 ((mysignificand >>16) & 0xffffffffffffLL); 2611 words[1] = mysignificand & 0xffff; 2612 return APInt(80, 2, words); 2613 } 2614 2615 APInt 2616 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const 2617 { 2618 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); 2619 assert (partCount()==2); 2620 2621 uint64_t myexponent, mysignificand, myexponent2, mysignificand2; 2622 2623 if (category==fcNormal) { 2624 myexponent = exponent + 1023; //bias 2625 myexponent2 = exponent2 + 1023; 2626 mysignificand = significandParts()[0]; 2627 mysignificand2 = significandParts()[1]; 2628 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2629 myexponent = 0; // denormal 2630 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL)) 2631 myexponent2 = 0; // denormal 2632 } else if (category==fcZero) { 2633 myexponent = 0; 2634 mysignificand = 0; 2635 myexponent2 = 0; 2636 mysignificand2 = 0; 2637 } else if (category==fcInfinity) { 2638 myexponent = 0x7ff; 2639 myexponent2 = 0; 2640 mysignificand = 0; 2641 mysignificand2 = 0; 2642 } else { 2643 assert(category == fcNaN && "Unknown category"); 2644 myexponent = 0x7ff; 2645 mysignificand = significandParts()[0]; 2646 myexponent2 = exponent2; 2647 mysignificand2 = significandParts()[1]; 2648 } 2649 2650 uint64_t words[2]; 2651 words[0] = ((uint64_t)(sign & 1) << 63) | 2652 ((myexponent & 0x7ff) << 52) | 2653 (mysignificand & 0xfffffffffffffLL); 2654 words[1] = ((uint64_t)(sign2 & 1) << 63) | 2655 ((myexponent2 & 0x7ff) << 52) | 2656 (mysignificand2 & 0xfffffffffffffLL); 2657 return APInt(128, 2, words); 2658 } 2659 2660 APInt 2661 APFloat::convertDoubleAPFloatToAPInt() const 2662 { 2663 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2664 assert (partCount()==1); 2665 2666 uint64_t myexponent, mysignificand; 2667 2668 if (category==fcNormal) { 2669 myexponent = exponent+1023; //bias 2670 mysignificand = *significandParts(); 2671 if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) 2672 myexponent = 0; // denormal 2673 } else if (category==fcZero) { 2674 myexponent = 0; 2675 mysignificand = 0; 2676 } else if (category==fcInfinity) { 2677 myexponent = 0x7ff; 2678 mysignificand = 0; 2679 } else { 2680 assert(category == fcNaN && "Unknown category!"); 2681 myexponent = 0x7ff; 2682 mysignificand = *significandParts(); 2683 } 2684 2685 return APInt(64, ((((uint64_t)(sign & 1) << 63) | 2686 ((myexponent & 0x7ff) << 52) | 2687 (mysignificand & 0xfffffffffffffLL)))); 2688 } 2689 2690 APInt 2691 APFloat::convertFloatAPFloatToAPInt() const 2692 { 2693 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2694 assert (partCount()==1); 2695 2696 uint32_t myexponent, mysignificand; 2697 2698 if (category==fcNormal) { 2699 myexponent = exponent+127; //bias 2700 mysignificand = (uint32_t)*significandParts(); 2701 if (myexponent == 1 && !(mysignificand & 0x800000)) 2702 myexponent = 0; // denormal 2703 } else if (category==fcZero) { 2704 myexponent = 0; 2705 mysignificand = 0; 2706 } else if (category==fcInfinity) { 2707 myexponent = 0xff; 2708 mysignificand = 0; 2709 } else { 2710 assert(category == fcNaN && "Unknown category!"); 2711 myexponent = 0xff; 2712 mysignificand = (uint32_t)*significandParts(); 2713 } 2714 2715 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | 2716 (mysignificand & 0x7fffff))); 2717 } 2718 2719 // This function creates an APInt that is just a bit map of the floating 2720 // point constant as it would appear in memory. It is not a conversion, 2721 // and treating the result as a normal integer is unlikely to be useful. 2722 2723 APInt 2724 APFloat::bitcastToAPInt() const 2725 { 2726 if (semantics == (const llvm::fltSemantics*)&IEEEsingle) 2727 return convertFloatAPFloatToAPInt(); 2728 2729 if (semantics == (const llvm::fltSemantics*)&IEEEdouble) 2730 return convertDoubleAPFloatToAPInt(); 2731 2732 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) 2733 return convertPPCDoubleDoubleAPFloatToAPInt(); 2734 2735 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && 2736 "unknown format!"); 2737 return convertF80LongDoubleAPFloatToAPInt(); 2738 } 2739 2740 float 2741 APFloat::convertToFloat() const 2742 { 2743 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); 2744 APInt api = bitcastToAPInt(); 2745 return api.bitsToFloat(); 2746 } 2747 2748 double 2749 APFloat::convertToDouble() const 2750 { 2751 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); 2752 APInt api = bitcastToAPInt(); 2753 return api.bitsToDouble(); 2754 } 2755 2756 /// Integer bit is explicit in this format. Intel hardware (387 and later) 2757 /// does not support these bit patterns: 2758 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") 2759 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") 2760 /// exponent = 0, integer bit 1 ("pseudodenormal") 2761 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") 2762 /// At the moment, the first two are treated as NaNs, the second two as Normal. 2763 void 2764 APFloat::initFromF80LongDoubleAPInt(const APInt &api) 2765 { 2766 assert(api.getBitWidth()==80); 2767 uint64_t i1 = api.getRawData()[0]; 2768 uint64_t i2 = api.getRawData()[1]; 2769 uint64_t myexponent = (i1 >> 48) & 0x7fff; 2770 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | 2771 (i2 & 0xffff); 2772 2773 initialize(&APFloat::x87DoubleExtended); 2774 assert(partCount()==2); 2775 2776 sign = static_cast<unsigned int>(i1>>63); 2777 if (myexponent==0 && mysignificand==0) { 2778 // exponent, significand meaningless 2779 category = fcZero; 2780 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { 2781 // exponent, significand meaningless 2782 category = fcInfinity; 2783 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { 2784 // exponent meaningless 2785 category = fcNaN; 2786 significandParts()[0] = mysignificand; 2787 significandParts()[1] = 0; 2788 } else { 2789 category = fcNormal; 2790 exponent = myexponent - 16383; 2791 significandParts()[0] = mysignificand; 2792 significandParts()[1] = 0; 2793 if (myexponent==0) // denormal 2794 exponent = -16382; 2795 } 2796 } 2797 2798 void 2799 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) 2800 { 2801 assert(api.getBitWidth()==128); 2802 uint64_t i1 = api.getRawData()[0]; 2803 uint64_t i2 = api.getRawData()[1]; 2804 uint64_t myexponent = (i1 >> 52) & 0x7ff; 2805 uint64_t mysignificand = i1 & 0xfffffffffffffLL; 2806 uint64_t myexponent2 = (i2 >> 52) & 0x7ff; 2807 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL; 2808 2809 initialize(&APFloat::PPCDoubleDouble); 2810 assert(partCount()==2); 2811 2812 sign = static_cast<unsigned int>(i1>>63); 2813 sign2 = static_cast<unsigned int>(i2>>63); 2814 if (myexponent==0 && mysignificand==0) { 2815 // exponent, significand meaningless 2816 // exponent2 and significand2 are required to be 0; we don't check 2817 category = fcZero; 2818 } else if (myexponent==0x7ff && mysignificand==0) { 2819 // exponent, significand meaningless 2820 // exponent2 and significand2 are required to be 0; we don't check 2821 category = fcInfinity; 2822 } else if (myexponent==0x7ff && mysignificand!=0) { 2823 // exponent meaningless. So is the whole second word, but keep it 2824 // for determinism. 2825 category = fcNaN; 2826 exponent2 = myexponent2; 2827 significandParts()[0] = mysignificand; 2828 significandParts()[1] = mysignificand2; 2829 } else { 2830 category = fcNormal; 2831 // Note there is no category2; the second word is treated as if it is 2832 // fcNormal, although it might be something else considered by itself. 2833 exponent = myexponent - 1023; 2834 exponent2 = myexponent2 - 1023; 2835 significandParts()[0] = mysignificand; 2836 significandParts()[1] = mysignificand2; 2837 if (myexponent==0) // denormal 2838 exponent = -1022; 2839 else 2840 significandParts()[0] |= 0x10000000000000LL; // integer bit 2841 if (myexponent2==0) 2842 exponent2 = -1022; 2843 else 2844 significandParts()[1] |= 0x10000000000000LL; // integer bit 2845 } 2846 } 2847 2848 void 2849 APFloat::initFromDoubleAPInt(const APInt &api) 2850 { 2851 assert(api.getBitWidth()==64); 2852 uint64_t i = *api.getRawData(); 2853 uint64_t myexponent = (i >> 52) & 0x7ff; 2854 uint64_t mysignificand = i & 0xfffffffffffffLL; 2855 2856 initialize(&APFloat::IEEEdouble); 2857 assert(partCount()==1); 2858 2859 sign = static_cast<unsigned int>(i>>63); 2860 if (myexponent==0 && mysignificand==0) { 2861 // exponent, significand meaningless 2862 category = fcZero; 2863 } else if (myexponent==0x7ff && mysignificand==0) { 2864 // exponent, significand meaningless 2865 category = fcInfinity; 2866 } else if (myexponent==0x7ff && mysignificand!=0) { 2867 // exponent meaningless 2868 category = fcNaN; 2869 *significandParts() = mysignificand; 2870 } else { 2871 category = fcNormal; 2872 exponent = myexponent - 1023; 2873 *significandParts() = mysignificand; 2874 if (myexponent==0) // denormal 2875 exponent = -1022; 2876 else 2877 *significandParts() |= 0x10000000000000LL; // integer bit 2878 } 2879 } 2880 2881 void 2882 APFloat::initFromFloatAPInt(const APInt & api) 2883 { 2884 assert(api.getBitWidth()==32); 2885 uint32_t i = (uint32_t)*api.getRawData(); 2886 uint32_t myexponent = (i >> 23) & 0xff; 2887 uint32_t mysignificand = i & 0x7fffff; 2888 2889 initialize(&APFloat::IEEEsingle); 2890 assert(partCount()==1); 2891 2892 sign = i >> 31; 2893 if (myexponent==0 && mysignificand==0) { 2894 // exponent, significand meaningless 2895 category = fcZero; 2896 } else if (myexponent==0xff && mysignificand==0) { 2897 // exponent, significand meaningless 2898 category = fcInfinity; 2899 } else if (myexponent==0xff && mysignificand!=0) { 2900 // sign, exponent, significand meaningless 2901 category = fcNaN; 2902 *significandParts() = mysignificand; 2903 } else { 2904 category = fcNormal; 2905 exponent = myexponent - 127; //bias 2906 *significandParts() = mysignificand; 2907 if (myexponent==0) // denormal 2908 exponent = -126; 2909 else 2910 *significandParts() |= 0x800000; // integer bit 2911 } 2912 } 2913 2914 /// Treat api as containing the bits of a floating point number. Currently 2915 /// we infer the floating point type from the size of the APInt. The 2916 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful 2917 /// when the size is anything else). 2918 void 2919 APFloat::initFromAPInt(const APInt& api, bool isIEEE) 2920 { 2921 if (api.getBitWidth() == 32) 2922 return initFromFloatAPInt(api); 2923 else if (api.getBitWidth()==64) 2924 return initFromDoubleAPInt(api); 2925 else if (api.getBitWidth()==80) 2926 return initFromF80LongDoubleAPInt(api); 2927 else if (api.getBitWidth()==128 && !isIEEE) 2928 return initFromPPCDoubleDoubleAPInt(api); 2929 else 2930 assert(0); 2931 } 2932 2933 APFloat::APFloat(const APInt& api, bool isIEEE) 2934 { 2935 initFromAPInt(api, isIEEE); 2936 } 2937 2938 APFloat::APFloat(float f) 2939 { 2940 APInt api = APInt(32, 0); 2941 initFromAPInt(api.floatToBits(f)); 2942 } 2943 2944 APFloat::APFloat(double d) 2945 { 2946 APInt api = APInt(64, 0); 2947 initFromAPInt(api.doubleToBits(d)); 2948 } 2949