xref: /llvm-project/llvm/lib/Support/APFloat.cpp (revision 595de12ff307f3f06f4ccd2acafc400cc1262bc6)
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
30 
/// Forwards METHOD_CALL to the storage member that matches the current
/// semantics: `U.IEEE` when getSemantics() uses the IEEEFloat layout,
/// `U.Double` when it uses the DoubleAPFloat layout.
///
/// The expansion contains `return` statements, so it returns from the
/// enclosing function; it must be expanded where `getSemantics()` and `U`
/// are in scope. Any other layout hits llvm_unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39 
40 using namespace llvm;
41 
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
///
/// The key is `_lhs * 4 + _rhs`, so distinct pairs only map to distinct keys
/// while `_rhs` stays below 4.
/// NOTE(review): presumably fcCategory has exactly four enumerators numbered
/// 0-3 -- confirm against APFloat.h.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49 
/* Hexadecimal significand parsing and conversion to hexadecimal strings
   handle a part one 4-bit hex digit at a time, so a part must hold a whole
   number of hex digits.  */
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 
54 namespace llvm {
55 
// How the nonfinite values Inf and NaN are represented.
enum class fltNonfiniteBehavior {
  // Represents standard IEEE 754 behavior. A value is nonfinite if the
  // exponent field is all 1s. In such cases, a value is Inf if the
  // significand bits are all zero, and NaN otherwise.
  IEEE754,

  // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  // representation for Inf, and operations that would ordinarily produce Inf
  // produce NaN instead.
  // The details of the NaN representation(s) in this form are determined by the
  // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  // encodings do not distinguish between signalling and quiet NaN.
  NanOnly,
};
72 
// How NaN values are represented. This is currently only used in combination
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
// while having IEEE non-finite behavior is liable to lead to unexpected
// results.
enum class fltNanEncoding {
  // Represents the standard IEEE behavior where a value is NaN if its
  // exponent is all 1s and the significand is non-zero.
  IEEE,

  // Represents the behavior in the Float8E4M3 floating point type where NaN is
  // represented by having the exponent and mantissa set to all 1s.
  // This behavior matches the FP8 E4M3 type described in
  // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  // as non-signalling, although the paper does not state whether the NaN
  // values are signalling or not.
  AllOnes,

  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
  // there is only one NaN value, it is treated as quiet NaN. This matches the
  // behavior described in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
97 
98 /* Represents floating point arithmetic semantics.  */
99 struct fltSemantics {
100   /* The largest E such that 2^E is representable; this matches the
101      definition of IEEE 754.  */
102   APFloatBase::ExponentType maxExponent;
103 
104   /* The smallest E such that 2^E is a normalized number; this
105      matches the definition of IEEE 754.  */
106   APFloatBase::ExponentType minExponent;
107 
108   /* Number of bits in the significand.  This includes the integer
109      bit.  */
110   unsigned int precision;
111 
112   /* Number of bits actually used in the semantics. */
113   unsigned int sizeInBits;
114 
115   fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
116 
117   fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
118   // Returns true if any number described by this semantics can be precisely
119   // represented by the specified semantics. Does not take into account
120   // the value of fltNonfiniteBehavior.
121   bool isRepresentableBy(const fltSemantics &S) const {
122     return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
123            precision <= S.precision;
124   }
125 };
126 
// The supported formats. Each initializer lists, in fltSemantics member
// order: {maxExponent, minExponent, precision, sizeInBits
//         [, nonFiniteBehavior, nanEncoding]}.
// Entries that omit the last two fields get the struct defaults
// (IEEE754 non-finite behavior, IEEE NaN encoding).
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
static constexpr fltSemantics semFloat8E5M2FNUZ = {
    15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3FN = {
    8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
static constexpr fltSemantics semFloat8E4M3FNUZ = {
    7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
    4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
// Placeholder semantics with every field zero; returned by
// APFloatBase::Bogus().
static constexpr fltSemantics semBogus = {0, 0, 0, 0};
144 
/* The IBM double-double semantics. Such a number consists of a pair of IEEE
   64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
   (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
   Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
   to each other, and two 11-bit exponents.

   The exponent and precision fields below are placeholders, not a
   description of the format; only sizeInBits (128) is meaningful.

   Note: we need to make the value different from semBogus as otherwise
   an unsafe optimization may collapse both values to a single address,
   and we heavily rely on them having distinct addresses.             */
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
155 
/* These are legacy semantics for the fallback, inaccurate implementation of
   IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
   operation. It's equivalent to having an IEEE number with consecutive 106
   bits of mantissa and 11 bits of exponent.

   It's not equivalent to IBM double-double. For example, a legit IBM
   double-double, 1 + epsilon:

     1 + epsilon = 1 + (1 >> 1076)

   is not representable by a consecutive 106 bits of mantissa.

   Currently, these semantics are used in the following way:

     semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
     (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
     semPPCDoubleDoubleLegacy -> IEEE operations

   We use bitcastToAPInt() to get the bit representation (in APInt) of the
   underlying IEEEdouble, then use the APInt constructor to construct the
   legacy IEEE float.

   TODO: Implement all operations in semPPCDoubleDouble, and delete these
   semantics.  */
static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
                                                          53 + 53, 128};
182 
/// Maps a Semantics enumerator to the corresponding fltSemantics object.
///
/// The switch deliberately has one case per enumerator and no default, so
/// that a compiler's switch-coverage diagnostics can flag a newly added
/// enumerator that is not handled here. A value outside the listed cases
/// reaches llvm_unreachable.
const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
  switch (S) {
  case S_IEEEhalf:
    return IEEEhalf();
  case S_BFloat:
    return BFloat();
  case S_IEEEsingle:
    return IEEEsingle();
  case S_IEEEdouble:
    return IEEEdouble();
  case S_IEEEquad:
    return IEEEquad();
  case S_PPCDoubleDouble:
    return PPCDoubleDouble();
  case S_Float8E5M2:
    return Float8E5M2();
  case S_Float8E5M2FNUZ:
    return Float8E5M2FNUZ();
  case S_Float8E4M3FN:
    return Float8E4M3FN();
  case S_Float8E4M3FNUZ:
    return Float8E4M3FNUZ();
  case S_Float8E4M3B11FNUZ:
    return Float8E4M3B11FNUZ();
  case S_FloatTF32:
    return FloatTF32();
  case S_x87DoubleExtended:
    return x87DoubleExtended();
  }
  llvm_unreachable("Unrecognised floating semantics");
}
214 
215 APFloatBase::Semantics
216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
217   if (&Sem == &llvm::APFloat::IEEEhalf())
218     return S_IEEEhalf;
219   else if (&Sem == &llvm::APFloat::BFloat())
220     return S_BFloat;
221   else if (&Sem == &llvm::APFloat::IEEEsingle())
222     return S_IEEEsingle;
223   else if (&Sem == &llvm::APFloat::IEEEdouble())
224     return S_IEEEdouble;
225   else if (&Sem == &llvm::APFloat::IEEEquad())
226     return S_IEEEquad;
227   else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
228     return S_PPCDoubleDouble;
229   else if (&Sem == &llvm::APFloat::Float8E5M2())
230     return S_Float8E5M2;
231   else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
232     return S_Float8E5M2FNUZ;
233   else if (&Sem == &llvm::APFloat::Float8E4M3FN())
234     return S_Float8E4M3FN;
235   else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
236     return S_Float8E4M3FNUZ;
237   else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
238     return S_Float8E4M3B11FNUZ;
239   else if (&Sem == &llvm::APFloat::FloatTF32())
240     return S_FloatTF32;
241   else if (&Sem == &llvm::APFloat::x87DoubleExtended())
242     return S_x87DoubleExtended;
243   else
244     llvm_unreachable("Unknown floating semantics");
245 }
246 
// Accessors for the file-local semantics constants. Each returns a reference
// to one fixed object; SemanticsToEnum compares these addresses, so every
// accessor must keep returning its own distinct constant.
const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
const fltSemantics &APFloatBase::PPCDoubleDouble() {
  return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
  return semFloat8E4M3B11FNUZ;
}
const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
const fltSemantics &APFloatBase::x87DoubleExtended() {
  return semX87DoubleExtended;
}
// Note: Bogus() has no Semantics enumerator in EnumToSemantics or
// SemanticsToEnum.
const fltSemantics &APFloatBase::Bogus() { return semBogus; }
267 
// Namespace-scope definitions of APFloatBase's rounding-mode constants.
// NOTE(review): presumably these exist so the in-class `static constexpr`
// members declared in APFloat.h can be ODR-used before C++17 -- confirm
// against the header before removing.
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
constexpr RoundingMode APFloatBase::rmTowardPositive;
constexpr RoundingMode APFloatBase::rmTowardNegative;
constexpr RoundingMode APFloatBase::rmTowardZero;
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
273 
/* A tight upper bound on number of parts required to hold the value
   pow(5, power) is

     power * 815 / (351 * integerPartWidth) + 1

   (815 / 351 is a rational approximation that lies slightly above
   log2(5), the number of bits each factor of five contributes.)

   However, whilst the result may require only this many parts,
   because we are multiplying two values to get it, the
   multiplication may require an extra part with the excess part
   being zero (consider the trivial case of 1 * 1, tcFullMultiply
   requires two parts to hold the single-part result).  So we add an
   extra one to guarantee enough space whilst multiplying.  */
/* Largest exponent and precision among the supported semantics (those of
   semIEEEquad); they bound the largest power of five that is needed.  */
const unsigned int maxExponent = 16383;
const unsigned int maxPrecision = 113;
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
const unsigned int maxPowerOfFiveParts =
    2 +
    ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
291 
// Field accessors for fltSemantics; each returns the like-named member
// unchanged.

/// Number of significand bits, including the integer bit.
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
  return semantics.precision;
}
/// Largest E such that 2^E is representable.
APFloatBase::ExponentType
APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
  return semantics.maxExponent;
}
/// Smallest E such that 2^E is a normalized number.
APFloatBase::ExponentType
APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
  return semantics.minExponent;
}
/// Number of bits actually used by the format.
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
  return semantics.sizeInBits;
}
306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
307                                                  bool isSigned) {
308   // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
309   // at least one more bit than the MaxExponent to hold the max FP value.
310   unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
311   // Extra sign bit needed.
312   if (isSigned)
313     ++MinBitWidth;
314   return MinBitWidth;
315 }
316 
317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
318                                             const fltSemantics &Dst) {
319   // Exponent range must be larger.
320   if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
321     return false;
322 
323   // If the mantissa is long enough, the result value could still be denormal
324   // with a larger exponent range.
325   //
326   // FIXME: This condition is probably not accurate but also shouldn't be a
327   // practical concern with existing types.
328   return Dst.precision >= Src.precision;
329 }
330 
/// Returns the sizeInBits field of \p Sem (same value as
/// semanticsSizeInBits).
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
  return Sem.sizeInBits;
}
334 
/* Exponent value used for zero: one below the smallest normal exponent of
   the semantics.  */
static constexpr APFloatBase::ExponentType
exponentZero(const fltSemantics &semantics) {
  return semantics.minExponent - 1;
}

/* Exponent value used for infinity: one above the largest exponent of the
   semantics.  */
static constexpr APFloatBase::ExponentType
exponentInf(const fltSemantics &semantics) {
  return semantics.maxExponent + 1;
}
344 
345 static constexpr APFloatBase::ExponentType
346 exponentNaN(const fltSemantics &semantics) {
347   if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
348     if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
349       return exponentZero(semantics);
350     return semantics.maxExponent;
351   }
352   return semantics.maxExponent + 1;
353 }
354 
355 /* A bunch of private, handy routines.  */
356 
/* Wrap the message ERR in an llvm::Error: a StringError carrying
   inconvertibleErrorCode().  Used by the parsing helpers below to report
   malformed input.  */
static inline Error createError(const Twine &Err) {
  return make_error<StringError>(Err, inconvertibleErrorCode());
}
360 
361 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
362   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
363 }
364 
/* Map the character code C to its decimal digit value.  Digits yield
   0U-9U; because the subtraction is unsigned, any other character yields
   a value >= 10U, which callers treat as "not a digit".  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
371 
372 /* Return the value of a decimal exponent of the form
373    [+-]ddddddd.
374 
375    If the exponent overflows, returns a large exponent with the
376    appropriate sign.  */
377 static Expected<int> readExponent(StringRef::iterator begin,
378                                   StringRef::iterator end) {
379   bool isNegative;
380   unsigned int absExponent;
381   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
382   StringRef::iterator p = begin;
383 
384   // Treat no exponent as 0 to match binutils
385   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
386     return 0;
387   }
388 
389   isNegative = (*p == '-');
390   if (*p == '-' || *p == '+') {
391     p++;
392     if (p == end)
393       return createError("Exponent has no digits");
394   }
395 
396   absExponent = decDigitValue(*p++);
397   if (absExponent >= 10U)
398     return createError("Invalid character in exponent");
399 
400   for (; p != end; ++p) {
401     unsigned int value;
402 
403     value = decDigitValue(*p);
404     if (value >= 10U)
405       return createError("Invalid character in exponent");
406 
407     absExponent = absExponent * 10U + value;
408     if (absExponent >= overlargeExponent) {
409       absExponent = overlargeExponent;
410       break;
411     }
412   }
413 
414   if (isNegative)
415     return -(int) absExponent;
416   else
417     return (int) absExponent;
418 }
419 
420 /* This is ugly and needs cleaning up, but I don't immediately see
421    how whilst remaining safe.  */
422 static Expected<int> totalExponent(StringRef::iterator p,
423                                    StringRef::iterator end,
424                                    int exponentAdjustment) {
425   int unsignedExponent;
426   bool negative, overflow;
427   int exponent = 0;
428 
429   if (p == end)
430     return createError("Exponent has no digits");
431 
432   negative = *p == '-';
433   if (*p == '-' || *p == '+') {
434     p++;
435     if (p == end)
436       return createError("Exponent has no digits");
437   }
438 
439   unsignedExponent = 0;
440   overflow = false;
441   for (; p != end; ++p) {
442     unsigned int value;
443 
444     value = decDigitValue(*p);
445     if (value >= 10U)
446       return createError("Invalid character in exponent");
447 
448     unsignedExponent = unsignedExponent * 10 + value;
449     if (unsignedExponent > 32767) {
450       overflow = true;
451       break;
452     }
453   }
454 
455   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
456     overflow = true;
457 
458   if (!overflow) {
459     exponent = unsignedExponent;
460     if (negative)
461       exponent = -exponent;
462     exponent += exponentAdjustment;
463     if (exponent > 32767 || exponent < -32768)
464       overflow = true;
465   }
466 
467   if (overflow)
468     exponent = negative ? -32768: 32767;
469 
470   return exponent;
471 }
472 
473 static Expected<StringRef::iterator>
474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
475                            StringRef::iterator *dot) {
476   StringRef::iterator p = begin;
477   *dot = end;
478   while (p != end && *p == '0')
479     p++;
480 
481   if (p != end && *p == '.') {
482     *dot = p++;
483 
484     if (end - begin == 1)
485       return createError("Significand has no digits");
486 
487     while (p != end && *p == '0')
488       p++;
489   }
490 
491   return p;
492 }
493 
/* Result of interpretDecimal.  Given a normal decimal floating point
   number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, interpretDecimal
   fills out the structure D.  Exponent is appropriate if the significand
   is treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  /* First significant digit, i.e. the position after leading zeroes and
     any leading dot.  */
  const char *firstSigDigit;
  /* Last significant digit, after trailing zeroes have been dropped.  */
  const char *lastSigDigit;
  /* Decimal exponent when the significand digits are read as an integer.  */
  int exponent;
  /* Decimal exponent when the point follows the first significant digit.  */
  int normalizedExponent;
};
513 
/* Parse the decimal number in [BEGIN, END) and fill out *D (see
   decimalInfo).  Returns success, or an error when the string has more
   than one dot, has no significand digits, has an invalid character in
   the significand, or has a malformed exponent.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  /* Position of the decimal point; END means "none seen yet".  */
  StringRef::iterator dot = end;

  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;

  D->firstSigDigit = p;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Scan the significand: stop at the first character that is neither a
     digit nor the (single) decimal point.  */
  for (; p != end; ++p) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      if (p == end)
        break;
    }
    if (decDigitValue(*p) >= 10U)
      break;
  }

  /* Anything left over must be an exponent introduced by 'e' or 'E'.  */
  if (p != end) {
    if (*p != 'e' && *p != 'E')
      return createError("Invalid character in significand");
    if (p == begin)
      return createError("Significand has no digits");
    /* A lone "." before the exponent marker.  */
    if (dot != end && p - begin == 1)
      return createError("Significand has no digits");

    /* p points to the first non-digit in the string */
    auto ExpOrErr = readExponent(p + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = p;
  }

  /* If number is all zeroes accept any exponent.  */
  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
    if (p != begin) {
      /* The inner loop steps back over '0's; the outer loop repeats that
         after stepping back over a '.', so zeroes on both sides of the
         point are dropped.  */
      do
        do
          p--;
        while (p != begin && *p == '0');
      while (p != begin && *p == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    /* (dot - p) counts the characters from the last significant digit to
       the point; the boolean term subtracts one when the point lies after
       that digit.  */
    D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
    /* Add the number of digits after the first significant one; the boolean
       term discounts a point lying strictly between the first and last
       significant digits.  */
    D->normalizedExponent = (D->exponent +
              static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
                                      - (dot > D->firstSigDigit && dot < p)));
  }

  D->lastSigDigit = p;
  return Error::success();
}
579 
580 /* Return the trailing fraction of a hexadecimal number.
581    DIGITVALUE is the first hex digit of the fraction, P points to
582    the next digit.  */
583 static Expected<lostFraction>
584 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
585                             unsigned int digitValue) {
586   unsigned int hexDigit;
587 
588   /* If the first trailing digit isn't 0 or 8 we can work out the
589      fraction immediately.  */
590   if (digitValue > 8)
591     return lfMoreThanHalf;
592   else if (digitValue < 8 && digitValue > 0)
593     return lfLessThanHalf;
594 
595   // Otherwise we need to find the first non-zero digit.
596   while (p != end && (*p == '0' || *p == '.'))
597     p++;
598 
599   if (p == end)
600     return createError("Invalid trailing hexadecimal fraction!");
601 
602   hexDigit = hexDigitValue(*p);
603 
604   /* If we ran off the end it is exactly zero or one-half, otherwise
605      a little more.  */
606   if (hexDigit == UINT_MAX)
607     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
608   else
609     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
610 }
611 
612 /* Return the fraction lost were a bignum truncated losing the least
613    significant BITS bits.  */
614 static lostFraction
615 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
616                               unsigned int partCount,
617                               unsigned int bits)
618 {
619   unsigned int lsb;
620 
621   lsb = APInt::tcLSB(parts, partCount);
622 
623   /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
624   if (bits <= lsb)
625     return lfExactlyZero;
626   if (bits == lsb + 1)
627     return lfExactlyHalf;
628   if (bits <= partCount * APFloatBase::integerPartWidth &&
629       APInt::tcExtractBit(parts, bits - 1))
630     return lfMoreThanHalf;
631 
632   return lfLessThanHalf;
633 }
634 
635 /* Shift DST right BITS bits noting lost fraction.  */
636 static lostFraction
637 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
638 {
639   lostFraction lost_fraction;
640 
641   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
642 
643   APInt::tcShiftRight(dst, parts, bits);
644 
645   return lost_fraction;
646 }
647 
648 /* Combine the effect of two lost fractions.  */
649 static lostFraction
650 combineLostFractions(lostFraction moreSignificant,
651                      lostFraction lessSignificant)
652 {
653   if (lessSignificant != lfExactlyZero) {
654     if (moreSignificant == lfExactlyZero)
655       moreSignificant = lfLessThanHalf;
656     else if (moreSignificant == lfExactlyHalf)
657       moreSignificant = lfMoreThanHalf;
658   }
659 
660   return moreSignificant;
661 }
662 
/* Bound, in half-ulps, on the error of a product.  On multiplying two
   floating point numbers which differ from the values they approximate
   by at most HUERR1 and HUERR2 half-ulps, the error from the true value
   is strictly less than the returned value.  INEXACTMULTIPLY says whether
   the multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int roundingError = inexactMultiply ? 1U : 0U;

  /* Exact inputs: <= inexactMultiply half-ulps.  */
  if (inputError == 0)
    return 2 * roundingError;

  return roundingError + 2 * inputError;
}
680 
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
   when the least significant BITS are truncated.  BITS cannot be
   zero.

   PARTS is indexed with parts[0] as the least significant part.  When the
   distance does not fit the cases handled below, an all-ones value
   ("a lot") is returned instead of an exact count.  */
static APFloatBase::integerPart
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
                 bool isNearest) {
  unsigned int count, partBits;
  APFloatBase::integerPart part, boundary;

  assert(bits != 0);

  /* Locate the part holding the topmost truncated bit (index COUNT) and
     how many of that part's low bits are truncated (PARTBITS, 1..width).  */
  bits--;
  count = bits / APFloatBase::integerPartWidth;
  partBits = bits % APFloatBase::integerPartWidth + 1;

  /* The truncated bits that live in the top part.  */
  part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));

  /* Within the top part, "half" is its highest truncated bit.  */
  if (isNearest)
    boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
  else
    boundary = 0;

  /* Single-part case: the smaller of the two wrapped differences is the
     absolute distance from the boundary.  */
  if (count == 0) {
    if (part - boundary <= boundary - part)
      return part - boundary;
    else
      return boundary - part;
  }

  if (part == boundary) {
    /* Top part sits exactly on the boundary: the distance is the value of
       the lower parts, which only fits if all but parts[0] are zero.  */
    while (--count)
      if (parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return parts[0];
  } else if (part == boundary - 1) {
    /* Top part is one below the boundary: the distance is the negation of
       the lower parts, which only fits if all but parts[0] are all ones.  */
    while (--count)
      if (~parts[count])
        return ~(APFloatBase::integerPart) 0; /* A lot.  */

    return -parts[0];
  }

  return ~(APFloatBase::integerPart) 0; /* A lot.  */
}
726 
/* Place pow(5, power) in DST, and return the number of parts used.
   DST must be at least one part larger than size of the answer.

   The low three bits of POWER are handled by table lookup; each remaining
   bit n selects a factor pow(5, pow(2, n + 3)), obtained by repeatedly
   squaring pow(5, 8).  */
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  /* Successive squares pow(5, 8), pow(5, 16), ... stored back to back;
     seeded with pow(5, 8).  */
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  /* Number of parts occupied by the square that POW5 currently points at.  */
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  /* P1 always holds the running product (RESULT parts); P2 is the buffer
     the next multiplication writes into.  */
  p1 = dst;
  p2 = scratch;

  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      /* Square the previous entry, which sits immediately before POW5.  */
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      /* The full product may leave its top part zero; do not count it.  */
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* The product is in p2 with RESULT parts; swap the buffers so that
         it is in p1 and p2 becomes scratch space again.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    /* Step past the current square to where the next one will be stored.  */
    pow5 += partsCount;
  }

  /* An odd number of swaps leaves the answer in SCRATCH; copy it out.  */
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}
782 
/* Hex digit tables indexed by digit value.  Each has an extra '0' at
   index 16 to avoid modular arithmetic when adding one; used when
   rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of the special values "infinity" and
   "nan".  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
791 
792 /* Write out an integerPart in hexadecimal, starting with the most
793    significant nibble.  Write out exactly COUNT hexdigits, return
794    COUNT.  */
795 static unsigned int
796 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
797            const char *hexDigitChars)
798 {
799   unsigned int result = count;
800 
801   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
802 
803   part >>= (APFloatBase::integerPartWidth - 4 * count);
804   while (count--) {
805     dst[count] = hexDigitChars[part & 0xf];
806     part >>= 4;
807   }
808 
809   return result;
810 }
811 
/* Write N to DST as decimal digits, without a terminator, and return a
   pointer just past the last character written.  Zero is written as
   "0".  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  /* Digits come out least significant first, so collect them and then
     emit them in reverse.  */
  char reversed[40];
  unsigned int length = 0;

  do {
    reversed[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  while (length != 0)
    *dst++ = reversed[--length];

  return dst;
}
829 
830 /* Write out a signed decimal integer.  */
831 static char *
832 writeSignedDecimal (char *dst, int value)
833 {
834   if (value < 0) {
835     *dst++ = '-';
836     dst = writeUnsignedDecimal(dst, -(unsigned) value);
837   } else
838     dst = writeUnsignedDecimal(dst, value);
839 
840   return dst;
841 }
842 
843 namespace detail {
844 /* Constructors.  */
845 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
846   unsigned int count;
847 
848   semantics = ourSemantics;
849   count = partCount();
850   if (count > 1)
851     significand.parts = new integerPart[count];
852 }
853 
854 void IEEEFloat::freeSignificand() {
855   if (needsCleanup())
856     delete [] significand.parts;
857 }
858 
859 void IEEEFloat::assign(const IEEEFloat &rhs) {
860   assert(semantics == rhs.semantics);
861 
862   sign = rhs.sign;
863   category = rhs.category;
864   exponent = rhs.exponent;
865   if (isFiniteNonZero() || category == fcNaN)
866     copySignificand(rhs);
867 }
868 
869 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
870   assert(isFiniteNonZero() || category == fcNaN);
871   assert(rhs.partCount() >= partCount());
872 
873   APInt::tcAssign(significandParts(), rhs.significandParts(),
874                   partCount());
875 }
876 
/* Make this number a NaN.  SNaN requests a signalling rather than a quiet
   NaN, NEGATIVE supplies the sign, and FILL, when non-null, supplies the
   payload bits copied into the significand (otherwise the payload starts
   out as zero).  For NanOnly semantics these requests are overridden
   below, because such formats have a single fixed NaN pattern.  */
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
  category = fcNaN;
  sign = Negative;
  exponent = exponentNaN();

  // Note: this local shadows the `significand` member for the rest of the
  // function; it points at the raw parts.
  integerPart *significand = significandParts();
  unsigned numParts = partCount();

  APInt fill_storage;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // Finite-only types do not distinguish signalling and quiet NaN, so
    // the SNaN request is dropped and the caller's fill is replaced by the
    // format's fixed pattern: all-zero payload with the sign forced on for
    // the NegativeZero encoding, all-ones payload otherwise.
    SNaN = false;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
      sign = true;
      fill_storage = APInt::getZero(semantics->precision - 1);
    } else {
      fill_storage = APInt::getAllOnes(semantics->precision - 1);
    }
    fill = &fill_storage;
  }

  // Set the significand bits to the fill.
  // Pre-clear when there is no fill, or when the fill has fewer words than
  // the significand and so would leave some parts unwritten.
  if (!fill || fill->getNumWords() < numParts)
    APInt::tcSet(significand, 0, numParts);
  if (fill) {
    APInt::tcAssign(significand, fill->getRawData(),
                    std::min(fill->getNumWords(), numParts));

    // Zero out the excess bits of the significand.
    // Only the low (precision - 1) bits are payload.  The arithmetic below
    // hard-codes 64-bit parts.
    unsigned bitsToPreserve = semantics->precision - 1;
    unsigned part = bitsToPreserve / 64;
    bitsToPreserve %= 64;
    significand[part] &= ((1ULL << bitsToPreserve) - 1);
    for (part++; part != numParts; ++part)
      significand[part] = 0;
  }

  // The quiet bit is the bit just below the integer bit.
  unsigned QNaNBit = semantics->precision - 2;

  if (SNaN) {
    // We always have to clear the QNaN bit to make it an SNaN.
    APInt::tcClearBit(significand, QNaNBit);

    // If there are no bits set in the payload, we have to set
    // *something* to make it a NaN instead of an infinity;
    // conventionally, this is the next bit down from the QNaN bit.
    if (APInt::tcIsZero(significand, numParts))
      APInt::tcSetBit(significand, QNaNBit - 1);
  } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // The only NaN is a quiet NaN, and it has no bits set in the significand.
    // Do nothing.
  } else {
    // We always have to set the QNaN bit to make it a QNaN.
    APInt::tcSetBit(significand, QNaNBit);
  }

  // For x87 extended precision, we want to make a NaN, not a
  // pseudo-NaN.  Maybe we should expose the ability to make
  // pseudo-NaNs?
  // QNaNBit + 1 is precision - 1, i.e. the explicit integer bit.
  if (semantics == &semX87DoubleExtended)
    APInt::tcSetBit(significand, QNaNBit + 1);
}
943 
944 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
945   if (this != &rhs) {
946     if (semantics != rhs.semantics) {
947       freeSignificand();
948       initialize(rhs.semantics);
949     }
950     assign(rhs);
951   }
952 
953   return *this;
954 }
955 
956 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
957   freeSignificand();
958 
959   semantics = rhs.semantics;
960   significand = rhs.significand;
961   exponent = rhs.exponent;
962   category = rhs.category;
963   sign = rhs.sign;
964 
965   rhs.semantics = &semBogus;
966   return *this;
967 }
968 
969 bool IEEEFloat::isDenormal() const {
970   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
971          (APInt::tcExtractBit(significandParts(),
972                               semantics->precision - 1) == 0);
973 }
974 
975 bool IEEEFloat::isSmallest() const {
976   // The smallest number by magnitude in our format will be the smallest
977   // denormal, i.e. the floating point number with exponent being minimum
978   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
979   return isFiniteNonZero() && exponent == semantics->minExponent &&
980     significandMSB() == 0;
981 }
982 
983 bool IEEEFloat::isSmallestNormalized() const {
984   return getCategory() == fcNormal && exponent == semantics->minExponent &&
985          isSignificandAllZerosExceptMSB();
986 }
987 
988 bool IEEEFloat::isSignificandAllOnes() const {
989   // Test if the significand excluding the integral bit is all ones. This allows
990   // us to test for binade boundaries.
991   const integerPart *Parts = significandParts();
992   const unsigned PartCount = partCountForBits(semantics->precision);
993   for (unsigned i = 0; i < PartCount - 1; i++)
994     if (~Parts[i])
995       return false;
996 
997   // Set the unused high bits to all ones when we compare.
998   const unsigned NumHighBits =
999     PartCount*integerPartWidth - semantics->precision + 1;
1000   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1001          "Can not have more high bits to fill than integerPartWidth");
1002   const integerPart HighBitFill =
1003     ~integerPart(0) << (integerPartWidth - NumHighBits);
1004   if (~(Parts[PartCount - 1] | HighBitFill))
1005     return false;
1006 
1007   return true;
1008 }
1009 
1010 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1011   // Test if the significand excluding the integral bit is all ones except for
1012   // the least significant bit.
1013   const integerPart *Parts = significandParts();
1014 
1015   if (Parts[0] & 1)
1016     return false;
1017 
1018   const unsigned PartCount = partCountForBits(semantics->precision);
1019   for (unsigned i = 0; i < PartCount - 1; i++) {
1020     if (~Parts[i] & ~unsigned{!i})
1021       return false;
1022   }
1023 
1024   // Set the unused high bits to all ones when we compare.
1025   const unsigned NumHighBits =
1026       PartCount * integerPartWidth - semantics->precision + 1;
1027   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1028          "Can not have more high bits to fill than integerPartWidth");
1029   const integerPart HighBitFill = ~integerPart(0)
1030                                   << (integerPartWidth - NumHighBits);
1031   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1032     return false;
1033 
1034   return true;
1035 }
1036 
1037 bool IEEEFloat::isSignificandAllZeros() const {
1038   // Test if the significand excluding the integral bit is all zeros. This
1039   // allows us to test for binade boundaries.
1040   const integerPart *Parts = significandParts();
1041   const unsigned PartCount = partCountForBits(semantics->precision);
1042 
1043   for (unsigned i = 0; i < PartCount - 1; i++)
1044     if (Parts[i])
1045       return false;
1046 
1047   // Compute how many bits are used in the final word.
1048   const unsigned NumHighBits =
1049     PartCount*integerPartWidth - semantics->precision + 1;
1050   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1051          "clear than integerPartWidth");
1052   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1053 
1054   if (Parts[PartCount - 1] & HighBitMask)
1055     return false;
1056 
1057   return true;
1058 }
1059 
1060 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1061   const integerPart *Parts = significandParts();
1062   const unsigned PartCount = partCountForBits(semantics->precision);
1063 
1064   for (unsigned i = 0; i < PartCount - 1; i++) {
1065     if (Parts[i])
1066       return false;
1067   }
1068 
1069   const unsigned NumHighBits =
1070       PartCount * integerPartWidth - semantics->precision + 1;
1071   return Parts[PartCount - 1] == integerPart(1)
1072                                      << (integerPartWidth - NumHighBits);
1073 }
1074 
1075 bool IEEEFloat::isLargest() const {
1076   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1077       semantics->nanEncoding == fltNanEncoding::AllOnes) {
1078     // The largest number by magnitude in our format will be the floating point
1079     // number with maximum exponent and with significand that is all ones except
1080     // the LSB.
1081     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1082            isSignificandAllOnesExceptLSB();
1083   } else {
1084     // The largest number by magnitude in our format will be the floating point
1085     // number with maximum exponent and with significand that is all ones.
1086     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1087            isSignificandAllOnes();
1088   }
1089 }
1090 
1091 bool IEEEFloat::isInteger() const {
1092   // This could be made more efficient; I'm going for obviously correct.
1093   if (!isFinite()) return false;
1094   IEEEFloat truncated = *this;
1095   truncated.roundToIntegral(rmTowardZero);
1096   return compare(truncated) == cmpEqual;
1097 }
1098 
1099 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1100   if (this == &rhs)
1101     return true;
1102   if (semantics != rhs.semantics ||
1103       category != rhs.category ||
1104       sign != rhs.sign)
1105     return false;
1106   if (category==fcZero || category==fcInfinity)
1107     return true;
1108 
1109   if (isFiniteNonZero() && exponent != rhs.exponent)
1110     return false;
1111 
1112   return std::equal(significandParts(), significandParts() + partCount(),
1113                     rhs.significandParts());
1114 }
1115 
1116 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1117   initialize(&ourSemantics);
1118   sign = 0;
1119   category = fcNormal;
1120   zeroSignificand();
1121   exponent = ourSemantics.precision - 1;
1122   significandParts()[0] = value;
1123   normalize(rmNearestTiesToEven, lfExactlyZero);
1124 }
1125 
1126 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1127   initialize(&ourSemantics);
1128   makeZero(false);
1129 }
1130 
// Delegate to the previous constructor, because a later copy construction
// from this object may actually inspect its category, which therefore must
// not be left as garbage.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
    : IEEEFloat(ourSemantics) {}
1135 
1136 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1137   initialize(rhs.semantics);
1138   assign(rhs);
1139 }
1140 
1141 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1142   *this = std::move(rhs);
1143 }
1144 
1145 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1146 
1147 unsigned int IEEEFloat::partCount() const {
1148   return partCountForBits(semantics->precision + 1);
1149 }
1150 
1151 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1152   return const_cast<IEEEFloat *>(this)->significandParts();
1153 }
1154 
1155 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1156   if (partCount() > 1)
1157     return significand.parts;
1158   else
1159     return &significand.part;
1160 }
1161 
1162 void IEEEFloat::zeroSignificand() {
1163   APInt::tcSet(significandParts(), 0, partCount());
1164 }
1165 
1166 /* Increment an fcNormal floating point number's significand.  */
1167 void IEEEFloat::incrementSignificand() {
1168   integerPart carry;
1169 
1170   carry = APInt::tcIncrement(significandParts(), partCount());
1171 
1172   /* Our callers should never cause us to overflow.  */
1173   assert(carry == 0);
1174   (void)carry;
1175 }
1176 
1177 /* Add the significand of the RHS.  Returns the carry flag.  */
1178 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1179   integerPart *parts;
1180 
1181   parts = significandParts();
1182 
1183   assert(semantics == rhs.semantics);
1184   assert(exponent == rhs.exponent);
1185 
1186   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1187 }
1188 
1189 /* Subtract the significand of the RHS with a borrow flag.  Returns
1190    the borrow flag.  */
1191 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1192                                                       integerPart borrow) {
1193   integerPart *parts;
1194 
1195   parts = significandParts();
1196 
1197   assert(semantics == rhs.semantics);
1198   assert(exponent == rhs.exponent);
1199 
1200   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1201                            partCount());
1202 }
1203 
/* Multiply our significand by that of the RHS.  If ADDEND is non-zero, add
   it on to the full-precision result of the multiplication.  The exponent
   is updated to match, but the result is not necessarily normalized (see
   the note near the end).  Returns the lost fraction.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Up to four parts fit in the on-stack scratch buffer; anything wider is
  // heap-allocated and released at the end of the function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    // To reuse addOrSubtractSignificand, this object is temporarily turned
    // into an extended-precision float whose significand is the full
    // product; the original significand and semantics are saved here and
    // restored after the addition.
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    // Point our significand at the full product, mirroring the inline
    // (single part) versus out-of-line (multiple parts) storage split.
    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  */
    // With a single part the sum was computed in the inline storage, so
    // copy it back into the product buffer before restoring.
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, one more than
  // the derivation above states; presumably the extra one accounts for the
  // overflow bit reserved earlier -- confirm.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Copy the low partsCount parts of the product back into our significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}
1338 
1339 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1340   return multiplySignificand(rhs, IEEEFloat(*semantics));
1341 }
1342 
/* Divide our significand by that of the RHS using bit-at-a-time long
   division.  The quotient replaces our significand and the exponent is
   adjusted to match.  Returns the lost fraction.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  // One buffer holds both working copies: the dividend in the first
  // partsCount parts and the divisor in the next partsCount.  Up to two
  // parts each fits in the four-part on-stack scratch array.
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.  */
  // Our own significand is cleared at the same time; the quotient bits are
  // set into it by the division loop below.
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division.  */
  // Produces one quotient bit per iteration, from bit precision - 1 down to
  // bit 0.
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction.  */
  // The remainder has already been shifted left once more, so comparing it
  // with the divisor classifies the discarded part relative to one half.
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}
1425 
1426 unsigned int IEEEFloat::significandMSB() const {
1427   return APInt::tcMSB(significandParts(), partCount());
1428 }
1429 
1430 unsigned int IEEEFloat::significandLSB() const {
1431   return APInt::tcLSB(significandParts(), partCount());
1432 }
1433 
1434 /* Note that a zero result is NOT normalized to fcZero.  */
1435 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1436   /* Our exponent should not overflow.  */
1437   assert((ExponentType) (exponent + bits) >= exponent);
1438 
1439   exponent += bits;
1440 
1441   return shiftRight(significandParts(), partCount(), bits);
1442 }
1443 
1444 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1445 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1446   assert(bits < semantics->precision);
1447 
1448   if (bits) {
1449     unsigned int partsCount = partCount();
1450 
1451     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1452     exponent -= bits;
1453 
1454     assert(!APInt::tcIsZero(significandParts(), partsCount));
1455   }
1456 }
1457 
1458 IEEEFloat::cmpResult
1459 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1460   int compare;
1461 
1462   assert(semantics == rhs.semantics);
1463   assert(isFiniteNonZero());
1464   assert(rhs.isFiniteNonZero());
1465 
1466   compare = exponent - rhs.exponent;
1467 
1468   /* If exponents are equal, do an unsigned bignum comparison of the
1469      significands.  */
1470   if (compare == 0)
1471     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1472                                partCount());
1473 
1474   if (compare > 0)
1475     return cmpGreaterThan;
1476   else if (compare < 0)
1477     return cmpLessThan;
1478   else
1479     return cmpEqual;
1480 }
1481 
1482 /* Set the least significant BITS bits of a bignum, clear the
1483    rest.  */
1484 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1485                                       unsigned bits) {
1486   unsigned i = 0;
1487   while (bits > APInt::APINT_BITS_PER_WORD) {
1488     dst[i++] = ~(APInt::WordType)0;
1489     bits -= APInt::APINT_BITS_PER_WORD;
1490   }
1491 
1492   if (bits)
1493     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1494 
1495   while (i < parts)
1496     dst[i++] = 0;
1497 }
1498 
1499 /* Handle overflow.  Sign is preserved.  We either become infinity or
1500    the largest finite number.  */
1501 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1502   /* Infinity?  */
1503   if (rounding_mode == rmNearestTiesToEven ||
1504       rounding_mode == rmNearestTiesToAway ||
1505       (rounding_mode == rmTowardPositive && !sign) ||
1506       (rounding_mode == rmTowardNegative && sign)) {
1507     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1508       makeNaN(false, sign);
1509     else
1510       category = fcInfinity;
1511     return (opStatus) (opOverflow | opInexact);
1512   }
1513 
1514   /* Otherwise we become the largest finite number.  */
1515   category = fcNormal;
1516   exponent = semantics->maxExponent;
1517   tcSetLeastSignificantBits(significandParts(), partCount(),
1518                             semantics->precision);
1519   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1520       semantics->nanEncoding == fltNanEncoding::AllOnes)
1521     APInt::tcClearBit(significandParts(), 0);
1522 
1523   return opInexact;
1524 }
1525 
1526 /* Returns TRUE if, when truncating the current number, with BIT the
1527    new LSB, with the given lost fraction and rounding mode, the result
1528    would need to be rounded away from zero (i.e., by increasing the
1529    signficand).  This routine must work for fcZero of both signs, and
1530    fcNormal numbers.  */
1531 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1532                                   lostFraction lost_fraction,
1533                                   unsigned int bit) const {
1534   /* NaNs and infinities should not have lost fractions.  */
1535   assert(isFiniteNonZero() || category == fcZero);
1536 
1537   /* Current callers never pass this so we don't handle it.  */
1538   assert(lost_fraction != lfExactlyZero);
1539 
1540   switch (rounding_mode) {
1541   case rmNearestTiesToAway:
1542     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1543 
1544   case rmNearestTiesToEven:
1545     if (lost_fraction == lfMoreThanHalf)
1546       return true;
1547 
1548     /* Our zeroes don't have a significand to test.  */
1549     if (lost_fraction == lfExactlyHalf && category != fcZero)
1550       return APInt::tcExtractBit(significandParts(), bit);
1551 
1552     return false;
1553 
1554   case rmTowardZero:
1555     return false;
1556 
1557   case rmTowardPositive:
1558     return !sign;
1559 
1560   case rmTowardNegative:
1561     return sign;
1562 
1563   default:
1564     break;
1565   }
1566   llvm_unreachable("Invalid rounding mode found");
1567 }
1568 
/* Normalize this number and round it according to ROUNDING_MODE, given the
   LOST_FRACTION already discarded below the significand.  Values that are
   not finite non-zero are left alone and yield opOK.  Otherwise the
   significand is shifted so that its MSB sits at the integer bit where the
   exponent range allows, overflow is delegated to handleOverflow, and the
   returned status is opOK for exact results, opInexact for rounded normal
   results, or opUnderflow | opInexact for inexact denormal or zero
   results.  */
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  // NOTE(review): omsb == 0 is treated below as "significand is zero";
  // presumably significandMSB() returns -1 in that case -- confirm.
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones values is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  */
    // With the NegativeZero NaN encoding the zero is also made positive.
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    // A zero significand that rounds up becomes the smallest denormal, so
    // pin the exponent to the minimum first.
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones values is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  // (Or a zero: the omsb == 0 case is canonicalized just below.)
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}
1695 
/* Handle the category combinations of an addition (or, when SUBTRACT is
   set, a subtraction) of RHS that do not need significand arithmetic:
   anything involving a NaN, an infinity or a zero.  Updates this object in
   place and returns the resulting status.  */
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // Only the RHS is a NaN: take it over as our value, then share the NaN
  // handling below.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  // We are (now) a NaN.  A signalling NaN on either side reports
  // opInvalidOp, and our own value is quieted if it was signalling.
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The RHS cannot change our value in these combinations.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  // Finite plus or minus infinity is infinity; its sign is the RHS sign,
  // flipped for a subtraction.
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  // Zero plus or minus a normal number is that number, negated for a
  // subtraction.
  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted.  */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  // NOTE(review): opDivByZero cannot be a genuine result of an addition;
  // it looks like a sentinel telling the caller that both operands are
  // normal and the real arithmetic still has to be done -- confirm against
  // the callers.
  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;
  }
}
1751 
1752 /* Add or subtract two normal numbers.  */
1753 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1754                                                  bool subtract) {
1755   integerPart carry;
1756   lostFraction lost_fraction;
1757   int bits;
1758 
1759   /* Determine if the operation on the absolute values is effectively
1760      an addition or subtraction.  */
1761   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1762 
1763   /* Are we bigger exponent-wise than the RHS?  */
1764   bits = exponent - rhs.exponent;
1765 
1766   /* Subtraction is more subtle than one might naively expect.  */
1767   if (subtract) {
1768     IEEEFloat temp_rhs(rhs);
1769 
1770     if (bits == 0)
1771       lost_fraction = lfExactlyZero;
1772     else if (bits > 0) {
1773       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1774       shiftSignificandLeft(1);
1775     } else {
1776       lost_fraction = shiftSignificandRight(-bits - 1);
1777       temp_rhs.shiftSignificandLeft(1);
1778     }
1779 
1780     // Should we reverse the subtraction.
1781     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1782       carry = temp_rhs.subtractSignificand
1783         (*this, lost_fraction != lfExactlyZero);
1784       copySignificand(temp_rhs);
1785       sign = !sign;
1786     } else {
1787       carry = subtractSignificand
1788         (temp_rhs, lost_fraction != lfExactlyZero);
1789     }
1790 
1791     /* Invert the lost fraction - it was on the RHS and
1792        subtracted.  */
1793     if (lost_fraction == lfLessThanHalf)
1794       lost_fraction = lfMoreThanHalf;
1795     else if (lost_fraction == lfMoreThanHalf)
1796       lost_fraction = lfLessThanHalf;
1797 
1798     /* The code above is intended to ensure that no borrow is
1799        necessary.  */
1800     assert(!carry);
1801     (void)carry;
1802   } else {
1803     if (bits > 0) {
1804       IEEEFloat temp_rhs(rhs);
1805 
1806       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1807       carry = addSignificand(temp_rhs);
1808     } else {
1809       lost_fraction = shiftSignificandRight(-bits);
1810       carry = addSignificand(rhs);
1811     }
1812 
1813     /* We have a guard bit; generating a carry cannot happen.  */
1814     assert(!carry);
1815     (void)carry;
1816   }
1817 
1818   return lost_fraction;
1819 }
1820 
1821 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1822   switch (PackCategoriesIntoKey(category, rhs.category)) {
1823   default:
1824     llvm_unreachable(nullptr);
1825 
1826   case PackCategoriesIntoKey(fcZero, fcNaN):
1827   case PackCategoriesIntoKey(fcNormal, fcNaN):
1828   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1829     assign(rhs);
1830     sign = false;
1831     [[fallthrough]];
1832   case PackCategoriesIntoKey(fcNaN, fcZero):
1833   case PackCategoriesIntoKey(fcNaN, fcNormal):
1834   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1835   case PackCategoriesIntoKey(fcNaN, fcNaN):
1836     sign ^= rhs.sign; // restore the original sign
1837     if (isSignaling()) {
1838       makeQuiet();
1839       return opInvalidOp;
1840     }
1841     return rhs.isSignaling() ? opInvalidOp : opOK;
1842 
1843   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1844   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1845   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1846     category = fcInfinity;
1847     return opOK;
1848 
1849   case PackCategoriesIntoKey(fcZero, fcNormal):
1850   case PackCategoriesIntoKey(fcNormal, fcZero):
1851   case PackCategoriesIntoKey(fcZero, fcZero):
1852     category = fcZero;
1853     return opOK;
1854 
1855   case PackCategoriesIntoKey(fcZero, fcInfinity):
1856   case PackCategoriesIntoKey(fcInfinity, fcZero):
1857     makeNaN();
1858     return opInvalidOp;
1859 
1860   case PackCategoriesIntoKey(fcNormal, fcNormal):
1861     return opOK;
1862   }
1863 }
1864 
1865 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1866   switch (PackCategoriesIntoKey(category, rhs.category)) {
1867   default:
1868     llvm_unreachable(nullptr);
1869 
1870   case PackCategoriesIntoKey(fcZero, fcNaN):
1871   case PackCategoriesIntoKey(fcNormal, fcNaN):
1872   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1873     assign(rhs);
1874     sign = false;
1875     [[fallthrough]];
1876   case PackCategoriesIntoKey(fcNaN, fcZero):
1877   case PackCategoriesIntoKey(fcNaN, fcNormal):
1878   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1879   case PackCategoriesIntoKey(fcNaN, fcNaN):
1880     sign ^= rhs.sign; // restore the original sign
1881     if (isSignaling()) {
1882       makeQuiet();
1883       return opInvalidOp;
1884     }
1885     return rhs.isSignaling() ? opInvalidOp : opOK;
1886 
1887   case PackCategoriesIntoKey(fcInfinity, fcZero):
1888   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1889   case PackCategoriesIntoKey(fcZero, fcInfinity):
1890   case PackCategoriesIntoKey(fcZero, fcNormal):
1891     return opOK;
1892 
1893   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1894     category = fcZero;
1895     return opOK;
1896 
1897   case PackCategoriesIntoKey(fcNormal, fcZero):
1898     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1899       makeNaN(false, sign);
1900     else
1901       category = fcInfinity;
1902     return opDivByZero;
1903 
1904   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1905   case PackCategoriesIntoKey(fcZero, fcZero):
1906     makeNaN();
1907     return opInvalidOp;
1908 
1909   case PackCategoriesIntoKey(fcNormal, fcNormal):
1910     return opOK;
1911   }
1912 }
1913 
1914 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1915   switch (PackCategoriesIntoKey(category, rhs.category)) {
1916   default:
1917     llvm_unreachable(nullptr);
1918 
1919   case PackCategoriesIntoKey(fcZero, fcNaN):
1920   case PackCategoriesIntoKey(fcNormal, fcNaN):
1921   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1922     assign(rhs);
1923     [[fallthrough]];
1924   case PackCategoriesIntoKey(fcNaN, fcZero):
1925   case PackCategoriesIntoKey(fcNaN, fcNormal):
1926   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1927   case PackCategoriesIntoKey(fcNaN, fcNaN):
1928     if (isSignaling()) {
1929       makeQuiet();
1930       return opInvalidOp;
1931     }
1932     return rhs.isSignaling() ? opInvalidOp : opOK;
1933 
1934   case PackCategoriesIntoKey(fcZero, fcInfinity):
1935   case PackCategoriesIntoKey(fcZero, fcNormal):
1936   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1937     return opOK;
1938 
1939   case PackCategoriesIntoKey(fcNormal, fcZero):
1940   case PackCategoriesIntoKey(fcInfinity, fcZero):
1941   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1942   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1943   case PackCategoriesIntoKey(fcZero, fcZero):
1944     makeNaN();
1945     return opInvalidOp;
1946 
1947   case PackCategoriesIntoKey(fcNormal, fcNormal):
1948     return opOK;
1949   }
1950 }
1951 
1952 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1953   switch (PackCategoriesIntoKey(category, rhs.category)) {
1954   default:
1955     llvm_unreachable(nullptr);
1956 
1957   case PackCategoriesIntoKey(fcZero, fcNaN):
1958   case PackCategoriesIntoKey(fcNormal, fcNaN):
1959   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1960     assign(rhs);
1961     [[fallthrough]];
1962   case PackCategoriesIntoKey(fcNaN, fcZero):
1963   case PackCategoriesIntoKey(fcNaN, fcNormal):
1964   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1965   case PackCategoriesIntoKey(fcNaN, fcNaN):
1966     if (isSignaling()) {
1967       makeQuiet();
1968       return opInvalidOp;
1969     }
1970     return rhs.isSignaling() ? opInvalidOp : opOK;
1971 
1972   case PackCategoriesIntoKey(fcZero, fcInfinity):
1973   case PackCategoriesIntoKey(fcZero, fcNormal):
1974   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1975     return opOK;
1976 
1977   case PackCategoriesIntoKey(fcNormal, fcZero):
1978   case PackCategoriesIntoKey(fcInfinity, fcZero):
1979   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1980   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1981   case PackCategoriesIntoKey(fcZero, fcZero):
1982     makeNaN();
1983     return opInvalidOp;
1984 
1985   case PackCategoriesIntoKey(fcNormal, fcNormal):
1986     return opDivByZero; // fake status, indicating this is not a special case
1987   }
1988 }
1989 
1990 /* Change sign.  */
1991 void IEEEFloat::changeSign() {
1992   // With NaN-as-negative-zero, neither NaN or negative zero can change
1993   // their signs.
1994   if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
1995       (isZero() || isNaN()))
1996     return;
1997   /* Look mummy, this one's easy.  */
1998   sign = !sign;
1999 }
2000 
2001 /* Normalized addition or subtraction.  */
2002 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2003                                              roundingMode rounding_mode,
2004                                              bool subtract) {
2005   opStatus fs;
2006 
2007   fs = addOrSubtractSpecials(rhs, subtract);
2008 
2009   /* This return code means it was not a simple case.  */
2010   if (fs == opDivByZero) {
2011     lostFraction lost_fraction;
2012 
2013     lost_fraction = addOrSubtractSignificand(rhs, subtract);
2014     fs = normalize(rounding_mode, lost_fraction);
2015 
2016     /* Can only be zero if we lost no fraction.  */
2017     assert(category != fcZero || lost_fraction == lfExactlyZero);
2018   }
2019 
2020   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2021      positive zero unless rounding to minus infinity, except that
2022      adding two like-signed zeroes gives that zero.  */
2023   if (category == fcZero) {
2024     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2025       sign = (rounding_mode == rmTowardNegative);
2026     // NaN-in-negative-zero means zeros need to be normalized to +0.
2027     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2028       sign = false;
2029   }
2030 
2031   return fs;
2032 }
2033 
2034 /* Normalized addition.  */
2035 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2036                                    roundingMode rounding_mode) {
2037   return addOrSubtract(rhs, rounding_mode, false);
2038 }
2039 
2040 /* Normalized subtraction.  */
2041 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2042                                         roundingMode rounding_mode) {
2043   return addOrSubtract(rhs, rounding_mode, true);
2044 }
2045 
2046 /* Normalized multiply.  */
2047 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2048                                         roundingMode rounding_mode) {
2049   opStatus fs;
2050 
2051   sign ^= rhs.sign;
2052   fs = multiplySpecials(rhs);
2053 
2054   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2055     sign = false;
2056   if (isFiniteNonZero()) {
2057     lostFraction lost_fraction = multiplySignificand(rhs);
2058     fs = normalize(rounding_mode, lost_fraction);
2059     if (lost_fraction != lfExactlyZero)
2060       fs = (opStatus) (fs | opInexact);
2061   }
2062 
2063   return fs;
2064 }
2065 
2066 /* Normalized divide.  */
2067 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2068                                       roundingMode rounding_mode) {
2069   opStatus fs;
2070 
2071   sign ^= rhs.sign;
2072   fs = divideSpecials(rhs);
2073 
2074   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2075     sign = false;
2076   if (isFiniteNonZero()) {
2077     lostFraction lost_fraction = divideSignificand(rhs);
2078     fs = normalize(rounding_mode, lost_fraction);
2079     if (lost_fraction != lfExactlyZero)
2080       fs = (opStatus) (fs | opInexact);
2081   }
2082 
2083   return fs;
2084 }
2085 
2086 /* Normalized remainder.  */
2087 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2088   opStatus fs;
2089   unsigned int origSign = sign;
2090 
2091   // First handle the special cases.
2092   fs = remainderSpecials(rhs);
2093   if (fs != opDivByZero)
2094     return fs;
2095 
2096   fs = opOK;
2097 
2098   // Make sure the current value is less than twice the denom. If the addition
2099   // did not succeed (an overflow has happened), which means that the finite
2100   // value we currently posses must be less than twice the denom (as we are
2101   // using the same semantics).
2102   IEEEFloat P2 = rhs;
2103   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2104     fs = mod(P2);
2105     assert(fs == opOK);
2106   }
2107 
2108   // Lets work with absolute numbers.
2109   IEEEFloat P = rhs;
2110   P.sign = false;
2111   sign = false;
2112 
2113   //
2114   // To calculate the remainder we use the following scheme.
2115   //
2116   // The remainder is defained as follows:
2117   //
2118   // remainder = numer - rquot * denom = x - r * p
2119   //
2120   // Where r is the result of: x/p, rounded toward the nearest integral value
2121   // (with halfway cases rounded toward the even number).
2122   //
2123   // Currently, (after x mod 2p):
2124   // r is the number of 2p's present inside x, which is inherently, an even
2125   // number of p's.
2126   //
2127   // We may split the remaining calculation into 4 options:
2128   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2129   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2130   //   are done as well.
2131   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2132   //   to subtract 1p at least once.
2133   // - if x >= p then we must subtract p at least once, as x must be a
2134   //   remainder.
2135   //
2136   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2137   //
2138   // We can now split the remaining calculation to the following 3 options:
2139   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2140   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2141   //   must round up to the next even number. so we must subtract p once more.
2142   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2143   //   integral, and subtract p once more.
2144   //
2145 
2146   // Extend the semantics to prevent an overflow/underflow or inexact result.
2147   bool losesInfo;
2148   fltSemantics extendedSemantics = *semantics;
2149   extendedSemantics.maxExponent++;
2150   extendedSemantics.minExponent--;
2151   extendedSemantics.precision += 2;
2152 
2153   IEEEFloat VEx = *this;
2154   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2155   assert(fs == opOK && !losesInfo);
2156   IEEEFloat PEx = P;
2157   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2158   assert(fs == opOK && !losesInfo);
2159 
2160   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2161   // any fraction.
2162   fs = VEx.add(VEx, rmNearestTiesToEven);
2163   assert(fs == opOK);
2164 
2165   if (VEx.compare(PEx) == cmpGreaterThan) {
2166     fs = subtract(P, rmNearestTiesToEven);
2167     assert(fs == opOK);
2168 
2169     // Make VEx = this.add(this), but because we have different semantics, we do
2170     // not want to `convert` again, so we just subtract PEx twice (which equals
2171     // to the desired value).
2172     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2173     assert(fs == opOK);
2174     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2175     assert(fs == opOK);
2176 
2177     cmpResult result = VEx.compare(PEx);
2178     if (result == cmpGreaterThan || result == cmpEqual) {
2179       fs = subtract(P, rmNearestTiesToEven);
2180       assert(fs == opOK);
2181     }
2182   }
2183 
2184   if (isZero()) {
2185     sign = origSign;    // IEEE754 requires this
2186     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2187       // But some 8-bit floats only have positive 0.
2188       sign = false;
2189   }
2190 
2191   else
2192     sign ^= origSign;
2193   return fs;
2194 }
2195 
2196 /* Normalized llvm frem (C fmod). */
2197 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2198   opStatus fs;
2199   fs = modSpecials(rhs);
2200   unsigned int origSign = sign;
2201 
2202   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2203          compareAbsoluteValue(rhs) != cmpLessThan) {
2204     int Exp = ilogb(*this) - ilogb(rhs);
2205     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2206     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2207     // check for it.
2208     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2209       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2210     V.sign = sign;
2211 
2212     fs = subtract(V, rmNearestTiesToEven);
2213     assert(fs==opOK);
2214   }
2215   if (isZero()) {
2216     sign = origSign; // fmod requires this
2217     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2218       sign = false;
2219   }
2220   return fs;
2221 }
2222 
2223 /* Normalized fused-multiply-add.  */
2224 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2225                                                 const IEEEFloat &addend,
2226                                                 roundingMode rounding_mode) {
2227   opStatus fs;
2228 
2229   /* Post-multiplication sign, before addition.  */
2230   sign ^= multiplicand.sign;
2231 
2232   /* If and only if all arguments are normal do we need to do an
2233      extended-precision calculation.  */
2234   if (isFiniteNonZero() &&
2235       multiplicand.isFiniteNonZero() &&
2236       addend.isFinite()) {
2237     lostFraction lost_fraction;
2238 
2239     lost_fraction = multiplySignificand(multiplicand, addend);
2240     fs = normalize(rounding_mode, lost_fraction);
2241     if (lost_fraction != lfExactlyZero)
2242       fs = (opStatus) (fs | opInexact);
2243 
2244     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2245        positive zero unless rounding to minus infinity, except that
2246        adding two like-signed zeroes gives that zero.  */
2247     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2248       sign = (rounding_mode == rmTowardNegative);
2249       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2250         sign = false;
2251     }
2252   } else {
2253     fs = multiplySpecials(multiplicand);
2254 
2255     /* FS can only be opOK or opInvalidOp.  There is no more work
2256        to do in the latter case.  The IEEE-754R standard says it is
2257        implementation-defined in this case whether, if ADDEND is a
2258        quiet NaN, we raise invalid op; this implementation does so.
2259 
2260        If we need to do the addition we can do so with normal
2261        precision.  */
2262     if (fs == opOK)
2263       fs = addOrSubtract(addend, rounding_mode, false);
2264   }
2265 
2266   return fs;
2267 }
2268 
2269 /* Rounding-mode correct round to integral value.  */
2270 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2271   opStatus fs;
2272 
2273   if (isInfinity())
2274     // [IEEE Std 754-2008 6.1]:
2275     // The behavior of infinity in floating-point arithmetic is derived from the
2276     // limiting cases of real arithmetic with operands of arbitrarily
2277     // large magnitude, when such a limit exists.
2278     // ...
2279     // Operations on infinite operands are usually exact and therefore signal no
2280     // exceptions ...
2281     return opOK;
2282 
2283   if (isNaN()) {
2284     if (isSignaling()) {
2285       // [IEEE Std 754-2008 6.2]:
2286       // Under default exception handling, any operation signaling an invalid
2287       // operation exception and for which a floating-point result is to be
2288       // delivered shall deliver a quiet NaN.
2289       makeQuiet();
2290       // [IEEE Std 754-2008 6.2]:
2291       // Signaling NaNs shall be reserved operands that, under default exception
2292       // handling, signal the invalid operation exception(see 7.2) for every
2293       // general-computational and signaling-computational operation except for
2294       // the conversions described in 5.12.
2295       return opInvalidOp;
2296     } else {
2297       // [IEEE Std 754-2008 6.2]:
2298       // For an operation with quiet NaN inputs, other than maximum and minimum
2299       // operations, if a floating-point result is to be delivered the result
2300       // shall be a quiet NaN which should be one of the input NaNs.
2301       // ...
2302       // Every general-computational and quiet-computational operation involving
2303       // one or more input NaNs, none of them signaling, shall signal no
2304       // exception, except fusedMultiplyAdd might signal the invalid operation
2305       // exception(see 7.2).
2306       return opOK;
2307     }
2308   }
2309 
2310   if (isZero()) {
2311     // [IEEE Std 754-2008 6.3]:
2312     // ... the sign of the result of conversions, the quantize operation, the
2313     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2314     // the sign of the first or only operand.
2315     return opOK;
2316   }
2317 
2318   // If the exponent is large enough, we know that this value is already
2319   // integral, and the arithmetic below would potentially cause it to saturate
2320   // to +/-Inf.  Bail out early instead.
2321   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2322     return opOK;
2323 
2324   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2325   // precision of our format, and then subtract it back off again.  The choice
2326   // of rounding modes for the addition/subtraction determines the rounding mode
2327   // for our integral rounding as well.
2328   // NOTE: When the input value is negative, we do subtraction followed by
2329   // addition instead.
2330   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2331   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2332   IEEEFloat MagicConstant(*semantics);
2333   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2334                                       rmNearestTiesToEven);
2335   assert(fs == opOK);
2336   MagicConstant.sign = sign;
2337 
2338   // Preserve the input sign so that we can handle the case of zero result
2339   // correctly.
2340   bool inputSign = isNegative();
2341 
2342   fs = add(MagicConstant, rounding_mode);
2343 
2344   // Current value and 'MagicConstant' are both integers, so the result of the
2345   // subtraction is always exact according to Sterbenz' lemma.
2346   subtract(MagicConstant, rounding_mode);
2347 
2348   // Restore the input sign.
2349   if (inputSign != isNegative())
2350     changeSign();
2351 
2352   return fs;
2353 }
2354 
2355 
2356 /* Comparison requires normalized numbers.  */
2357 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2358   cmpResult result;
2359 
2360   assert(semantics == rhs.semantics);
2361 
2362   switch (PackCategoriesIntoKey(category, rhs.category)) {
2363   default:
2364     llvm_unreachable(nullptr);
2365 
2366   case PackCategoriesIntoKey(fcNaN, fcZero):
2367   case PackCategoriesIntoKey(fcNaN, fcNormal):
2368   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2369   case PackCategoriesIntoKey(fcNaN, fcNaN):
2370   case PackCategoriesIntoKey(fcZero, fcNaN):
2371   case PackCategoriesIntoKey(fcNormal, fcNaN):
2372   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2373     return cmpUnordered;
2374 
2375   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2376   case PackCategoriesIntoKey(fcInfinity, fcZero):
2377   case PackCategoriesIntoKey(fcNormal, fcZero):
2378     if (sign)
2379       return cmpLessThan;
2380     else
2381       return cmpGreaterThan;
2382 
2383   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2384   case PackCategoriesIntoKey(fcZero, fcInfinity):
2385   case PackCategoriesIntoKey(fcZero, fcNormal):
2386     if (rhs.sign)
2387       return cmpGreaterThan;
2388     else
2389       return cmpLessThan;
2390 
2391   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2392     if (sign == rhs.sign)
2393       return cmpEqual;
2394     else if (sign)
2395       return cmpLessThan;
2396     else
2397       return cmpGreaterThan;
2398 
2399   case PackCategoriesIntoKey(fcZero, fcZero):
2400     return cmpEqual;
2401 
2402   case PackCategoriesIntoKey(fcNormal, fcNormal):
2403     break;
2404   }
2405 
2406   /* Two normal numbers.  Do they have the same sign?  */
2407   if (sign != rhs.sign) {
2408     if (sign)
2409       result = cmpLessThan;
2410     else
2411       result = cmpGreaterThan;
2412   } else {
2413     /* Compare absolute values; invert result if negative.  */
2414     result = compareAbsoluteValue(rhs);
2415 
2416     if (sign) {
2417       if (result == cmpLessThan)
2418         result = cmpGreaterThan;
2419       else if (result == cmpGreaterThan)
2420         result = cmpLessThan;
2421     }
2422   }
2423 
2424   return result;
2425 }
2426 
2427 /// IEEEFloat::convert - convert a value of one floating point type to another.
2428 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2429 /// records whether the transformation lost information, i.e. whether
2430 /// converting the result back to the original type will produce the
2431 /// original value (this is almost the same as return value==fsOK, but there
2432 /// are edge cases where this is not so).
2433 
2434 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2435                                        roundingMode rounding_mode,
2436                                        bool *losesInfo) {
2437   lostFraction lostFraction;
2438   unsigned int newPartCount, oldPartCount;
2439   opStatus fs;
2440   int shift;
2441   const fltSemantics &fromSemantics = *semantics;
2442   bool is_signaling = isSignaling();
2443 
2444   lostFraction = lfExactlyZero;
2445   newPartCount = partCountForBits(toSemantics.precision + 1);
2446   oldPartCount = partCount();
2447   shift = toSemantics.precision - fromSemantics.precision;
2448 
2449   bool X86SpecialNan = false;
2450   if (&fromSemantics == &semX87DoubleExtended &&
2451       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2452       (!(*significandParts() & 0x8000000000000000ULL) ||
2453        !(*significandParts() & 0x4000000000000000ULL))) {
2454     // x86 has some unusual NaNs which cannot be represented in any other
2455     // format; note them here.
2456     X86SpecialNan = true;
2457   }
2458 
2459   // If this is a truncation of a denormal number, and the target semantics
2460   // has larger exponent range than the source semantics (this can happen
2461   // when truncating from PowerPC double-double to double format), the
2462   // right shift could lose result mantissa bits.  Adjust exponent instead
2463   // of performing excessive shift.
2464   // Also do a similar trick in case shifting denormal would produce zero
2465   // significand as this case isn't handled correctly by normalize.
2466   if (shift < 0 && isFiniteNonZero()) {
2467     int omsb = significandMSB() + 1;
2468     int exponentChange = omsb - fromSemantics.precision;
2469     if (exponent + exponentChange < toSemantics.minExponent)
2470       exponentChange = toSemantics.minExponent - exponent;
2471     if (exponentChange < shift)
2472       exponentChange = shift;
2473     if (exponentChange < 0) {
2474       shift -= exponentChange;
2475       exponent += exponentChange;
2476     } else if (omsb <= -shift) {
2477       exponentChange = omsb + shift - 1; // leave at least one bit set
2478       shift -= exponentChange;
2479       exponent += exponentChange;
2480     }
2481   }
2482 
2483   // If this is a truncation, perform the shift before we narrow the storage.
2484   if (shift < 0 && (isFiniteNonZero() ||
2485                     (category == fcNaN && semantics->nonFiniteBehavior !=
2486                                               fltNonfiniteBehavior::NanOnly)))
2487     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2488 
2489   // Fix the storage so it can hold to new value.
2490   if (newPartCount > oldPartCount) {
2491     // The new type requires more storage; make it available.
2492     integerPart *newParts;
2493     newParts = new integerPart[newPartCount];
2494     APInt::tcSet(newParts, 0, newPartCount);
2495     if (isFiniteNonZero() || category==fcNaN)
2496       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2497     freeSignificand();
2498     significand.parts = newParts;
2499   } else if (newPartCount == 1 && oldPartCount != 1) {
2500     // Switch to built-in storage for a single part.
2501     integerPart newPart = 0;
2502     if (isFiniteNonZero() || category==fcNaN)
2503       newPart = significandParts()[0];
2504     freeSignificand();
2505     significand.part = newPart;
2506   }
2507 
2508   // Now that we have the right storage, switch the semantics.
2509   semantics = &toSemantics;
2510 
2511   // If this is an extension, perform the shift now that the storage is
2512   // available.
2513   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2514     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2515 
2516   if (isFiniteNonZero()) {
2517     fs = normalize(rounding_mode, lostFraction);
2518     *losesInfo = (fs != opOK);
2519   } else if (category == fcNaN) {
2520     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2521       *losesInfo =
2522           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2523       makeNaN(false, sign);
2524       return is_signaling ? opInvalidOp : opOK;
2525     }
2526 
2527     // If NaN is negative zero, we need to create a new NaN to avoid converting
2528     // NaN to -Inf.
2529     if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2530         semantics->nanEncoding != fltNanEncoding::NegativeZero)
2531       makeNaN(false, false);
2532 
2533     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2534 
2535     // For x87 extended precision, we want to make a NaN, not a special NaN if
2536     // the input wasn't special either.
2537     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2538       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2539 
2540     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2541     // This also guarantees that a sNaN does not become Inf on a truncation
2542     // that loses all payload bits.
2543     if (is_signaling) {
2544       makeQuiet();
2545       fs = opInvalidOp;
2546     } else {
2547       fs = opOK;
2548     }
2549   } else if (category == fcInfinity &&
2550              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2551     makeNaN(false, sign);
2552     *losesInfo = true;
2553     fs = opInexact;
2554   } else if (category == fcZero &&
2555              semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2556     // Negative zero loses info, but positive zero doesn't.
2557     *losesInfo =
2558         fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2559     fs = *losesInfo ? opInexact : opOK;
2560     // NaN is negative zero means -0 -> +0, which can lose information
2561     sign = false;
2562   } else {
2563     *losesInfo = false;
2564     fs = opOK;
2565   }
2566 
2567   return fs;
2568 }
2569 
2570 /* Convert a floating point number to an integer according to the
2571    rounding mode.  If the rounded integer value is out of range this
2572    returns an invalid operation exception and the contents of the
2573    destination parts are unspecified.  If the rounded value is in
2574    range but the floating point number is not the exact integer, the C
2575    standard doesn't require an inexact exception to be raised.  IEEE
2576    854 does require it so we do that.
2577 
2578    Note that for conversions to integer type the C standard requires
2579    round-to-zero to always be used.  */
2580 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2581     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2582     roundingMode rounding_mode, bool *isExact) const {
2583   lostFraction lost_fraction;
2584   const integerPart *src;
2585   unsigned int dstPartsCount, truncatedBits;
2586 
2587   *isExact = false;
2588 
2589   /* Handle the three special cases first.  */
2590   if (category == fcInfinity || category == fcNaN)
2591     return opInvalidOp;
2592 
2593   dstPartsCount = partCountForBits(width);
2594   assert(dstPartsCount <= parts.size() && "Integer too big");
2595 
2596   if (category == fcZero) {
2597     APInt::tcSet(parts.data(), 0, dstPartsCount);
2598     // Negative zero can't be represented as an int.
2599     *isExact = !sign;
2600     return opOK;
2601   }
2602 
2603   src = significandParts();
2604 
2605   /* Step 1: place our absolute value, with any fraction truncated, in
2606      the destination.  */
2607   if (exponent < 0) {
2608     /* Our absolute value is less than one; truncate everything.  */
2609     APInt::tcSet(parts.data(), 0, dstPartsCount);
2610     /* For exponent -1 the integer bit represents .5, look at that.
2611        For smaller exponents leftmost truncated bit is 0. */
2612     truncatedBits = semantics->precision -1U - exponent;
2613   } else {
2614     /* We want the most significant (exponent + 1) bits; the rest are
2615        truncated.  */
2616     unsigned int bits = exponent + 1U;
2617 
2618     /* Hopelessly large in magnitude?  */
2619     if (bits > width)
2620       return opInvalidOp;
2621 
2622     if (bits < semantics->precision) {
2623       /* We truncate (semantics->precision - bits) bits.  */
2624       truncatedBits = semantics->precision - bits;
2625       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2626     } else {
2627       /* We want at least as many bits as are available.  */
2628       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2629                        0);
2630       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2631                          bits - semantics->precision);
2632       truncatedBits = 0;
2633     }
2634   }
2635 
2636   /* Step 2: work out any lost fraction, and increment the absolute
2637      value if we would round away from zero.  */
2638   if (truncatedBits) {
2639     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2640                                                   truncatedBits);
2641     if (lost_fraction != lfExactlyZero &&
2642         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2643       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2644         return opInvalidOp;     /* Overflow.  */
2645     }
2646   } else {
2647     lost_fraction = lfExactlyZero;
2648   }
2649 
2650   /* Step 3: check if we fit in the destination.  */
2651   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2652 
2653   if (sign) {
2654     if (!isSigned) {
2655       /* Negative numbers cannot be represented as unsigned.  */
2656       if (omsb != 0)
2657         return opInvalidOp;
2658     } else {
2659       /* It takes omsb bits to represent the unsigned integer value.
2660          We lose a bit for the sign, but care is needed as the
2661          maximally negative integer is a special case.  */
2662       if (omsb == width &&
2663           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2664         return opInvalidOp;
2665 
2666       /* This case can happen because of rounding.  */
2667       if (omsb > width)
2668         return opInvalidOp;
2669     }
2670 
2671     APInt::tcNegate (parts.data(), dstPartsCount);
2672   } else {
2673     if (omsb >= width + !isSigned)
2674       return opInvalidOp;
2675   }
2676 
2677   if (lost_fraction == lfExactlyZero) {
2678     *isExact = true;
2679     return opOK;
2680   } else
2681     return opInexact;
2682 }
2683 
2684 /* Same as convertToSignExtendedInteger, except we provide
2685    deterministic values in case of an invalid operation exception,
2686    namely zero for NaNs and the minimal or maximal value respectively
2687    for underflow or overflow.
2688    The *isExact output tells whether the result is exact, in the sense
2689    that converting it back to the original floating point type produces
2690    the original value.  This is almost equivalent to result==opOK,
2691    except for negative zeroes.
2692 */
2693 IEEEFloat::opStatus
2694 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2695                             unsigned int width, bool isSigned,
2696                             roundingMode rounding_mode, bool *isExact) const {
2697   opStatus fs;
2698 
2699   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2700                                     isExact);
2701 
2702   if (fs == opInvalidOp) {
2703     unsigned int bits, dstPartsCount;
2704 
2705     dstPartsCount = partCountForBits(width);
2706     assert(dstPartsCount <= parts.size() && "Integer too big");
2707 
2708     if (category == fcNaN)
2709       bits = 0;
2710     else if (sign)
2711       bits = isSigned;
2712     else
2713       bits = width - isSigned;
2714 
2715     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2716     if (sign && isSigned)
2717       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2718   }
2719 
2720   return fs;
2721 }
2722 
2723 /* Convert an unsigned integer SRC to a floating point number,
2724    rounding according to ROUNDING_MODE.  The sign of the floating
2725    point number is not modified.  */
2726 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2727     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2728   unsigned int omsb, precision, dstCount;
2729   integerPart *dst;
2730   lostFraction lost_fraction;
2731 
2732   category = fcNormal;
2733   omsb = APInt::tcMSB(src, srcCount) + 1;
2734   dst = significandParts();
2735   dstCount = partCount();
2736   precision = semantics->precision;
2737 
2738   /* We want the most significant PRECISION bits of SRC.  There may not
2739      be that many; extract what we can.  */
2740   if (precision <= omsb) {
2741     exponent = omsb - 1;
2742     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2743                                                   omsb - precision);
2744     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2745   } else {
2746     exponent = precision - 1;
2747     lost_fraction = lfExactlyZero;
2748     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2749   }
2750 
2751   return normalize(rounding_mode, lost_fraction);
2752 }
2753 
2754 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2755                                                 roundingMode rounding_mode) {
2756   unsigned int partCount = Val.getNumWords();
2757   APInt api = Val;
2758 
2759   sign = false;
2760   if (isSigned && api.isNegative()) {
2761     sign = true;
2762     api = -api;
2763   }
2764 
2765   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2766 }
2767 
2768 /* Convert a two's complement integer SRC to a floating point number,
2769    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2770    integer is signed, in which case it must be sign-extended.  */
2771 IEEEFloat::opStatus
2772 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2773                                           unsigned int srcCount, bool isSigned,
2774                                           roundingMode rounding_mode) {
2775   opStatus status;
2776 
2777   if (isSigned &&
2778       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2779     integerPart *copy;
2780 
2781     /* If we're signed and negative negate a copy.  */
2782     sign = true;
2783     copy = new integerPart[srcCount];
2784     APInt::tcAssign(copy, src, srcCount);
2785     APInt::tcNegate(copy, srcCount);
2786     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2787     delete [] copy;
2788   } else {
2789     sign = false;
2790     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2791   }
2792 
2793   return status;
2794 }
2795 
2796 /* FIXME: should this just take a const APInt reference?  */
2797 IEEEFloat::opStatus
2798 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2799                                           unsigned int width, bool isSigned,
2800                                           roundingMode rounding_mode) {
2801   unsigned int partCount = partCountForBits(width);
2802   APInt api = APInt(width, ArrayRef(parts, partCount));
2803 
2804   sign = false;
2805   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2806     sign = true;
2807     api = -api;
2808   }
2809 
2810   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2811 }
2812 
2813 Expected<IEEEFloat::opStatus>
2814 IEEEFloat::convertFromHexadecimalString(StringRef s,
2815                                         roundingMode rounding_mode) {
2816   lostFraction lost_fraction = lfExactlyZero;
2817 
2818   category = fcNormal;
2819   zeroSignificand();
2820   exponent = 0;
2821 
2822   integerPart *significand = significandParts();
2823   unsigned partsCount = partCount();
2824   unsigned bitPos = partsCount * integerPartWidth;
2825   bool computedTrailingFraction = false;
2826 
2827   // Skip leading zeroes and any (hexa)decimal point.
2828   StringRef::iterator begin = s.begin();
2829   StringRef::iterator end = s.end();
2830   StringRef::iterator dot;
2831   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2832   if (!PtrOrErr)
2833     return PtrOrErr.takeError();
2834   StringRef::iterator p = *PtrOrErr;
2835   StringRef::iterator firstSignificantDigit = p;
2836 
2837   while (p != end) {
2838     integerPart hex_value;
2839 
2840     if (*p == '.') {
2841       if (dot != end)
2842         return createError("String contains multiple dots");
2843       dot = p++;
2844       continue;
2845     }
2846 
2847     hex_value = hexDigitValue(*p);
2848     if (hex_value == UINT_MAX)
2849       break;
2850 
2851     p++;
2852 
2853     // Store the number while we have space.
2854     if (bitPos) {
2855       bitPos -= 4;
2856       hex_value <<= bitPos % integerPartWidth;
2857       significand[bitPos / integerPartWidth] |= hex_value;
2858     } else if (!computedTrailingFraction) {
2859       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2860       if (!FractOrErr)
2861         return FractOrErr.takeError();
2862       lost_fraction = *FractOrErr;
2863       computedTrailingFraction = true;
2864     }
2865   }
2866 
2867   /* Hex floats require an exponent but not a hexadecimal point.  */
2868   if (p == end)
2869     return createError("Hex strings require an exponent");
2870   if (*p != 'p' && *p != 'P')
2871     return createError("Invalid character in significand");
2872   if (p == begin)
2873     return createError("Significand has no digits");
2874   if (dot != end && p - begin == 1)
2875     return createError("Significand has no digits");
2876 
2877   /* Ignore the exponent if we are zero.  */
2878   if (p != firstSignificantDigit) {
2879     int expAdjustment;
2880 
2881     /* Implicit hexadecimal point?  */
2882     if (dot == end)
2883       dot = p;
2884 
2885     /* Calculate the exponent adjustment implicit in the number of
2886        significant digits.  */
2887     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2888     if (expAdjustment < 0)
2889       expAdjustment++;
2890     expAdjustment = expAdjustment * 4 - 1;
2891 
2892     /* Adjust for writing the significand starting at the most
2893        significant nibble.  */
2894     expAdjustment += semantics->precision;
2895     expAdjustment -= partsCount * integerPartWidth;
2896 
2897     /* Adjust for the given exponent.  */
2898     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2899     if (!ExpOrErr)
2900       return ExpOrErr.takeError();
2901     exponent = *ExpOrErr;
2902   }
2903 
2904   return normalize(rounding_mode, lost_fraction);
2905 }
2906 
/* Set this number to the value of the unsigned multi-part integer
   DECSIGPARTS (SIGPARTCOUNT parts) scaled by 10^EXP, rounded according
   to ROUNDING_MODE.

   The product or quotient with 5^|EXP| is computed in a wider scratch
   format.  After each attempt an upper bound on the accumulated error
   (HUerr) is compared with the distance from the scratch result to the
   nearest rounding boundary (HUdistance); if the error could cross the
   boundary the scratch precision is doubled and the computation is
   repeated.  Otherwise the result is truncated into this object and
   finished off by normalize(), whose status is returned.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  /* Scratch semantics; the precision field is filled in on every
     iteration of the loop below.
     NOTE(review): the first two initializers look like the maximum and
     minimum exponent - confirm against the fltSemantics declaration.  */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  /* Initial scratch size, derived from the target precision plus 11
     bits.  */
  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  /* Retry with twice as many scratch parts until the result is
     trustworthy; the only exit is the return at the bottom.  */
  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    /* excessPrecision is how many scratch bits lie below the target
       precision; truncatedBits is how many bits are finally examined
       for the lost fraction.  They differ only when excessPrecision
       gets clamped in the denormal case below.  */
    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    /* Scratch operands; decSig carries our sign.  */
    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    /* The statuses record whether loading either operand into the
       scratch format was already inexact.  */
    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        /* Never ask for more bits than the scratch value holds.  */
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    /* Error bound versus twice the distance (in ulps) to the nearest
       rounding boundary.  */
    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      /* Classify the discarded scratch bits so normalize() can apply
         the caller's rounding mode.  */
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}
2991 
/* Parse STR as a decimal floating point literal and store the value,
   rounded according to ROUNDING_MODE.  convertFromString calls this
   with any leading sign character already removed.

   Returns an Error if interpretDecimal rejects the text or a
   non-digit turns up inside the significand; otherwise returns the
   status of the conversion.  Zero, certain overflow and certain
   underflow are settled from the exponent alone; everything else goes
   through an exact big-integer significand and
   roundSignificandWithExponent.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null terminators
  // and zero decimals with non-zero exponents.
  //
  // We computed firstSigDigit by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there can
  // be at most one dot. On the other hand, if we have a zero with a non-zero
  // exponent, then we know that D.firstSigDigit will be non-numeric.
  if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    fs = opOK;
    // Formats that encode NaN as negative zero have no -0; force +0.
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;

    /* Check whether the normalized exponent is high enough to overflow
       max during the log-rebasing in the max-exponent check below. */
  } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
    fs = handleOverflow(rounding_mode);

  /* If it wasn't, then it also wasn't high enough to overflow max
     during the log-rebasing in the min-exponent check.  Check that it
     won't overflow min in either check, then perform the min-exponent
     check. */
  } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
             (D.normalizedExponent + 1) * 28738 <=
               8651 * (semantics->minExponent - (int) semantics->precision)) {
    /* Underflow to zero and round.  */
    category = fcNormal;
    zeroSignificand();
    fs = normalize(rounding_mode, lfLessThanHalf);

  /* We can finally safely perform the max-exponent check. */
  } else if ((D.normalizedExponent - 1) * 42039
             >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    fs = handleOverflow(rounding_mode);
  } else {
    integerPart *decSignificand;
    unsigned int partCount;

    /* A tight upper bound on number of bits required to hold an
       N-digit decimal integer is N * 196 / 59.  Allocate enough space
       to hold the full significand, and an extra part required by
       tcMultiplyPart.  */
    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
    partCount = partCountForBits(1 + 196 * partCount / 59);
    decSignificand = new integerPart[partCount + 1];
    /* From here on partCount counts the parts actually in use.  */
    partCount = 0;

    /* Convert to binary efficiently - we do almost all multiplication
       in an integerPart.  When this would overflow we do a single
       bignum multiplication, and then revert again to multiplication
       in an integerPart.  */
    do {
      integerPart decValue, val, multiplier;

      /* val accumulates a run of digits and multiplier the matching
         power of ten.  */
      val = 0;
      multiplier = 1;

      do {
        /* Step over the decimal point; stop if nothing follows it.  */
        if (*p == '.') {
          p++;
          if (p == str.end()) {
            break;
          }
        }
        decValue = decDigitValue(*p++);
        if (decValue >= 10U) {
          /* Release the buffer before bailing out.  */
          delete[] decSignificand;
          return createError("Invalid character in significand");
        }
        multiplier *= 10;
        val = val * 10 + decValue;
        /* The maximum number that can be multiplied by ten with any
           digit added without overflowing an integerPart.  */
      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);

      /* Multiply out the current part.  */
      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
                            partCount, partCount + 1, false);

      /* If we used another part (likely but not guaranteed), increase
         the count.  */
      if (decSignificand[partCount])
        partCount++;
    } while (p <= D.lastSigDigit);

    category = fcNormal;
    fs = roundSignificandWithExponent(decSignificand, partCount,
                                      D.exponent, rounding_mode);

    delete [] decSignificand;
  }

  return fs;
}
3114 
3115 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3116   const size_t MIN_NAME_SIZE = 3;
3117 
3118   if (str.size() < MIN_NAME_SIZE)
3119     return false;
3120 
3121   if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3122     makeInf(false);
3123     return true;
3124   }
3125 
3126   bool IsNegative = str.front() == '-';
3127   if (IsNegative) {
3128     str = str.drop_front();
3129     if (str.size() < MIN_NAME_SIZE)
3130       return false;
3131 
3132     if (str == "inf" || str == "INFINITY" || str == "Inf") {
3133       makeInf(true);
3134       return true;
3135     }
3136   }
3137 
3138   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3139   bool IsSignaling = str.front() == 's' || str.front() == 'S';
3140   if (IsSignaling) {
3141     str = str.drop_front();
3142     if (str.size() < MIN_NAME_SIZE)
3143       return false;
3144   }
3145 
3146   if (str.starts_with("nan") || str.starts_with("NaN")) {
3147     str = str.drop_front(3);
3148 
3149     // A NaN without payload.
3150     if (str.empty()) {
3151       makeNaN(IsSignaling, IsNegative);
3152       return true;
3153     }
3154 
3155     // Allow the payload to be inside parentheses.
3156     if (str.front() == '(') {
3157       // Parentheses should be balanced (and not empty).
3158       if (str.size() <= 2 || str.back() != ')')
3159         return false;
3160 
3161       str = str.slice(1, str.size() - 1);
3162     }
3163 
3164     // Determine the payload number's radix.
3165     unsigned Radix = 10;
3166     if (str[0] == '0') {
3167       if (str.size() > 1 && tolower(str[1]) == 'x') {
3168         str = str.drop_front(2);
3169         Radix = 16;
3170       } else
3171         Radix = 8;
3172     }
3173 
3174     // Parse the payload and make the NaN.
3175     APInt Payload;
3176     if (!str.getAsInteger(Radix, Payload)) {
3177       makeNaN(IsSignaling, IsNegative, &Payload);
3178       return true;
3179     }
3180   }
3181 
3182   return false;
3183 }
3184 
3185 Expected<IEEEFloat::opStatus>
3186 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3187   if (str.empty())
3188     return createError("Invalid string length");
3189 
3190   // Handle special cases.
3191   if (convertFromStringSpecials(str))
3192     return opOK;
3193 
3194   /* Handle a leading minus sign.  */
3195   StringRef::iterator p = str.begin();
3196   size_t slen = str.size();
3197   sign = *p == '-' ? 1 : 0;
3198   if (*p == '-' || *p == '+') {
3199     p++;
3200     slen--;
3201     if (!slen)
3202       return createError("String has no digits");
3203   }
3204 
3205   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3206     if (slen == 2)
3207       return createError("Invalid string");
3208     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3209                                         rounding_mode);
3210   }
3211 
3212   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3213 }
3214 
3215 /* Write out a hexadecimal representation of the floating point value
3216    to DST, which must be of sufficient size, in the C99 form
3217    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3218    excluding the terminating NUL.
3219 
3220    If UPPERCASE, the output is in upper case, otherwise in lower case.
3221 
3222    HEXDIGITS digits appear altogether, rounding the value if
3223    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3224    number precisely is used instead.  If nothing would appear after
3225    the decimal point it is suppressed.
3226 
3227    The decimal exponent is always printed and has at least one digit.
3228    Zero values display an exponent of zero.  Infinities and NaNs
3229    appear as "infinity" or "nan" respectively.
3230 
3231    The above rules are as specified by C99.  There is ambiguity about
3232    what the leading hexadecimal digit should be.  This implementation
3233    uses whatever is necessary so that the exponent is displayed as
3234    stored.  This implies the exponent will fall within the IEEE format
3235    range, and the leading hexadecimal digit will be 0 (for denormals),
3236    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3237    any other digits zero).
3238 */
3239 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3240                                            bool upperCase,
3241                                            roundingMode rounding_mode) const {
3242   char *p;
3243 
3244   p = dst;
3245   if (sign)
3246     *dst++ = '-';
3247 
3248   switch (category) {
3249   case fcInfinity:
3250     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3251     dst += sizeof infinityL - 1;
3252     break;
3253 
3254   case fcNaN:
3255     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3256     dst += sizeof NaNU - 1;
3257     break;
3258 
3259   case fcZero:
3260     *dst++ = '0';
3261     *dst++ = upperCase ? 'X': 'x';
3262     *dst++ = '0';
3263     if (hexDigits > 1) {
3264       *dst++ = '.';
3265       memset (dst, '0', hexDigits - 1);
3266       dst += hexDigits - 1;
3267     }
3268     *dst++ = upperCase ? 'P': 'p';
3269     *dst++ = '0';
3270     break;
3271 
3272   case fcNormal:
3273     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3274     break;
3275   }
3276 
3277   *dst = 0;
3278 
3279   return static_cast<unsigned int>(dst - p);
3280 }
3281 
/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.

   Writes "0x" (or "0X"), the digits, the exponent marker and the
   signed decimal exponent starting at DST, and returns the pointer
   produced by writeSignedDecimal.  No terminating NUL is written
   here; convertToHexString adds it.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  /* Left shift that brings the top of those valueBits to the top of
     an integerPart.  */
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  /* Number of integerParts needed to hold valueBits; this can be one
     more than the significand has, see below.  */
  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Fill the vacated low bits from the next lower part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Increment the digit string from its last digit, carrying into
       the previous digit for as long as a digit wraps round to '0'.
       Note that hexDigitChars has a trailing '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}
3387 
3388 hash_code hash_value(const IEEEFloat &Arg) {
3389   if (!Arg.isFiniteNonZero())
3390     return hash_combine((uint8_t)Arg.category,
3391                         // NaN has no sign, fix it at zero.
3392                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3393                         Arg.semantics->precision);
3394 
3395   // Normal floats need their exponent and significand hashed.
3396   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3397                       Arg.semantics->precision, Arg.exponent,
3398                       hash_combine_range(
3399                         Arg.significandParts(),
3400                         Arg.significandParts() + Arg.partCount()));
3401 }
3402 
3403 // Conversion from APFloat to/from host float/double.  It may eventually be
3404 // possible to eliminate these and have everybody deal with APFloats, but that
3405 // will take a while.  This approach will not easily extend to long double.
3406 // Current implementation requires integerPartWidth==64, which is correct at
3407 // the moment but could be made more general.
3408 
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3411 
3412 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3413   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3414   assert(partCount()==2);
3415 
3416   uint64_t myexponent, mysignificand;
3417 
3418   if (isFiniteNonZero()) {
3419     myexponent = exponent+16383; //bias
3420     mysignificand = significandParts()[0];
3421     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3422       myexponent = 0;   // denormal
3423   } else if (category==fcZero) {
3424     myexponent = 0;
3425     mysignificand = 0;
3426   } else if (category==fcInfinity) {
3427     myexponent = 0x7fff;
3428     mysignificand = 0x8000000000000000ULL;
3429   } else {
3430     assert(category == fcNaN && "Unknown category");
3431     myexponent = 0x7fff;
3432     mysignificand = significandParts()[0];
3433   }
3434 
3435   uint64_t words[2];
3436   words[0] = mysignificand;
3437   words[1] =  ((uint64_t)(sign & 1) << 15) |
3438               (myexponent & 0x7fffLL);
3439   return APInt(80, words);
3440 }
3441 
3442 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3443   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3444   assert(partCount()==2);
3445 
3446   uint64_t words[2];
3447   opStatus fs;
3448   bool losesInfo;
3449 
3450   // Convert number to double.  To avoid spurious underflows, we re-
3451   // normalize against the "double" minExponent first, and only *then*
3452   // truncate the mantissa.  The result of that second conversion
3453   // may be inexact, but should never underflow.
3454   // Declare fltSemantics before APFloat that uses it (and
3455   // saves pointer to it) to ensure correct destruction order.
3456   fltSemantics extendedSemantics = *semantics;
3457   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3458   IEEEFloat extended(*this);
3459   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3460   assert(fs == opOK && !losesInfo);
3461   (void)fs;
3462 
3463   IEEEFloat u(extended);
3464   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3465   assert(fs == opOK || fs == opInexact);
3466   (void)fs;
3467   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3468 
3469   // If conversion was exact or resulted in a special case, we're done;
3470   // just set the second double to zero.  Otherwise, re-convert back to
3471   // the extended format and compute the difference.  This now should
3472   // convert exactly to double.
3473   if (u.isFiniteNonZero() && losesInfo) {
3474     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3475     assert(fs == opOK && !losesInfo);
3476     (void)fs;
3477 
3478     IEEEFloat v(extended);
3479     v.subtract(u, rmNearestTiesToEven);
3480     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3481     assert(fs == opOK && !losesInfo);
3482     (void)fs;
3483     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3484   } else {
3485     words[1] = 0;
3486   }
3487 
3488   return APInt(128, words);
3489 }
3490 
3491 template <const fltSemantics &S>
3492 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3493   assert(semantics == &S);
3494 
3495   constexpr int bias = -(S.minExponent - 1);
3496   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3497   constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3498   constexpr integerPart integer_bit =
3499       integerPart{1} << (trailing_significand_bits % integerPartWidth);
3500   constexpr uint64_t significand_mask = integer_bit - 1;
3501   constexpr unsigned int exponent_bits =
3502       S.sizeInBits - 1 - trailing_significand_bits;
3503   static_assert(exponent_bits < 64);
3504   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3505 
3506   uint64_t myexponent;
3507   std::array<integerPart, partCountForBits(trailing_significand_bits)>
3508       mysignificand;
3509 
3510   if (isFiniteNonZero()) {
3511     myexponent = exponent + bias;
3512     std::copy_n(significandParts(), mysignificand.size(),
3513                 mysignificand.begin());
3514     if (myexponent == 1 &&
3515         !(significandParts()[integer_bit_part] & integer_bit))
3516       myexponent = 0; // denormal
3517   } else if (category == fcZero) {
3518     myexponent = ::exponentZero(S) + bias;
3519     mysignificand.fill(0);
3520   } else if (category == fcInfinity) {
3521     if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
3522       llvm_unreachable("semantics don't support inf!");
3523     }
3524     myexponent = ::exponentInf(S) + bias;
3525     mysignificand.fill(0);
3526   } else {
3527     assert(category == fcNaN && "Unknown category!");
3528     myexponent = ::exponentNaN(S) + bias;
3529     std::copy_n(significandParts(), mysignificand.size(),
3530                 mysignificand.begin());
3531   }
3532   std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3533   auto words_iter =
3534       std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3535   if constexpr (significand_mask != 0) {
3536     // Clear the integer bit.
3537     words[mysignificand.size() - 1] &= significand_mask;
3538   }
3539   std::fill(words_iter, words.end(), uint64_t{0});
3540   constexpr size_t last_word = words.size() - 1;
3541   uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3542                           << ((S.sizeInBits - 1) % 64);
3543   words[last_word] |= shifted_sign;
3544   uint64_t shifted_exponent = (myexponent & exponent_mask)
3545                               << (trailing_significand_bits % 64);
3546   words[last_word] |= shifted_exponent;
3547   if constexpr (last_word == 0) {
3548     return APInt(S.sizeInBits, words[0]);
3549   }
3550   return APInt(S.sizeInBits, words);
3551 }
3552 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semIEEEquad>(); asserts that the significand
/// occupies two parts.
APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}
3557 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semIEEEdouble>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}
3562 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semIEEEsingle>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}
3567 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semBFloat>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}
3572 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semIEEEhalf>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}
3577 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semFloat8E5M2>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}
3582 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}
3587 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semFloat8E4M3FN>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}
3592 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}
3597 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); asserts that the
/// significand occupies a single part.
APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}
3602 
/// Returns the bit pattern of this value via
/// convertIEEEFloatToAPInt<semFloatTF32>(); asserts that the significand
/// occupies a single part.
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}
3607 
3608 // This function creates an APInt that is just a bit map of the floating
3609 // point constant as it would appear in memory.  It is not a conversion,
3610 // and treating the result as a normal integer is unlikely to be useful.
3611 
3612 APInt IEEEFloat::bitcastToAPInt() const {
3613   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3614     return convertHalfAPFloatToAPInt();
3615 
3616   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3617     return convertBFloatAPFloatToAPInt();
3618 
3619   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3620     return convertFloatAPFloatToAPInt();
3621 
3622   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3623     return convertDoubleAPFloatToAPInt();
3624 
3625   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3626     return convertQuadrupleAPFloatToAPInt();
3627 
3628   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3629     return convertPPCDoubleDoubleAPFloatToAPInt();
3630 
3631   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3632     return convertFloat8E5M2APFloatToAPInt();
3633 
3634   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3635     return convertFloat8E5M2FNUZAPFloatToAPInt();
3636 
3637   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3638     return convertFloat8E4M3FNAPFloatToAPInt();
3639 
3640   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3641     return convertFloat8E4M3FNUZAPFloatToAPInt();
3642 
3643   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3644     return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3645 
3646   if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3647     return convertFloatTF32APFloatToAPInt();
3648 
3649   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3650          "unknown format!");
3651   return convertF80LongDoubleAPFloatToAPInt();
3652 }
3653 
3654 float IEEEFloat::convertToFloat() const {
3655   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3656          "Float semantics are not IEEEsingle");
3657   APInt api = bitcastToAPInt();
3658   return api.bitsToFloat();
3659 }
3660 
3661 double IEEEFloat::convertToDouble() const {
3662   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3663          "Float semantics are not IEEEdouble");
3664   APInt api = bitcastToAPInt();
3665   return api.bitsToDouble();
3666 }
3667 
3668 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3669 /// does not support these bit patterns:
3670 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3671 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3672 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3673 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3674 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3675 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3676   uint64_t i1 = api.getRawData()[0];
3677   uint64_t i2 = api.getRawData()[1];
3678   uint64_t myexponent = (i2 & 0x7fff);
3679   uint64_t mysignificand = i1;
3680   uint8_t myintegerbit = mysignificand >> 63;
3681 
3682   initialize(&semX87DoubleExtended);
3683   assert(partCount()==2);
3684 
3685   sign = static_cast<unsigned int>(i2>>15);
3686   if (myexponent == 0 && mysignificand == 0) {
3687     makeZero(sign);
3688   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3689     makeInf(sign);
3690   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3691              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3692     category = fcNaN;
3693     exponent = exponentNaN();
3694     significandParts()[0] = mysignificand;
3695     significandParts()[1] = 0;
3696   } else {
3697     category = fcNormal;
3698     exponent = myexponent - 16383;
3699     significandParts()[0] = mysignificand;
3700     significandParts()[1] = 0;
3701     if (myexponent==0)          // denormal
3702       exponent = -16382;
3703   }
3704 }
3705 
3706 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3707   uint64_t i1 = api.getRawData()[0];
3708   uint64_t i2 = api.getRawData()[1];
3709   opStatus fs;
3710   bool losesInfo;
3711 
3712   // Get the first double and convert to our format.
3713   initFromDoubleAPInt(APInt(64, i1));
3714   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3715   assert(fs == opOK && !losesInfo);
3716   (void)fs;
3717 
3718   // Unless we have a special case, add in second double.
3719   if (isFiniteNonZero()) {
3720     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3721     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3722     assert(fs == opOK && !losesInfo);
3723     (void)fs;
3724 
3725     add(v, rmNearestTiesToEven);
3726   }
3727 }
3728 
3729 template <const fltSemantics &S>
3730 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3731   assert(api.getBitWidth() == S.sizeInBits);
3732   constexpr integerPart integer_bit = integerPart{1}
3733                                       << ((S.precision - 1) % integerPartWidth);
3734   constexpr uint64_t significand_mask = integer_bit - 1;
3735   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3736   constexpr unsigned int stored_significand_parts =
3737       partCountForBits(trailing_significand_bits);
3738   constexpr unsigned int exponent_bits =
3739       S.sizeInBits - 1 - trailing_significand_bits;
3740   static_assert(exponent_bits < 64);
3741   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3742   constexpr int bias = -(S.minExponent - 1);
3743 
3744   // Copy the bits of the significand. We need to clear out the exponent and
3745   // sign bit in the last word.
3746   std::array<integerPart, stored_significand_parts> mysignificand;
3747   std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3748   if constexpr (significand_mask != 0) {
3749     mysignificand[mysignificand.size() - 1] &= significand_mask;
3750   }
3751 
3752   // We assume the last word holds the sign bit, the exponent, and potentially
3753   // some of the trailing significand field.
3754   uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3755   uint64_t myexponent =
3756       (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3757 
3758   initialize(&S);
3759   assert(partCount() == mysignificand.size());
3760 
3761   sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3762 
3763   bool all_zero_significand =
3764       llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3765 
3766   bool is_zero = myexponent == 0 && all_zero_significand;
3767 
3768   if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3769     if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3770       makeInf(sign);
3771       return;
3772     }
3773   }
3774 
3775   bool is_nan = false;
3776 
3777   if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3778     is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3779   } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3780     bool all_ones_significand =
3781         std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3782                     [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3783         (!significand_mask ||
3784          mysignificand[mysignificand.size() - 1] == significand_mask);
3785     is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3786   } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3787     is_nan = is_zero && sign;
3788   }
3789 
3790   if (is_nan) {
3791     category = fcNaN;
3792     exponent = ::exponentNaN(S);
3793     std::copy_n(mysignificand.begin(), mysignificand.size(),
3794                 significandParts());
3795     return;
3796   }
3797 
3798   if (is_zero) {
3799     makeZero(sign);
3800     return;
3801   }
3802 
3803   category = fcNormal;
3804   exponent = myexponent - bias;
3805   std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3806   if (myexponent == 0) // denormal
3807     exponent = S.minExponent;
3808   else
3809     significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3810 }
3811 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semIEEEquad>.
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEquad>(api);
}
3815 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semIEEEdouble>.
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEdouble>(api);
}
3819 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semIEEEsingle>.
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEsingle>(api);
}
3823 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semBFloat>.
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semBFloat>(api);
}
3827 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semIEEEhalf>.
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEhalf>(api);
}
3831 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semFloat8E5M2>.
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2>(api);
}
3835 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semFloat8E5M2FNUZ>.
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}
3839 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semFloat8E4M3FN>.
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FN>(api);
}
3843 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semFloat8E4M3FNUZ>.
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
}
3847 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semFloat8E4M3B11FNUZ>.
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}
3851 
/// Initializes this value from the bits in \p api by forwarding to
/// initFromIEEEAPInt<semFloatTF32>.
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
  initFromIEEEAPInt<semFloatTF32>(api);
}
3855 
3856 /// Treat api as containing the bits of a floating point number.
3857 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3858   assert(api.getBitWidth() == Sem->sizeInBits);
3859   if (Sem == &semIEEEhalf)
3860     return initFromHalfAPInt(api);
3861   if (Sem == &semBFloat)
3862     return initFromBFloatAPInt(api);
3863   if (Sem == &semIEEEsingle)
3864     return initFromFloatAPInt(api);
3865   if (Sem == &semIEEEdouble)
3866     return initFromDoubleAPInt(api);
3867   if (Sem == &semX87DoubleExtended)
3868     return initFromF80LongDoubleAPInt(api);
3869   if (Sem == &semIEEEquad)
3870     return initFromQuadrupleAPInt(api);
3871   if (Sem == &semPPCDoubleDoubleLegacy)
3872     return initFromPPCDoubleDoubleAPInt(api);
3873   if (Sem == &semFloat8E5M2)
3874     return initFromFloat8E5M2APInt(api);
3875   if (Sem == &semFloat8E5M2FNUZ)
3876     return initFromFloat8E5M2FNUZAPInt(api);
3877   if (Sem == &semFloat8E4M3FN)
3878     return initFromFloat8E4M3FNAPInt(api);
3879   if (Sem == &semFloat8E4M3FNUZ)
3880     return initFromFloat8E4M3FNUZAPInt(api);
3881   if (Sem == &semFloat8E4M3B11FNUZ)
3882     return initFromFloat8E4M3B11FNUZAPInt(api);
3883   if (Sem == &semFloatTF32)
3884     return initFromFloatTF32APInt(api);
3885 
3886   llvm_unreachable(nullptr);
3887 }
3888 
3889 /// Make this number the largest magnitude normal number in the given
3890 /// semantics.
3891 void IEEEFloat::makeLargest(bool Negative) {
3892   // We want (in interchange format):
3893   //   sign = {Negative}
3894   //   exponent = 1..10
3895   //   significand = 1..1
3896   category = fcNormal;
3897   sign = Negative;
3898   exponent = semantics->maxExponent;
3899 
3900   // Use memset to set all but the highest integerPart to all ones.
3901   integerPart *significand = significandParts();
3902   unsigned PartCount = partCount();
3903   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3904 
3905   // Set the high integerPart especially setting all unused top bits for
3906   // internal consistency.
3907   const unsigned NumUnusedHighBits =
3908     PartCount*integerPartWidth - semantics->precision;
3909   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3910                                    ? (~integerPart(0) >> NumUnusedHighBits)
3911                                    : 0;
3912 
3913   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3914       semantics->nanEncoding == fltNanEncoding::AllOnes)
3915     significand[0] &= ~integerPart(1);
3916 }
3917 
3918 /// Make this number the smallest magnitude denormal number in the given
3919 /// semantics.
3920 void IEEEFloat::makeSmallest(bool Negative) {
3921   // We want (in interchange format):
3922   //   sign = {Negative}
3923   //   exponent = 0..0
3924   //   significand = 0..01
3925   category = fcNormal;
3926   sign = Negative;
3927   exponent = semantics->minExponent;
3928   APInt::tcSet(significandParts(), 1, partCount());
3929 }
3930 
3931 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3932   // We want (in interchange format):
3933   //   sign = {Negative}
3934   //   exponent = 0..0
3935   //   significand = 10..0
3936 
3937   category = fcNormal;
3938   zeroSignificand();
3939   sign = Negative;
3940   exponent = semantics->minExponent;
3941   APInt::tcSetBit(significandParts(), semantics->precision - 1);
3942 }
3943 
/// Construct a value with semantics \p Sem from the raw bit pattern \p API
/// (delegates to initFromAPInt).
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}
3947 
/// Construct an IEEE single value from the bits of the host float \p f
/// (APInt::floatToBits, then initFromAPInt).
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}
3951 
/// Construct an IEEE double value from the bits of the host double \p d
/// (APInt::doubleToBits, then initFromAPInt).
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}
3955 
3956 namespace {
3957   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3958     Buffer.append(Str.begin(), Str.end());
3959   }
3960 
3961   /// Removes data from the given significand until it is no more
3962   /// precise than is required for the desired precision.
3963   void AdjustToPrecision(APInt &significand,
3964                          int &exp, unsigned FormatPrecision) {
3965     unsigned bits = significand.getActiveBits();
3966 
3967     // 196/59 is a very slight overestimate of lg_2(10).
3968     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3969 
3970     if (bits <= bitsRequired) return;
3971 
3972     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3973     if (!tensRemovable) return;
3974 
3975     exp += tensRemovable;
3976 
3977     APInt divisor(significand.getBitWidth(), 1);
3978     APInt powten(significand.getBitWidth(), 10);
3979     while (true) {
3980       if (tensRemovable & 1)
3981         divisor *= powten;
3982       tensRemovable >>= 1;
3983       if (!tensRemovable) break;
3984       powten *= powten;
3985     }
3986 
3987     significand = significand.udiv(divisor);
3988 
3989     // Truncate the significand down to its active bit count.
3990     significand = significand.trunc(significand.getActiveBits());
3991   }
3992 
3993 
3994   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3995                          int &exp, unsigned FormatPrecision) {
3996     unsigned N = buffer.size();
3997     if (N <= FormatPrecision) return;
3998 
3999     // The most significant figures are the last ones in the buffer.
4000     unsigned FirstSignificant = N - FormatPrecision;
4001 
4002     // Round.
4003     // FIXME: this probably shouldn't use 'round half up'.
4004 
4005     // Rounding down is just a truncation, except we also want to drop
4006     // trailing zeros from the new result.
4007     if (buffer[FirstSignificant - 1] < '5') {
4008       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4009         FirstSignificant++;
4010 
4011       exp += FirstSignificant;
4012       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4013       return;
4014     }
4015 
4016     // Rounding up requires a decimal add-with-carry.  If we continue
4017     // the carry, the newly-introduced zeros will just be truncated.
4018     for (unsigned I = FirstSignificant; I != N; ++I) {
4019       if (buffer[I] == '9') {
4020         FirstSignificant++;
4021       } else {
4022         buffer[I]++;
4023         break;
4024       }
4025     }
4026 
4027     // If we carried through, we have exactly one digit of precision.
4028     if (FirstSignificant == N) {
4029       exp += FirstSignificant;
4030       buffer.clear();
4031       buffer.push_back('1');
4032       return;
4033     }
4034 
4035     exp += FirstSignificant;
4036     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4037   }
4038 } // namespace
4039 
/// Append a decimal rendering of this value to \p Str.
///
/// Infinity is written as "+Inf" / "-Inf" and NaN as "NaN". Zero and finite
/// non-zero values get a leading '-' when negative.
///
/// \param Str output buffer; characters are appended, nothing is cleared.
/// \param FormatPrecision maximum number of significant decimal digits. When
///        zero, it is set to 2 + precision * 59 / 196 for finite non-zero
///        values (see the comment at that computation).
/// \param FormatMaxPadding when zero, scientific notation is always used;
///        otherwise it bounds how many padding zeros positional notation may
///        introduce before scientific notation is chosen instead.
/// \param TruncateZero when true, scientific output uses 'E' and is not
///        zero-padded; when false, it uses 'e', pads the digits with zeros
///        and prints at least two exponent digits.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  // Special values and zero are emitted directly; only fcNormal falls through
  // to the digit-generation code below.
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  if (isNegative())
    Str.push_back('-');

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  // Set FormatPrecision if zero.  We want to do this before we
  // truncate trailing zeros, as those are part of the precision.
  if (!FormatPrecision) {
    // We use enough digits so the number can be round-tripped back to an
    // APFloat. The formula comes from "How to Print Floating-Point Numbers
    // Accurately" by Steele and White.
    // FIXME: Using a formula based purely on the precision is conservative;
    // we can print fewer digits depending on the actual value being printed.

    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
    FormatPrecision = 2 + semantics->precision * 59 / 196;
  }

  // Ignore trailing binary zeros.
  int trailingZeros = significand.countr_zero();
  exp += trailingZeros;
  significand.lshrInPlace(trailingZeros);

  // Change the exponent from 2^e to 10^e.
  if (exp == 0) {
    // Nothing to do.
  } else if (exp > 0) {
    // Just shift left.
    significand = significand.zext(semantics->precision + exp);
    significand <<= exp;
    exp = 0;
  } else { /* exp < 0 */
    int texp = -exp;

    // We transform this using the identity:
    //   (N)(2^-e) == (N)(5^e)(10^-e)
    // This means we have to multiply N (the significand) by 5^e.
    // To avoid overflow, we have to operate on numbers large
    // enough to store N * 5^e:
    //   log2(N * 5^e) == log2(N) + e * log2(5)
    //                 <= semantics->precision + e * 137 / 59
    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

    unsigned precision = semantics->precision + (137 * texp + 136) / 59;

    // Multiply significand by 5^e.
    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
    significand = significand.zext(precision);
    APInt five_to_the_i(precision, 5);
    while (true) {
      if (texp & 1) significand *= five_to_the_i;

      texp >>= 1;
      if (!texp) break;
      five_to_the_i *= five_to_the_i;
    }
  }

  // From here on, exp is a power-of-ten exponent (zero or negative at this
  // point; the precision adjustments below may raise it).
  AdjustToPrecision(significand, exp, FormatPrecision);

  // Decimal digits of the significand, least significant digit first.
  SmallVector<char, 256> buffer;

  // Fill the buffer.
  unsigned precision = significand.getBitWidth();
  if (precision < 4) {
    // We need enough precision to store the value 10.
    precision = 4;
    significand = significand.zext(precision);
  }
  APInt ten(precision, 10);
  APInt digit(precision, 0);

  // True while only zero digits have been produced so far; those are folded
  // into the exponent instead of being stored.
  bool inTrail = true;
  while (significand != 0) {
    // digit <- significand % 10
    // significand <- significand / 10
    APInt::udivrem(significand, ten, significand, digit);

    unsigned d = digit.getZExtValue();

    // Drop trailing zeros.
    if (inTrail && !d) exp++;
    else {
      buffer.push_back((char) ('0' + d));
      inTrail = false;
    }
  }

  assert(!buffer.empty() && "no characters in buffer!");

  // Drop down to FormatPrecision.
  // TODO: don't do more precise calculations above than are required.
  AdjustToPrecision(buffer, exp, FormatPrecision);

  unsigned NDigits = buffer.size();

  // Check whether we should use scientific notation.
  bool FormatScientific;
  if (!FormatMaxPadding)
    FormatScientific = true;
  else {
    if (exp >= 0) {
      // 765e3 --> 765000
      //              ^^^
      // But we shouldn't make the number look more precise than it is.
      FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                          NDigits + (unsigned) exp > FormatPrecision);
    } else {
      // Power of the most significant digit.
      int MSD = exp + (int) (NDigits - 1);
      if (MSD >= 0) {
        // 765e-2 == 7.65
        FormatScientific = false;
      } else {
        // 765e-5 == 0.00765
        //           ^ ^^
        FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
      }
    }
  }

  // Scientific formatting is pretty straightforward.
  if (FormatScientific) {
    // Rebase the exponent so that it applies to the leading digit.
    exp += (NDigits - 1);

    Str.push_back(buffer[NDigits-1]);
    Str.push_back('.');
    if (NDigits == 1 && TruncateZero)
      Str.push_back('0');
    else
      for (unsigned I = 1; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
    // Fill with zeros up to FormatPrecision.
    if (!TruncateZero && FormatPrecision > NDigits - 1)
      Str.append(FormatPrecision - NDigits + 1, '0');
    // For !TruncateZero we use lower 'e'.
    Str.push_back(TruncateZero ? 'E' : 'e');

    Str.push_back(exp >= 0 ? '+' : '-');
    if (exp < 0) exp = -exp;
    // Exponent digits are generated least significant first and emitted in
    // reverse below.
    SmallVector<char, 6> expbuf;
    do {
      expbuf.push_back((char) ('0' + (exp % 10)));
      exp /= 10;
    } while (exp);
    // Exponent always at least two digits if we do not truncate zeros.
    if (!TruncateZero && expbuf.size() < 2)
      expbuf.push_back('0');
    for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
      Str.push_back(expbuf[E-1-I]);
    return;
  }

  // Non-scientific, positive exponents.
  if (exp >= 0) {
    for (unsigned I = 0; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-1-I]);
    for (unsigned I = 0; I != (unsigned) exp; ++I)
      Str.push_back('0');
    return;
  }

  // Non-scientific, negative exponents.

  // The number of digits to the left of the decimal point.
  int NWholeDigits = exp + (int) NDigits;

  unsigned I = 0;
  if (NWholeDigits > 0) {
    for (; I != (unsigned) NWholeDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);
    Str.push_back('.');
  } else {
    // No whole digits: emit "0." followed by -NWholeDigits leading zeros.
    unsigned NZeros = 1 + (unsigned) -NWholeDigits;

    Str.push_back('0');
    Str.push_back('.');
    for (unsigned Z = 1; Z != NZeros; ++Z)
      Str.push_back('0');
  }

  // Remaining (fractional) digits, most significant first.
  for (; I != NDigits; ++I)
    Str.push_back(buffer[NDigits-I-1]);
}
4260 
4261 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4262   // Special floats and denormals have no exact inverse.
4263   if (!isFiniteNonZero())
4264     return false;
4265 
4266   // Check that the number is a power of two by making sure that only the
4267   // integer bit is set in the significand.
4268   if (significandLSB() != semantics->precision - 1)
4269     return false;
4270 
4271   // Get the inverse.
4272   IEEEFloat reciprocal(*semantics, 1ULL);
4273   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4274     return false;
4275 
4276   // Avoid multiplication with a denormal, it is not safe on all platforms and
4277   // may be slower than a normal division.
4278   if (reciprocal.isDenormal())
4279     return false;
4280 
4281   assert(reciprocal.isFiniteNonZero() &&
4282          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4283 
4284   if (inv)
4285     *inv = APFloat(reciprocal, *semantics);
4286 
4287   return true;
4288 }
4289 
4290 int IEEEFloat::getExactLog2Abs() const {
4291   if (!isFinite() || isZero())
4292     return INT_MIN;
4293 
4294   const integerPart *Parts = significandParts();
4295   const int PartCount = partCountForBits(semantics->precision);
4296 
4297   int PopCount = 0;
4298   for (int i = 0; i < PartCount; ++i) {
4299     PopCount += llvm::popcount(Parts[i]);
4300     if (PopCount > 1)
4301       return INT_MIN;
4302   }
4303 
4304   if (exponent != semantics->minExponent)
4305     return exponent;
4306 
4307   int CountrParts = 0;
4308   for (int i = 0; i < PartCount;
4309        ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4310     if (Parts[i] != 0) {
4311       return exponent - semantics->precision + CountrParts +
4312              llvm::countr_zero(Parts[i]) + 1;
4313     }
4314   }
4315 
4316   llvm_unreachable("didn't find the set bit");
4317 }
4318 
4319 bool IEEEFloat::isSignaling() const {
4320   if (!isNaN())
4321     return false;
4322   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4323     return false;
4324 
4325   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4326   // first bit of the trailing significand being 0.
4327   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4328 }
4329 
4330 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4331 ///
4332 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4333 /// appropriate sign switching before/after the computation.
4334 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4335   // If we are performing nextDown, swap sign so we have -x.
4336   if (nextDown)
4337     changeSign();
4338 
4339   // Compute nextUp(x)
4340   opStatus result = opOK;
4341 
4342   // Handle each float category separately.
4343   switch (category) {
4344   case fcInfinity:
4345     // nextUp(+inf) = +inf
4346     if (!isNegative())
4347       break;
4348     // nextUp(-inf) = -getLargest()
4349     makeLargest(true);
4350     break;
4351   case fcNaN:
4352     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4353     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4354     //                     change the payload.
4355     if (isSignaling()) {
4356       result = opInvalidOp;
4357       // For consistency, propagate the sign of the sNaN to the qNaN.
4358       makeNaN(false, isNegative(), nullptr);
4359     }
4360     break;
4361   case fcZero:
4362     // nextUp(pm 0) = +getSmallest()
4363     makeSmallest(false);
4364     break;
4365   case fcNormal:
4366     // nextUp(-getSmallest()) = -0
4367     if (isSmallest() && isNegative()) {
4368       APInt::tcSet(significandParts(), 0, partCount());
4369       category = fcZero;
4370       exponent = 0;
4371       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4372         sign = false;
4373       break;
4374     }
4375 
4376     if (isLargest() && !isNegative()) {
4377       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4378         // nextUp(getLargest()) == NAN
4379         makeNaN();
4380         break;
4381       } else {
4382         // nextUp(getLargest()) == INFINITY
4383         APInt::tcSet(significandParts(), 0, partCount());
4384         category = fcInfinity;
4385         exponent = semantics->maxExponent + 1;
4386         break;
4387       }
4388     }
4389 
4390     // nextUp(normal) == normal + inc.
4391     if (isNegative()) {
4392       // If we are negative, we need to decrement the significand.
4393 
4394       // We only cross a binade boundary that requires adjusting the exponent
4395       // if:
4396       //   1. exponent != semantics->minExponent. This implies we are not in the
4397       //   smallest binade or are dealing with denormals.
4398       //   2. Our significand excluding the integral bit is all zeros.
4399       bool WillCrossBinadeBoundary =
4400         exponent != semantics->minExponent && isSignificandAllZeros();
4401 
4402       // Decrement the significand.
4403       //
4404       // We always do this since:
4405       //   1. If we are dealing with a non-binade decrement, by definition we
4406       //   just decrement the significand.
4407       //   2. If we are dealing with a normal -> normal binade decrement, since
4408       //   we have an explicit integral bit the fact that all bits but the
4409       //   integral bit are zero implies that subtracting one will yield a
4410       //   significand with 0 integral bit and 1 in all other spots. Thus we
4411       //   must just adjust the exponent and set the integral bit to 1.
4412       //   3. If we are dealing with a normal -> denormal binade decrement,
4413       //   since we set the integral bit to 0 when we represent denormals, we
4414       //   just decrement the significand.
4415       integerPart *Parts = significandParts();
4416       APInt::tcDecrement(Parts, partCount());
4417 
4418       if (WillCrossBinadeBoundary) {
4419         // Our result is a normal number. Do the following:
4420         // 1. Set the integral bit to 1.
4421         // 2. Decrement the exponent.
4422         APInt::tcSetBit(Parts, semantics->precision - 1);
4423         exponent--;
4424       }
4425     } else {
4426       // If we are positive, we need to increment the significand.
4427 
4428       // We only cross a binade boundary that requires adjusting the exponent if
4429       // the input is not a denormal and all of said input's significand bits
4430       // are set. If all of said conditions are true: clear the significand, set
4431       // the integral bit to 1, and increment the exponent. If we have a
4432       // denormal always increment since moving denormals and the numbers in the
4433       // smallest normal binade have the same exponent in our representation.
4434       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4435 
4436       if (WillCrossBinadeBoundary) {
4437         integerPart *Parts = significandParts();
4438         APInt::tcSet(Parts, 0, partCount());
4439         APInt::tcSetBit(Parts, semantics->precision - 1);
4440         assert(exponent != semantics->maxExponent &&
4441                "We can not increment an exponent beyond the maxExponent allowed"
4442                " by the given floating point semantics.");
4443         exponent++;
4444       } else {
4445         incrementSignificand();
4446       }
4447     }
4448     break;
4449   }
4450 
4451   // If we are performing nextDown, swap sign so we have -nextUp(-x)
4452   if (nextDown)
4453     changeSign();
4454 
4455   return result;
4456 }
4457 
4458 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4459   return ::exponentNaN(*semantics);
4460 }
4461 
4462 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4463   return ::exponentInf(*semantics);
4464 }
4465 
4466 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4467   return ::exponentZero(*semantics);
4468 }
4469 
4470 void IEEEFloat::makeInf(bool Negative) {
4471   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4472     // There is no Inf, so make NaN instead.
4473     makeNaN(false, Negative);
4474     return;
4475   }
4476   category = fcInfinity;
4477   sign = Negative;
4478   exponent = exponentInf();
4479   APInt::tcSet(significandParts(), 0, partCount());
4480 }
4481 
4482 void IEEEFloat::makeZero(bool Negative) {
4483   category = fcZero;
4484   sign = Negative;
4485   if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4486     // Merge negative zero to positive because 0b10000...000 is used for NaN
4487     sign = false;
4488   }
4489   exponent = exponentZero();
4490   APInt::tcSet(significandParts(), 0, partCount());
4491 }
4492 
4493 void IEEEFloat::makeQuiet() {
4494   assert(isNaN());
4495   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4496     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4497 }
4498 
4499 int ilogb(const IEEEFloat &Arg) {
4500   if (Arg.isNaN())
4501     return IEEEFloat::IEK_NaN;
4502   if (Arg.isZero())
4503     return IEEEFloat::IEK_Zero;
4504   if (Arg.isInfinity())
4505     return IEEEFloat::IEK_Inf;
4506   if (!Arg.isDenormal())
4507     return Arg.exponent;
4508 
4509   IEEEFloat Normalized(Arg);
4510   int SignificandBits = Arg.getSemantics().precision - 1;
4511 
4512   Normalized.exponent += SignificandBits;
4513   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4514   return Normalized.exponent - SignificandBits;
4515 }
4516 
4517 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4518   auto MaxExp = X.getSemantics().maxExponent;
4519   auto MinExp = X.getSemantics().minExponent;
4520 
4521   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4522   // overflow; clamp it to a safe range before adding, but ensure that the range
4523   // is large enough that the clamp does not change the result. The range we
4524   // need to support is the difference between the largest possible exponent and
4525   // the normalized exponent of half the smallest denormal.
4526 
4527   int SignificandBits = X.getSemantics().precision - 1;
4528   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4529 
4530   // Clamp to one past the range ends to let normalize handle overlflow.
4531   X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4532   X.normalize(RoundingMode, lfExactlyZero);
4533   if (X.isNaN())
4534     X.makeQuiet();
4535   return X;
4536 }
4537 
4538 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4539   Exp = ilogb(Val);
4540 
4541   // Quiet signalling nans.
4542   if (Exp == IEEEFloat::IEK_NaN) {
4543     IEEEFloat Quiet(Val);
4544     Quiet.makeQuiet();
4545     return Quiet;
4546   }
4547 
4548   if (Exp == IEEEFloat::IEK_Inf)
4549     return Val;
4550 
4551   // 1 is added because frexp is defined to return a normalized fraction in
4552   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4553   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4554   return scalbn(Val, -Exp, RM);
4555 }
4556 
4557 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4558     : Semantics(&S),
4559       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4560   assert(Semantics == &semPPCDoubleDouble);
4561 }
4562 
4563 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4564     : Semantics(&S),
4565       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4566                             APFloat(semIEEEdouble, uninitialized)}) {
4567   assert(Semantics == &semPPCDoubleDouble);
4568 }
4569 
4570 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4571     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4572                                            APFloat(semIEEEdouble)}) {
4573   assert(Semantics == &semPPCDoubleDouble);
4574 }
4575 
4576 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4577     : Semantics(&S),
4578       Floats(new APFloat[2]{
4579           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4580           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4581   assert(Semantics == &semPPCDoubleDouble);
4582 }
4583 
4584 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4585                              APFloat &&Second)
4586     : Semantics(&S),
4587       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4588   assert(Semantics == &semPPCDoubleDouble);
4589   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4590   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4591 }
4592 
4593 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4594     : Semantics(RHS.Semantics),
4595       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4596                                          APFloat(RHS.Floats[1])}
4597                         : nullptr) {
4598   assert(Semantics == &semPPCDoubleDouble);
4599 }
4600 
4601 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4602     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4603   RHS.Semantics = &semBogus;
4604   assert(Semantics == &semPPCDoubleDouble);
4605 }
4606 
4607 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4608   if (Semantics == RHS.Semantics && RHS.Floats) {
4609     Floats[0] = RHS.Floats[0];
4610     Floats[1] = RHS.Floats[1];
4611   } else if (this != &RHS) {
4612     this->~DoubleAPFloat();
4613     new (this) DoubleAPFloat(RHS);
4614   }
4615   return *this;
4616 }
4617 
4618 // Implement addition, subtraction, multiplication and division based on:
4619 // "Software for Doubled-Precision Floating-Point Computations",
4620 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4621 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4622                                          const APFloat &c, const APFloat &cc,
4623                                          roundingMode RM) {
4624   int Status = opOK;
4625   APFloat z = a;
4626   Status |= z.add(c, RM);
4627   if (!z.isFinite()) {
4628     if (!z.isInfinity()) {
4629       Floats[0] = std::move(z);
4630       Floats[1].makeZero(/* Neg = */ false);
4631       return (opStatus)Status;
4632     }
4633     Status = opOK;
4634     auto AComparedToC = a.compareAbsoluteValue(c);
4635     z = cc;
4636     Status |= z.add(aa, RM);
4637     if (AComparedToC == APFloat::cmpGreaterThan) {
4638       // z = cc + aa + c + a;
4639       Status |= z.add(c, RM);
4640       Status |= z.add(a, RM);
4641     } else {
4642       // z = cc + aa + a + c;
4643       Status |= z.add(a, RM);
4644       Status |= z.add(c, RM);
4645     }
4646     if (!z.isFinite()) {
4647       Floats[0] = std::move(z);
4648       Floats[1].makeZero(/* Neg = */ false);
4649       return (opStatus)Status;
4650     }
4651     Floats[0] = z;
4652     APFloat zz = aa;
4653     Status |= zz.add(cc, RM);
4654     if (AComparedToC == APFloat::cmpGreaterThan) {
4655       // Floats[1] = a - z + c + zz;
4656       Floats[1] = a;
4657       Status |= Floats[1].subtract(z, RM);
4658       Status |= Floats[1].add(c, RM);
4659       Status |= Floats[1].add(zz, RM);
4660     } else {
4661       // Floats[1] = c - z + a + zz;
4662       Floats[1] = c;
4663       Status |= Floats[1].subtract(z, RM);
4664       Status |= Floats[1].add(a, RM);
4665       Status |= Floats[1].add(zz, RM);
4666     }
4667   } else {
4668     // q = a - z;
4669     APFloat q = a;
4670     Status |= q.subtract(z, RM);
4671 
4672     // zz = q + c + (a - (q + z)) + aa + cc;
4673     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4674     auto zz = q;
4675     Status |= zz.add(c, RM);
4676     Status |= q.add(z, RM);
4677     Status |= q.subtract(a, RM);
4678     q.changeSign();
4679     Status |= zz.add(q, RM);
4680     Status |= zz.add(aa, RM);
4681     Status |= zz.add(cc, RM);
4682     if (zz.isZero() && !zz.isNegative()) {
4683       Floats[0] = std::move(z);
4684       Floats[1].makeZero(/* Neg = */ false);
4685       return opOK;
4686     }
4687     Floats[0] = z;
4688     Status |= Floats[0].add(zz, RM);
4689     if (!Floats[0].isFinite()) {
4690       Floats[1].makeZero(/* Neg = */ false);
4691       return (opStatus)Status;
4692     }
4693     Floats[1] = std::move(z);
4694     Status |= Floats[1].subtract(Floats[0], RM);
4695     Status |= Floats[1].add(zz, RM);
4696   }
4697   return (opStatus)Status;
4698 }
4699 
4700 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4701                                                 const DoubleAPFloat &RHS,
4702                                                 DoubleAPFloat &Out,
4703                                                 roundingMode RM) {
4704   if (LHS.getCategory() == fcNaN) {
4705     Out = LHS;
4706     return opOK;
4707   }
4708   if (RHS.getCategory() == fcNaN) {
4709     Out = RHS;
4710     return opOK;
4711   }
4712   if (LHS.getCategory() == fcZero) {
4713     Out = RHS;
4714     return opOK;
4715   }
4716   if (RHS.getCategory() == fcZero) {
4717     Out = LHS;
4718     return opOK;
4719   }
4720   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4721       LHS.isNegative() != RHS.isNegative()) {
4722     Out.makeNaN(false, Out.isNegative(), nullptr);
4723     return opInvalidOp;
4724   }
4725   if (LHS.getCategory() == fcInfinity) {
4726     Out = LHS;
4727     return opOK;
4728   }
4729   if (RHS.getCategory() == fcInfinity) {
4730     Out = RHS;
4731     return opOK;
4732   }
4733   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4734 
4735   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4736       CC(RHS.Floats[1]);
4737   assert(&A.getSemantics() == &semIEEEdouble);
4738   assert(&AA.getSemantics() == &semIEEEdouble);
4739   assert(&C.getSemantics() == &semIEEEdouble);
4740   assert(&CC.getSemantics() == &semIEEEdouble);
4741   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4742   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4743   return Out.addImpl(A, AA, C, CC, RM);
4744 }
4745 
4746 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4747                                      roundingMode RM) {
4748   return addWithSpecial(*this, RHS, *this, RM);
4749 }
4750 
4751 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4752                                           roundingMode RM) {
4753   changeSign();
4754   auto Ret = add(RHS, RM);
4755   changeSign();
4756   return Ret;
4757 }
4758 
4759 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4760                                           APFloat::roundingMode RM) {
4761   const auto &LHS = *this;
4762   auto &Out = *this;
4763   /* Interesting observation: For special categories, finding the lowest
4764      common ancestor of the following layered graph gives the correct
4765      return category:
4766 
4767         NaN
4768        /   \
4769      Zero  Inf
4770        \   /
4771        Normal
4772 
4773      e.g. NaN * NaN = NaN
4774           Zero * Inf = NaN
4775           Normal * Zero = Zero
4776           Normal * Inf = Inf
4777   */
4778   if (LHS.getCategory() == fcNaN) {
4779     Out = LHS;
4780     return opOK;
4781   }
4782   if (RHS.getCategory() == fcNaN) {
4783     Out = RHS;
4784     return opOK;
4785   }
4786   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4787       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4788     Out.makeNaN(false, false, nullptr);
4789     return opOK;
4790   }
4791   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4792     Out = LHS;
4793     return opOK;
4794   }
4795   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4796     Out = RHS;
4797     return opOK;
4798   }
4799   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4800          "Special cases not handled exhaustively");
4801 
4802   int Status = opOK;
4803   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4804   // t = a * c
4805   APFloat T = A;
4806   Status |= T.multiply(C, RM);
4807   if (!T.isFiniteNonZero()) {
4808     Floats[0] = T;
4809     Floats[1].makeZero(/* Neg = */ false);
4810     return (opStatus)Status;
4811   }
4812 
4813   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4814   APFloat Tau = A;
4815   T.changeSign();
4816   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4817   T.changeSign();
4818   {
4819     // v = a * d
4820     APFloat V = A;
4821     Status |= V.multiply(D, RM);
4822     // w = b * c
4823     APFloat W = B;
4824     Status |= W.multiply(C, RM);
4825     Status |= V.add(W, RM);
4826     // tau += v + w
4827     Status |= Tau.add(V, RM);
4828   }
4829   // u = t + tau
4830   APFloat U = T;
4831   Status |= U.add(Tau, RM);
4832 
4833   Floats[0] = U;
4834   if (!U.isFinite()) {
4835     Floats[1].makeZero(/* Neg = */ false);
4836   } else {
4837     // Floats[1] = (t - u) + tau
4838     Status |= T.subtract(U, RM);
4839     Status |= T.add(Tau, RM);
4840     Floats[1] = T;
4841   }
4842   return (opStatus)Status;
4843 }
4844 
4845 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4846                                         APFloat::roundingMode RM) {
4847   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4848   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4849   auto Ret =
4850       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4851   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4852   return Ret;
4853 }
4854 
4855 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4856   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4857   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4858   auto Ret =
4859       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4860   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4861   return Ret;
4862 }
4863 
4864 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4865   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4866   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4867   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4868   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4869   return Ret;
4870 }
4871 
4872 APFloat::opStatus
4873 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4874                                 const DoubleAPFloat &Addend,
4875                                 APFloat::roundingMode RM) {
4876   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4877   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4878   auto Ret = Tmp.fusedMultiplyAdd(
4879       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4880       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4881   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4882   return Ret;
4883 }
4884 
4885 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4886   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4887   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4888   auto Ret = Tmp.roundToIntegral(RM);
4889   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4890   return Ret;
4891 }
4892 
4893 void DoubleAPFloat::changeSign() {
4894   Floats[0].changeSign();
4895   Floats[1].changeSign();
4896 }
4897 
4898 APFloat::cmpResult
4899 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4900   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4901   if (Result != cmpEqual)
4902     return Result;
4903   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4904   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4905     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4906     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4907     if (Against && !RHSAgainst)
4908       return cmpLessThan;
4909     if (!Against && RHSAgainst)
4910       return cmpGreaterThan;
4911     if (!Against && !RHSAgainst)
4912       return Result;
4913     if (Against && RHSAgainst)
4914       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4915   }
4916   return Result;
4917 }
4918 
4919 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4920   return Floats[0].getCategory();
4921 }
4922 
4923 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4924 
4925 void DoubleAPFloat::makeInf(bool Neg) {
4926   Floats[0].makeInf(Neg);
4927   Floats[1].makeZero(/* Neg = */ false);
4928 }
4929 
4930 void DoubleAPFloat::makeZero(bool Neg) {
4931   Floats[0].makeZero(Neg);
4932   Floats[1].makeZero(/* Neg = */ false);
4933 }
4934 
4935 void DoubleAPFloat::makeLargest(bool Neg) {
4936   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4937   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4938   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4939   if (Neg)
4940     changeSign();
4941 }
4942 
4943 void DoubleAPFloat::makeSmallest(bool Neg) {
4944   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4945   Floats[0].makeSmallest(Neg);
4946   Floats[1].makeZero(/* Neg = */ false);
4947 }
4948 
4949 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4950   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4951   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4952   if (Neg)
4953     Floats[0].changeSign();
4954   Floats[1].makeZero(/* Neg = */ false);
4955 }
4956 
4957 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4958   Floats[0].makeNaN(SNaN, Neg, fill);
4959   Floats[1].makeZero(/* Neg = */ false);
4960 }
4961 
4962 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4963   auto Result = Floats[0].compare(RHS.Floats[0]);
4964   // |Float[0]| > |Float[1]|
4965   if (Result == APFloat::cmpEqual)
4966     return Floats[1].compare(RHS.Floats[1]);
4967   return Result;
4968 }
4969 
4970 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4971   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4972          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4973 }
4974 
4975 hash_code hash_value(const DoubleAPFloat &Arg) {
4976   if (Arg.Floats)
4977     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4978   return hash_combine(Arg.Semantics);
4979 }
4980 
4981 APInt DoubleAPFloat::bitcastToAPInt() const {
4982   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4983   uint64_t Data[] = {
4984       Floats[0].bitcastToAPInt().getRawData()[0],
4985       Floats[1].bitcastToAPInt().getRawData()[0],
4986   };
4987   return APInt(128, 2, Data);
4988 }
4989 
4990 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4991                                                              roundingMode RM) {
4992   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4993   APFloat Tmp(semPPCDoubleDoubleLegacy);
4994   auto Ret = Tmp.convertFromString(S, RM);
4995   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4996   return Ret;
4997 }
4998 
4999 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5000   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5001   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5002   auto Ret = Tmp.next(nextDown);
5003   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5004   return Ret;
5005 }
5006 
5007 APFloat::opStatus
5008 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5009                                 unsigned int Width, bool IsSigned,
5010                                 roundingMode RM, bool *IsExact) const {
5011   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5012   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5013       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5014 }
5015 
5016 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5017                                                   bool IsSigned,
5018                                                   roundingMode RM) {
5019   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5020   APFloat Tmp(semPPCDoubleDoubleLegacy);
5021   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5022   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5023   return Ret;
5024 }
5025 
5026 APFloat::opStatus
5027 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5028                                               unsigned int InputSize,
5029                                               bool IsSigned, roundingMode RM) {
5030   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5031   APFloat Tmp(semPPCDoubleDoubleLegacy);
5032   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5033   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5034   return Ret;
5035 }
5036 
5037 APFloat::opStatus
5038 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5039                                               unsigned int InputSize,
5040                                               bool IsSigned, roundingMode RM) {
5041   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5042   APFloat Tmp(semPPCDoubleDoubleLegacy);
5043   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5044   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5045   return Ret;
5046 }
5047 
5048 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5049                                                unsigned int HexDigits,
5050                                                bool UpperCase,
5051                                                roundingMode RM) const {
5052   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5053   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5054       .convertToHexString(DST, HexDigits, UpperCase, RM);
5055 }
5056 
5057 bool DoubleAPFloat::isDenormal() const {
5058   return getCategory() == fcNormal &&
5059          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5060           // (double)(Hi + Lo) == Hi defines a normal number.
5061           Floats[0] != Floats[0] + Floats[1]);
5062 }
5063 
5064 bool DoubleAPFloat::isSmallest() const {
5065   if (getCategory() != fcNormal)
5066     return false;
5067   DoubleAPFloat Tmp(*this);
5068   Tmp.makeSmallest(this->isNegative());
5069   return Tmp.compare(*this) == cmpEqual;
5070 }
5071 
5072 bool DoubleAPFloat::isSmallestNormalized() const {
5073   if (getCategory() != fcNormal)
5074     return false;
5075 
5076   DoubleAPFloat Tmp(*this);
5077   Tmp.makeSmallestNormalized(this->isNegative());
5078   return Tmp.compare(*this) == cmpEqual;
5079 }
5080 
5081 bool DoubleAPFloat::isLargest() const {
5082   if (getCategory() != fcNormal)
5083     return false;
5084   DoubleAPFloat Tmp(*this);
5085   Tmp.makeLargest(this->isNegative());
5086   return Tmp.compare(*this) == cmpEqual;
5087 }
5088 
5089 bool DoubleAPFloat::isInteger() const {
5090   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5091   return Floats[0].isInteger() && Floats[1].isInteger();
5092 }
5093 
5094 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5095                              unsigned FormatPrecision,
5096                              unsigned FormatMaxPadding,
5097                              bool TruncateZero) const {
5098   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5099   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5100       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5101 }
5102 
5103 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5104   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5105   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5106   if (!inv)
5107     return Tmp.getExactInverse(nullptr);
5108   APFloat Inv(semPPCDoubleDoubleLegacy);
5109   auto Ret = Tmp.getExactInverse(&Inv);
5110   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5111   return Ret;
5112 }
5113 
5114 int DoubleAPFloat::getExactLog2() const {
5115   // TODO: Implement me
5116   return INT_MIN;
5117 }
5118 
5119 int DoubleAPFloat::getExactLog2Abs() const {
5120   // TODO: Implement me
5121   return INT_MIN;
5122 }
5123 
5124 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5125                      APFloat::roundingMode RM) {
5126   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5127   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5128                        scalbn(Arg.Floats[1], Exp, RM));
5129 }
5130 
5131 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5132                     APFloat::roundingMode RM) {
5133   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5134   APFloat First = frexp(Arg.Floats[0], Exp, RM);
5135   APFloat Second = Arg.Floats[1];
5136   if (Arg.getCategory() == APFloat::fcNormal)
5137     Second = scalbn(Second, -Exp, RM);
5138   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5139 }
5140 
5141 } // namespace detail
5142 
5143 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5144   if (usesLayout<IEEEFloat>(Semantics)) {
5145     new (&IEEE) IEEEFloat(std::move(F));
5146     return;
5147   }
5148   if (usesLayout<DoubleAPFloat>(Semantics)) {
5149     const fltSemantics& S = F.getSemantics();
5150     new (&Double)
5151         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5152                       APFloat(semIEEEdouble));
5153     return;
5154   }
5155   llvm_unreachable("Unexpected semantics");
5156 }
5157 
5158 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5159                                                        roundingMode RM) {
5160   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5161 }
5162 
5163 hash_code hash_value(const APFloat &Arg) {
5164   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5165     return hash_value(Arg.U.IEEE);
5166   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5167     return hash_value(Arg.U.Double);
5168   llvm_unreachable("Unexpected semantics");
5169 }
5170 
5171 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5172     : APFloat(Semantics) {
5173   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5174   assert(StatusOrErr && "Invalid floating point representation");
5175   consumeError(StatusOrErr.takeError());
5176 }
5177 
5178 FPClassTest APFloat::classify() const {
5179   if (isZero())
5180     return isNegative() ? fcNegZero : fcPosZero;
5181   if (isNormal())
5182     return isNegative() ? fcNegNormal : fcPosNormal;
5183   if (isDenormal())
5184     return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5185   if (isInfinity())
5186     return isNegative() ? fcNegInf : fcPosInf;
5187   assert(isNaN() && "Other class of FP constant");
5188   return isSignaling() ? fcSNan : fcQNan;
5189 }
5190 
5191 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5192                                    roundingMode RM, bool *losesInfo) {
5193   if (&getSemantics() == &ToSemantics) {
5194     *losesInfo = false;
5195     return opOK;
5196   }
5197   if (usesLayout<IEEEFloat>(getSemantics()) &&
5198       usesLayout<IEEEFloat>(ToSemantics))
5199     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5200   if (usesLayout<IEEEFloat>(getSemantics()) &&
5201       usesLayout<DoubleAPFloat>(ToSemantics)) {
5202     assert(&ToSemantics == &semPPCDoubleDouble);
5203     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5204     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5205     return Ret;
5206   }
5207   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5208       usesLayout<IEEEFloat>(ToSemantics)) {
5209     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5210     *this = APFloat(std::move(getIEEE()), ToSemantics);
5211     return Ret;
5212   }
5213   llvm_unreachable("Unexpected semantics");
5214 }
5215 
5216 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5217   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5218 }
5219 
5220 void APFloat::print(raw_ostream &OS) const {
5221   SmallVector<char, 16> Buffer;
5222   toString(Buffer);
5223   OS << Buffer << "\n";
5224 }
5225 
5226 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5227 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5228 #endif
5229 
5230 void APFloat::Profile(FoldingSetNodeID &NID) const {
5231   NID.Add(bitcastToAPInt());
5232 }
5233 
5234 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5235    an APSInt, whose initial bit-width and signed-ness are used to determine the
5236    precision of the conversion.
5237  */
5238 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5239                                             roundingMode rounding_mode,
5240                                             bool *isExact) const {
5241   unsigned bitWidth = result.getBitWidth();
5242   SmallVector<uint64_t, 4> parts(result.getNumWords());
5243   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5244                                      rounding_mode, isExact);
5245   // Keeps the original signed-ness.
5246   result = APInt(bitWidth, parts);
5247   return status;
5248 }
5249 
5250 double APFloat::convertToDouble() const {
5251   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5252     return getIEEE().convertToDouble();
5253   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5254          "Float semantics is not representable by IEEEdouble");
5255   APFloat Temp = *this;
5256   bool LosesInfo;
5257   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5258   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5259   (void)St;
5260   return Temp.getIEEE().convertToDouble();
5261 }
5262 
5263 float APFloat::convertToFloat() const {
5264   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5265     return getIEEE().convertToFloat();
5266   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5267          "Float semantics is not representable by IEEEsingle");
5268   APFloat Temp = *this;
5269   bool LosesInfo;
5270   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5271   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5272   (void)St;
5273   return Temp.getIEEE().convertToFloat();
5274 }
5275 
5276 } // namespace llvm
5277 
5278 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5279