xref: /llvm-project/llvm/lib/Support/APFloat.cpp (revision 3613b2683107bd60fda6d9348623be0686f6d7e3)
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cstring>
29 #include <limits.h>
30 
// Expands to a statement that returns METHOD_CALL invoked on U.IEEE when
// getSemantics() uses the IEEEFloat layout, or on U.Double when it uses the
// DoubleAPFloat layout.  Any other layout reaches llvm_unreachable.  Because
// the expansion contains `return`, it must be used inside a function whose
// return type matches METHOD_CALL's result.
31 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
32   do {                                                                         \
33     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
34       return U.IEEE.METHOD_CALL;                                               \
35     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
36       return U.Double.METHOD_CALL;                                             \
37     llvm_unreachable("Unexpected semantics");                                  \
38   } while (false)
39 
40 using namespace llvm;
41 
42 /// A macro used to combine two fcCategory enums into one key which can be used
43 /// in a switch statement to classify how the interaction of two APFloat's
44 /// categories affects an operation.
45 ///
/// The key is computed as _lhs * 4 + _rhs, so distinct pairs map to distinct
/// keys only while every _rhs value lies in [0, 4).
/// NOTE(review): presumably fcCategory has exactly four enumerators; confirm
/// against its declaration before adding a fifth.
///
46 /// TODO: If clang source code is ever allowed to use constexpr in its own
47 /// codebase, change this into a static inline function.
48 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
50 /* Assumed in hexadecimal significand parsing, and conversion to
51    hexadecimal strings: every part must hold a whole number of 4-bit hex
      digits.  */
52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 
54 namespace llvm {
55 
56 // How the nonfinite values Inf and NaN are represented.
57 enum class fltNonfiniteBehavior {
58   // Represents standard IEEE 754 behavior. A value is nonfinite if the
59   // exponent field is all 1s. In such cases, a value is Inf if the
60   // significand bits are all zero, and NaN otherwise.
   // This is the default for a fltSemantics that does not say otherwise.
61   IEEE754,
62
63   // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64   // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65   // representation for Inf, and operations that would ordinarily produce Inf
66   // produce NaN instead.
67   // The details of the NaN representation(s) in this form are determined by the
68   // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69   // encodings do not distinguish between signalling and quiet NaN.
70   NanOnly,
71 };
72 
73 // How NaN values are represented. This is currently only used in combination
74 // with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
75 // while having IEEE non-finite behavior is liable to lead to unexpected
76 // results.
77 enum class fltNanEncoding {
78   // Represents the standard IEEE behavior where a value is NaN if its
79   // exponent is all 1s and the significand is non-zero.
80   IEEE,
81
82   // Represents the behavior in the Float8E4M3 floating point type where NaN is
83   // represented by having the exponent and mantissa set to all 1s.
84   // This behavior matches the FP8 E4M3 type described in
85   // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
86   // as non-signalling, although the paper does not state whether the NaN
87   // values are signalling or not.
88   AllOnes,
89
90   // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
91   // where NaN is represented by a sign bit of 1 and all 0s in the exponent
92   // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
93   // there is only one NaN value, it is treated as quiet NaN. This matches the
94   // behavior described in https://arxiv.org/abs/2206.02915 .
95   NegativeZero,
96 };
97 
98 /* Represents floating point arithmetic semantics.  */
99 struct fltSemantics {
100   /* The largest E such that 2^E is representable; this matches the
101      definition of IEEE 754.  */
102   APFloatBase::ExponentType maxExponent;
103 
104   /* The smallest E such that 2^E is a normalized number; this
105      matches the definition of IEEE 754.  */
106   APFloatBase::ExponentType minExponent;
107 
108   /* Number of bits in the significand.  This includes the integer
109      bit.  */
110   unsigned int precision;
111 
112   /* Number of bits actually used in the semantics. */
113   unsigned int sizeInBits;
114 
115   fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
116 
117   fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
118   // Returns true if any number described by this semantics can be precisely
119   // represented by the specified semantics. Does not take into account
120   // the value of fltNonfiniteBehavior.
121   bool isRepresentableBy(const fltSemantics &S) const {
122     return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
123            precision <= S.precision;
124   }
125 };
126 
// The semantics of every supported format.  Initializers are positional:
//   {maxExponent, minExponent, precision, sizeInBits
//    [, nonFiniteBehavior, nanEncoding]}
// Entries that omit the last two fields get fltSemantics' defaults (IEEE754
// non-finite behavior and IEEE NaN encoding).
127 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
128 static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
129 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
130 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
131 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
132 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
133 static constexpr fltSemantics semFloat8E5M2FNUZ = {
134     15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
135 static constexpr fltSemantics semFloat8E4M3FN = {
136     8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
137 static constexpr fltSemantics semFloat8E4M3FNUZ = {
138     7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
139 static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
140     4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
141 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
142 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
// Placeholder semantics with every field zero; returned by APFloatBase::Bogus().
143 static constexpr fltSemantics semBogus = {0, 0, 0, 0};
144 
145 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
146    64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
147    (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
148    Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
149    to each other, and two 11-bit exponents.
150
151    Note: we need to make the value different from semBogus as otherwise
152    an unsafe optimization may collapse both values to a single address,
153    and we heavily rely on them having distinct addresses.  That is why
       the initializer below is {-1, 0, 0, 128} rather than all zeroes.  */
154 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
155 
156 /* These are legacy semantics for the fallback, inaccurate implementation of
157    IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
158    operation. It's equivalent to having an IEEE number with consecutive 106
159    bits of mantissa and 11 bits of exponent.
160
161    It's not equivalent to IBM double-double. For example, a legit IBM
162    double-double, 1 + epsilon:
163
164      1 + epsilon = 1 + (1 >> 1076)
165
166    is not representable by a consecutive 106 bits of mantissa.
167
168    Currently, these semantics are used in the following way:
169
170      semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
171      (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
172      semPPCDoubleDoubleLegacy -> IEEE operations
173
174    We use bitcastToAPInt() to get the bit representation (in APInt) of the
175    underlying IEEEdouble, then use the APInt constructor to construct the
176    legacy IEEE float.
177
178    TODO: Implement all operations in semPPCDoubleDouble, and delete these
179    semantics.  */
180 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
181                                                           53 + 53, 128};
182 
183 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
184   switch (S) {
185   case S_IEEEhalf:
186     return IEEEhalf();
187   case S_BFloat:
188     return BFloat();
189   case S_IEEEsingle:
190     return IEEEsingle();
191   case S_IEEEdouble:
192     return IEEEdouble();
193   case S_IEEEquad:
194     return IEEEquad();
195   case S_PPCDoubleDouble:
196     return PPCDoubleDouble();
197   case S_Float8E5M2:
198     return Float8E5M2();
199   case S_Float8E5M2FNUZ:
200     return Float8E5M2FNUZ();
201   case S_Float8E4M3FN:
202     return Float8E4M3FN();
203   case S_Float8E4M3FNUZ:
204     return Float8E4M3FNUZ();
205   case S_Float8E4M3B11FNUZ:
206     return Float8E4M3B11FNUZ();
207   case S_FloatTF32:
208     return FloatTF32();
209   case S_x87DoubleExtended:
210     return x87DoubleExtended();
211   }
212   llvm_unreachable("Unrecognised floating semantics");
213 }
214 
215 APFloatBase::Semantics
216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
217   if (&Sem == &llvm::APFloat::IEEEhalf())
218     return S_IEEEhalf;
219   else if (&Sem == &llvm::APFloat::BFloat())
220     return S_BFloat;
221   else if (&Sem == &llvm::APFloat::IEEEsingle())
222     return S_IEEEsingle;
223   else if (&Sem == &llvm::APFloat::IEEEdouble())
224     return S_IEEEdouble;
225   else if (&Sem == &llvm::APFloat::IEEEquad())
226     return S_IEEEquad;
227   else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
228     return S_PPCDoubleDouble;
229   else if (&Sem == &llvm::APFloat::Float8E5M2())
230     return S_Float8E5M2;
231   else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
232     return S_Float8E5M2FNUZ;
233   else if (&Sem == &llvm::APFloat::Float8E4M3FN())
234     return S_Float8E4M3FN;
235   else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
236     return S_Float8E4M3FNUZ;
237   else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
238     return S_Float8E4M3B11FNUZ;
239   else if (&Sem == &llvm::APFloat::FloatTF32())
240     return S_FloatTF32;
241   else if (&Sem == &llvm::APFloat::x87DoubleExtended())
242     return S_x87DoubleExtended;
243   else
244     llvm_unreachable("Unknown floating semantics");
245 }
246 
// Accessors for the file-local semantics objects.  Each returns a reference
// to the same static object on every call; SemanticsToEnum relies on that by
// comparing addresses.
247 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
248 const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
249 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
250 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
251 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
252 const fltSemantics &APFloatBase::PPCDoubleDouble() {
253   return semPPCDoubleDouble;
254 }
255 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
256 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
257 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
258 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
259 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
260   return semFloat8E4M3B11FNUZ;
261 }
262 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
263 const fltSemantics &APFloatBase::x87DoubleExtended() {
264   return semX87DoubleExtended;
265 }
// Bogus() has no Semantics enumerator in EnumToSemantics/SemanticsToEnum.
266 const fltSemantics &APFloatBase::Bogus() { return semBogus; }
267 
// Namespace-scope definitions of the constexpr rounding-mode members; the
// initializers are in the class declaration.
// NOTE(review): presumably kept so the members can be ODR-used when built as
// pre-C++17, where static constexpr data members are not implicitly inline.
268 constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
269 constexpr RoundingMode APFloatBase::rmTowardPositive;
270 constexpr RoundingMode APFloatBase::rmTowardNegative;
271 constexpr RoundingMode APFloatBase::rmTowardZero;
272 constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
273 
274 /* A tight upper bound on number of parts required to hold the value
275    pow(5, power) is
276
277      power * 815 / (351 * integerPartWidth) + 1
278
279    However, whilst the result may require only this many parts,
280    because we are multiplying two values to get it, the
281    multiplication may require an extra part with the excess part
282    being zero (consider the trivial case of 1 * 1, tcFullMultiply
283    requires two parts to hold the single-part result).  So we add an
284    extra one to guarantee enough space whilst multiplying.  */
// 16383 and 113 equal semIEEEquad's maxExponent and precision, the largest
// values of those fields among the semantics defined in this file.
285 const unsigned int maxExponent = 16383;
286 const unsigned int maxPrecision = 113;
// Largest power of five that the sizing below caters for.
287 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
// The bound from the comment above plus the one extra part: "+ 1" becomes "2 +".
288 const unsigned int maxPowerOfFiveParts =
289     2 +
290     ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
291 
// Read-only accessors for individual fltSemantics fields.

// Returns the significand width in bits, including the integer bit.
292 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
293   return semantics.precision;
294 }
// Returns the largest E such that 2^E is representable.
295 APFloatBase::ExponentType
296 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
297   return semantics.maxExponent;
298 }
// Returns the smallest E such that 2^E is a normalized number.
299 APFloatBase::ExponentType
300 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
301   return semantics.minExponent;
302 }
// Returns the number of bits actually used by the format.
303 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
304   return semantics.sizeInBits;
305 }
306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
307                                                  bool isSigned) {
308   // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
309   // at least one more bit than the MaxExponent to hold the max FP value.
310   unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
311   // Extra sign bit needed.
312   if (isSigned)
313     ++MinBitWidth;
314   return MinBitWidth;
315 }
316 
317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
318                                             const fltSemantics &Dst) {
319   // Exponent range must be larger.
320   if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
321     return false;
322 
323   // If the mantissa is long enough, the result value could still be denormal
324   // with a larger exponent range.
325   //
326   // FIXME: This condition is probably not accurate but also shouldn't be a
327   // practical concern with existing types.
328   return Dst.precision >= Src.precision;
329 }
330 
// Returns Sem.sizeInBits, the number of bits actually used by the format.
// Same result as semanticsSizeInBits().
331 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
332   return Sem.sizeInBits;
333 }
334 
// Returns the exponent value one below the smallest normal exponent.
// NOTE(review): by its name this is the exponent stored for zero; confirm at
// the call sites.
335 static constexpr APFloatBase::ExponentType
336 exponentZero(const fltSemantics &semantics) {
337   return semantics.minExponent - 1;
338 }
339
// Returns the exponent value one above the largest representable exponent.
// NOTE(review): by its name this is the exponent stored for infinity; confirm
// at the call sites.
340 static constexpr APFloatBase::ExponentType
341 exponentInf(const fltSemantics &semantics) {
342   return semantics.maxExponent + 1;
343 }
344 
345 static constexpr APFloatBase::ExponentType
346 exponentNaN(const fltSemantics &semantics) {
347   if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
348     if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
349       return exponentZero(semantics);
350     return semantics.maxExponent;
351   }
352   return semantics.maxExponent + 1;
353 }
354 
355 /* A bunch of private, handy routines.  */
356 
// Wraps the message Err in a StringError carrying inconvertibleErrorCode()
// and returns it as an llvm::Error.
357 static inline Error createError(const Twine &Err) {
358   return make_error<StringError>(Err, inconvertibleErrorCode());
359 }
360 
361 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
362   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
363 }
364 
/* Maps the character code C to its decimal digit value.  '0'-'9' give
   0U-9U; every other input gives a value >= 10U (the subtraction wraps in
   unsigned arithmetic for codes below '0'), which callers treat as "not a
   digit".  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
371 
372 /* Return the value of a decimal exponent of the form
373    [+-]ddddddd.
374 
375    If the exponent overflows, returns a large exponent with the
376    appropriate sign.  */
377 static Expected<int> readExponent(StringRef::iterator begin,
378                                   StringRef::iterator end) {
379   bool isNegative;
380   unsigned int absExponent;
381   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
382   StringRef::iterator p = begin;
383 
384   // Treat no exponent as 0 to match binutils
385   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
386     return 0;
387   }
388 
389   isNegative = (*p == '-');
390   if (*p == '-' || *p == '+') {
391     p++;
392     if (p == end)
393       return createError("Exponent has no digits");
394   }
395 
396   absExponent = decDigitValue(*p++);
397   if (absExponent >= 10U)
398     return createError("Invalid character in exponent");
399 
400   for (; p != end; ++p) {
401     unsigned int value;
402 
403     value = decDigitValue(*p);
404     if (value >= 10U)
405       return createError("Invalid character in exponent");
406 
407     absExponent = absExponent * 10U + value;
408     if (absExponent >= overlargeExponent) {
409       absExponent = overlargeExponent;
410       break;
411     }
412   }
413 
414   if (isNegative)
415     return -(int) absExponent;
416   else
417     return (int) absExponent;
418 }
419 
420 /* This is ugly and needs cleaning up, but I don't immediately see
421    how whilst remaining safe.  */
422 static Expected<int> totalExponent(StringRef::iterator p,
423                                    StringRef::iterator end,
424                                    int exponentAdjustment) {
425   int unsignedExponent;
426   bool negative, overflow;
427   int exponent = 0;
428 
429   if (p == end)
430     return createError("Exponent has no digits");
431 
432   negative = *p == '-';
433   if (*p == '-' || *p == '+') {
434     p++;
435     if (p == end)
436       return createError("Exponent has no digits");
437   }
438 
439   unsignedExponent = 0;
440   overflow = false;
441   for (; p != end; ++p) {
442     unsigned int value;
443 
444     value = decDigitValue(*p);
445     if (value >= 10U)
446       return createError("Invalid character in exponent");
447 
448     unsignedExponent = unsignedExponent * 10 + value;
449     if (unsignedExponent > 32767) {
450       overflow = true;
451       break;
452     }
453   }
454 
455   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
456     overflow = true;
457 
458   if (!overflow) {
459     exponent = unsignedExponent;
460     if (negative)
461       exponent = -exponent;
462     exponent += exponentAdjustment;
463     if (exponent > 32767 || exponent < -32768)
464       overflow = true;
465   }
466 
467   if (overflow)
468     exponent = negative ? -32768: 32767;
469 
470   return exponent;
471 }
472 
473 static Expected<StringRef::iterator>
474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
475                            StringRef::iterator *dot) {
476   StringRef::iterator p = begin;
477   *dot = end;
478   while (p != end && *p == '0')
479     p++;
480 
481   if (p != end && *p == '.') {
482     *dot = p++;
483 
484     if (end - begin == 1)
485       return createError("Significand has no digits");
486 
487     while (p != end && *p == '0')
488       p++;
489   }
490 
491   return p;
492 }
493 
494 /* Given a normal decimal floating point number of the form
495
496      dddd.dddd[eE][+-]ddd
497
498    where the decimal point and exponent are optional, fill out the
499    structure D.  Exponent is appropriate if the significand is
500    treated as an integer, and normalizedExponent if the significand
501    is taken to have the decimal point after a single leading
502    non-zero digit.
503
504    If the value is zero, D->firstSigDigit points to a non-digit, and
505    the return exponent is zero.
506 */
507 struct decimalInfo {
     // First significant digit (position reached after leading zeroes and dot).
508   const char *firstSigDigit;
     // Last significant digit, after trailing zeroes have been dropped.
509   const char *lastSigDigit;
     // Decimal exponent when the significand digits are read as an integer.
510   int exponent;
     // Decimal exponent when the point follows the first significant digit.
511   int normalizedExponent;
512 };
513 
/* Parse the decimal number in [BEGIN, END) and fill out *D (see
   decimalInfo).  Returns Error::success(), or an error when the string
   contains multiple dots, has a character other than 'e'/'E' after the
   significand, has no significand digits, or has a malformed exponent.  */
514 static Error interpretDecimal(StringRef::iterator begin,
515                               StringRef::iterator end, decimalInfo *D) {
516   StringRef::iterator dot = end;
517
518   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
519   if (!PtrOrErr)
520     return PtrOrErr.takeError();
521   StringRef::iterator p = *PtrOrErr;
522
     // p is now past the leading zeroes (and past a leading dot, if any).
523   D->firstSigDigit = p;
524   D->exponent = 0;
525   D->normalizedExponent = 0;
526
     // Walk over the digits, accepting at most one '.' in total; stop at the
     // first character that is neither.
527   for (; p != end; ++p) {
528     if (*p == '.') {
529       if (dot != end)
530         return createError("String contains multiple dots");
531       dot = p++;
532       if (p == end)
533         break;
534     }
535     if (decDigitValue(*p) >= 10U)
536       break;
537   }
538
     // Stopped before the end: the only thing allowed here is an exponent.
539   if (p != end) {
540     if (*p != 'e' && *p != 'E')
541       return createError("Invalid character in significand");
542     if (p == begin)
543       return createError("Significand has no digits");
544     if (dot != end && p - begin == 1)
545       return createError("Significand has no digits");
546
547     /* p points to the first non-digit in the string */
548     auto ExpOrErr = readExponent(p + 1, end);
549     if (!ExpOrErr)
550       return ExpOrErr.takeError();
551     D->exponent = *ExpOrErr;
552
553     /* Implied decimal point?  */
554     if (dot == end)
555       dot = p;
556   }
557
558   /* If number is all zeroes accept any exponent.  */
559   if (p != D->firstSigDigit) {
560     /* Drop insignificant trailing zeroes.  */
       // Steps p backwards over trailing '0' characters and over a '.', never
       // moving before begin.
561     if (p != begin) {
562       do
563         do
564           p--;
565         while (p != begin && *p == '0');
566       while (p != begin && *p == '.');
567     }
568
569     /* Adjust the exponents for any decimal point.  */
       // (dot > p) and (dot > firstSigDigit && dot < p) are 0/1 corrections
       // for the dot character itself occupying a position in the string.
570     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
571     D->normalizedExponent = (D->exponent +
572               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
573                                       - (dot > D->firstSigDigit && dot < p)));
574   }
575
576   D->lastSigDigit = p;
577   return Error::success();
578 }
579 
580 /* Return the trailing fraction of a hexadecimal number.
581    DIGITVALUE is the first hex digit of the fraction, P points to
582    the next digit.  */
583 static Expected<lostFraction>
584 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
585                             unsigned int digitValue) {
586   unsigned int hexDigit;
587 
588   /* If the first trailing digit isn't 0 or 8 we can work out the
589      fraction immediately.  */
590   if (digitValue > 8)
591     return lfMoreThanHalf;
592   else if (digitValue < 8 && digitValue > 0)
593     return lfLessThanHalf;
594 
595   // Otherwise we need to find the first non-zero digit.
596   while (p != end && (*p == '0' || *p == '.'))
597     p++;
598 
599   if (p == end)
600     return createError("Invalid trailing hexadecimal fraction!");
601 
602   hexDigit = hexDigitValue(*p);
603 
604   /* If we ran off the end it is exactly zero or one-half, otherwise
605      a little more.  */
606   if (hexDigit == UINT_MAX)
607     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
608   else
609     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
610 }
611 
612 /* Return the fraction lost were a bignum truncated losing the least
613    significant BITS bits.  */
614 static lostFraction
615 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
616                               unsigned int partCount,
617                               unsigned int bits)
618 {
619   unsigned int lsb;
620 
621   lsb = APInt::tcLSB(parts, partCount);
622 
623   /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
624   if (bits <= lsb)
625     return lfExactlyZero;
626   if (bits == lsb + 1)
627     return lfExactlyHalf;
628   if (bits <= partCount * APFloatBase::integerPartWidth &&
629       APInt::tcExtractBit(parts, bits - 1))
630     return lfMoreThanHalf;
631 
632   return lfLessThanHalf;
633 }
634 
635 /* Shift DST right BITS bits noting lost fraction.  */
636 static lostFraction
637 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
638 {
639   lostFraction lost_fraction;
640 
641   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
642 
643   APInt::tcShiftRight(dst, parts, bits);
644 
645   return lost_fraction;
646 }
647 
648 /* Combine the effect of two lost fractions.  */
649 static lostFraction
650 combineLostFractions(lostFraction moreSignificant,
651                      lostFraction lessSignificant)
652 {
653   if (lessSignificant != lfExactlyZero) {
654     if (moreSignificant == lfExactlyZero)
655       moreSignificant = lfLessThanHalf;
656     else if (moreSignificant == lfExactlyHalf)
657       moreSignificant = lfMoreThanHalf;
658   }
659 
660   return moreSignificant;
661 }
662 
/* Bound on the error, in half-ulps, of a product.  When multiplying two
   floating point numbers that differ from the values they approximate by
   at most HUERR1 and HUERR2 half-ulps, the error from the true value is
   strictly less than the returned value.  INEXACTMULTIPLY says whether the
   multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int roundingError = inexactMultiply ? 1U : 0U;

  /* With exact inputs the error is <= inexactMultiply half-ulps.  */
  return inputError == 0 ? roundingError * 2
                         : roundingError + 2 * inputError;
}
680 
681 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
682    when the least significant BITS are truncated.  BITS cannot be
683    zero.  When the distance involves more than the lowest part, the
       all-ones value ("a lot") is returned instead of an exact count.  */
684 static APFloatBase::integerPart
685 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
686                  bool isNearest) {
687   unsigned int count, partBits;
688   APFloatBase::integerPart part, boundary;
689
690   assert(bits != 0);
691
     // count: index of the part holding the highest truncated bit.
     // partBits: how many of that part's low bits are truncated (1..width).
692   bits--;
693   count = bits / APFloatBase::integerPartWidth;
694   partBits = bits % APFloatBase::integerPartWidth + 1;
695
     // Keep only the truncated bits of the highest affected part.
696   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
697
     // Boundary within that part: the top truncated bit alone (one half) when
     // rounding to nearest, otherwise zero.
698   if (isNearest)
699     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
700   else
701     boundary = 0;
702
     // Single-part case: return the smaller of the two wrapping unsigned
     // differences, i.e. the absolute distance between part and boundary.
703   if (count == 0) {
704     if (part - boundary <= boundary - part)
705       return part - boundary;
706     else
707       return boundary - part;
708   }
709
     // Multi-part case.  An exact answer is given only if the top part is on
     // the boundary (or one below it) and the parts strictly between it and
     // parts[0] are all zero (respectively all ones).
710   if (part == boundary) {
711     while (--count)
712       if (parts[count])
713         return ~(APFloatBase::integerPart) 0; /* A lot.  */
714
715     return parts[0];
716   } else if (part == boundary - 1) {
717     while (--count)
718       if (~parts[count])
719         return ~(APFloatBase::integerPart) 0; /* A lot.  */
720
       // Unsigned negation: the amount by which parts[0] falls short of
       // wrapping to zero.
721     return -parts[0];
722   }
723
724   return ~(APFloatBase::integerPart) 0; /* A lot.  */
725 }
726 
727 /* Place pow(5, power) in DST, and return the number of parts used.
728    DST must be at least one part larger than size of the answer.
       POWER must not exceed maxExponent (asserted).  */
729 static unsigned int
730 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
     // 5^0 .. 5^7, selected by the low three bits of POWER.
731   static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
     // Holds 5^8, 5^16, 5^32, ... laid out one after another; each is computed
     // on demand by squaring its predecessor.
732   APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
733   pow5s[0] = 78125 * 5;
734
     // Number of parts occupied by the power that pow5 currently refers to.
735   unsigned int partsCount = 1;
736   APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
737   unsigned int result;
738   assert(power <= maxExponent);
739
     // p1 always holds the running product, p2 is the buffer the next
     // multiplication writes into; the two swap after each multiplication.
740   p1 = dst;
741   p2 = scratch;
742
743   *p1 = firstEightPowers[power & 7];
744   power >>= 3;
745
746   result = 1;
747   pow5 = pow5s;
748
     // Bit n of the remaining POWER selects the factor 5^(2^(n+3)).
749   for (unsigned int n = 0; power; power >>= 1, n++) {
750     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
       // For n == 0 the value (5^8) is already in pow5s[0].  Otherwise square
       // the previous power, which sits partsCount parts before pow5.
751     if (n != 0) {
752       APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
753                             partsCount, partsCount);
754       partsCount *= 2;
755       if (pow5[partsCount - 1] == 0)
756         partsCount--;
757     }
758
759     if (power & 1) {
760       APFloatBase::integerPart *tmp;
761
762       APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
763       result += partsCount;
764       if (p2[result - 1] == 0)
765         result--;
766
767       /* After the swap below the product (RESULT parts long) is in p1
768          and p2 is scratch space.  */
769       tmp = p1;
770       p1 = p2;
771       p2 = tmp;
772     }
773
       // Step past the current power so the next one is stored after it.
774     pow5 += partsCount;
775   }
776
     // The running product may have ended up in the scratch buffer.
777   if (p1 != dst)
778     APInt::tcAssign(dst, p1, result);
779
780   return result;
781 }
782 
783 /* Zero at the end to avoid modular arithmetic when adding one; used
784    when rounding up during hexadecimal output.  Index 16 therefore
       yields '0'.  */
785 static const char hexDigitsLower[] = "0123456789abcdef0";
786 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.
   NOTE(review): presumably used by the hexadecimal string conversion;
   the users are outside this part of the file.  */
787 static const char infinityL[] = "infinity";
788 static const char infinityU[] = "INFINITY";
789 static const char NaNL[] = "nan";
790 static const char NaNU[] = "NAN";
791 
792 /* Write out an integerPart in hexadecimal, starting with the most
793    significant nibble.  Write out exactly COUNT hexdigits, return
794    COUNT.  */
795 static unsigned int
796 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
797            const char *hexDigitChars)
798 {
799   unsigned int result = count;
800 
801   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
802 
803   part >>= (APFloatBase::integerPartWidth - 4 * count);
804   while (count--) {
805     dst[count] = hexDigitChars[part & 0xf];
806     part >>= 4;
807   }
808 
809   return result;
810 }
811 
/* Write N to DST as an unsigned decimal integer without leading zeroes (a
   single '0' for zero) and without a terminator.  Returns the position one
   past the last character written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  char *const digitsEnd = digits + sizeof(digits);
  char *first = digitsEnd;

  /* Produce the digits least significant first, filling the buffer from
     its end so that they come out in reading order.  */
  do {
    *--first = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  for (; first != digitsEnd; ++first)
    *dst++ = *first;

  return dst;
}
829 
830 /* Write out a signed decimal integer.  */
831 static char *
832 writeSignedDecimal (char *dst, int value)
833 {
834   if (value < 0) {
835     *dst++ = '-';
836     dst = writeUnsignedDecimal(dst, -(unsigned) value);
837   } else
838     dst = writeUnsignedDecimal(dst, value);
839 
840   return dst;
841 }
842 
843 namespace detail {
844 /* Constructors.  */
845 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
846   unsigned int count;
847 
848   semantics = ourSemantics;
849   count = partCount();
850   if (count > 1)
851     significand.parts = new integerPart[count];
852 }
853 
854 void IEEEFloat::freeSignificand() {
855   if (needsCleanup())
856     delete [] significand.parts;
857 }
858 
859 void IEEEFloat::assign(const IEEEFloat &rhs) {
860   assert(semantics == rhs.semantics);
861 
862   sign = rhs.sign;
863   category = rhs.category;
864   exponent = rhs.exponent;
865   if (isFiniteNonZero() || category == fcNaN)
866     copySignificand(rhs);
867 }
868 
869 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
870   assert(isFiniteNonZero() || category == fcNaN);
871   assert(rhs.partCount() >= partCount());
872 
873   APInt::tcAssign(significandParts(), rhs.significandParts(),
874                   partCount());
875 }
876 
877 /* Make this number a NaN, with an arbitrary but deterministic value
878    for the significand.  If double or longer, this is a signalling NaN,
879    which may not be ideal.  If float, this is QNaN(0).  */
880 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
881   category = fcNaN;
882   sign = Negative;
883   exponent = exponentNaN();
884 
885   integerPart *significand = significandParts();
886   unsigned numParts = partCount();
887 
888   APInt fill_storage;
889   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
890     // Finite-only types do not distinguish signalling and quiet NaN, so
891     // make them all signalling.
892     SNaN = false;
893     if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
894       sign = true;
895       fill_storage = APInt::getZero(semantics->precision - 1);
896     } else {
897       fill_storage = APInt::getAllOnes(semantics->precision - 1);
898     }
899     fill = &fill_storage;
900   }
901 
902   // Set the significand bits to the fill.
903   if (!fill || fill->getNumWords() < numParts)
904     APInt::tcSet(significand, 0, numParts);
905   if (fill) {
906     APInt::tcAssign(significand, fill->getRawData(),
907                     std::min(fill->getNumWords(), numParts));
908 
909     // Zero out the excess bits of the significand.
910     unsigned bitsToPreserve = semantics->precision - 1;
911     unsigned part = bitsToPreserve / 64;
912     bitsToPreserve %= 64;
913     significand[part] &= ((1ULL << bitsToPreserve) - 1);
914     for (part++; part != numParts; ++part)
915       significand[part] = 0;
916   }
917 
918   unsigned QNaNBit = semantics->precision - 2;
919 
920   if (SNaN) {
921     // We always have to clear the QNaN bit to make it an SNaN.
922     APInt::tcClearBit(significand, QNaNBit);
923 
924     // If there are no bits set in the payload, we have to set
925     // *something* to make it a NaN instead of an infinity;
926     // conventionally, this is the next bit down from the QNaN bit.
927     if (APInt::tcIsZero(significand, numParts))
928       APInt::tcSetBit(significand, QNaNBit - 1);
929   } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
930     // The only NaN is a quiet NaN, and it has no bits sets in the significand.
931     // Do nothing.
932   } else {
933     // We always have to set the QNaN bit to make it a QNaN.
934     APInt::tcSetBit(significand, QNaNBit);
935   }
936 
937   // For x87 extended precision, we want to make a NaN, not a
938   // pseudo-NaN.  Maybe we should expose the ability to make
939   // pseudo-NaNs?
940   if (semantics == &semX87DoubleExtended)
941     APInt::tcSetBit(significand, QNaNBit + 1);
942 }
943 
944 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
945   if (this != &rhs) {
946     if (semantics != rhs.semantics) {
947       freeSignificand();
948       initialize(rhs.semantics);
949     }
950     assign(rhs);
951   }
952 
953   return *this;
954 }
955 
956 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
957   freeSignificand();
958 
959   semantics = rhs.semantics;
960   significand = rhs.significand;
961   exponent = rhs.exponent;
962   category = rhs.category;
963   sign = rhs.sign;
964 
965   rhs.semantics = &semBogus;
966   return *this;
967 }
968 
969 bool IEEEFloat::isDenormal() const {
970   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
971          (APInt::tcExtractBit(significandParts(),
972                               semantics->precision - 1) == 0);
973 }
974 
975 bool IEEEFloat::isSmallest() const {
976   // The smallest number by magnitude in our format will be the smallest
977   // denormal, i.e. the floating point number with exponent being minimum
978   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
979   return isFiniteNonZero() && exponent == semantics->minExponent &&
980     significandMSB() == 0;
981 }
982 
983 bool IEEEFloat::isSmallestNormalized() const {
984   return getCategory() == fcNormal && exponent == semantics->minExponent &&
985          isSignificandAllZerosExceptMSB();
986 }
987 
988 bool IEEEFloat::isSignificandAllOnes() const {
989   // Test if the significand excluding the integral bit is all ones. This allows
990   // us to test for binade boundaries.
991   const integerPart *Parts = significandParts();
992   const unsigned PartCount = partCountForBits(semantics->precision);
993   for (unsigned i = 0; i < PartCount - 1; i++)
994     if (~Parts[i])
995       return false;
996 
997   // Set the unused high bits to all ones when we compare.
998   const unsigned NumHighBits =
999     PartCount*integerPartWidth - semantics->precision + 1;
1000   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1001          "Can not have more high bits to fill than integerPartWidth");
1002   const integerPart HighBitFill =
1003     ~integerPart(0) << (integerPartWidth - NumHighBits);
1004   if (~(Parts[PartCount - 1] | HighBitFill))
1005     return false;
1006 
1007   return true;
1008 }
1009 
1010 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1011   // Test if the significand excluding the integral bit is all ones except for
1012   // the least significant bit.
1013   const integerPart *Parts = significandParts();
1014 
1015   if (Parts[0] & 1)
1016     return false;
1017 
1018   const unsigned PartCount = partCountForBits(semantics->precision);
1019   for (unsigned i = 0; i < PartCount - 1; i++) {
1020     if (~Parts[i] & ~unsigned{!i})
1021       return false;
1022   }
1023 
1024   // Set the unused high bits to all ones when we compare.
1025   const unsigned NumHighBits =
1026       PartCount * integerPartWidth - semantics->precision + 1;
1027   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1028          "Can not have more high bits to fill than integerPartWidth");
1029   const integerPart HighBitFill = ~integerPart(0)
1030                                   << (integerPartWidth - NumHighBits);
1031   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1032     return false;
1033 
1034   return true;
1035 }
1036 
1037 bool IEEEFloat::isSignificandAllZeros() const {
1038   // Test if the significand excluding the integral bit is all zeros. This
1039   // allows us to test for binade boundaries.
1040   const integerPart *Parts = significandParts();
1041   const unsigned PartCount = partCountForBits(semantics->precision);
1042 
1043   for (unsigned i = 0; i < PartCount - 1; i++)
1044     if (Parts[i])
1045       return false;
1046 
1047   // Compute how many bits are used in the final word.
1048   const unsigned NumHighBits =
1049     PartCount*integerPartWidth - semantics->precision + 1;
1050   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1051          "clear than integerPartWidth");
1052   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1053 
1054   if (Parts[PartCount - 1] & HighBitMask)
1055     return false;
1056 
1057   return true;
1058 }
1059 
1060 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1061   const integerPart *Parts = significandParts();
1062   const unsigned PartCount = partCountForBits(semantics->precision);
1063 
1064   for (unsigned i = 0; i < PartCount - 1; i++) {
1065     if (Parts[i])
1066       return false;
1067   }
1068 
1069   const unsigned NumHighBits =
1070       PartCount * integerPartWidth - semantics->precision + 1;
1071   return Parts[PartCount - 1] == integerPart(1)
1072                                      << (integerPartWidth - NumHighBits);
1073 }
1074 
1075 bool IEEEFloat::isLargest() const {
1076   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1077       semantics->nanEncoding == fltNanEncoding::AllOnes) {
1078     // The largest number by magnitude in our format will be the floating point
1079     // number with maximum exponent and with significand that is all ones except
1080     // the LSB.
1081     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1082            isSignificandAllOnesExceptLSB();
1083   } else {
1084     // The largest number by magnitude in our format will be the floating point
1085     // number with maximum exponent and with significand that is all ones.
1086     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1087            isSignificandAllOnes();
1088   }
1089 }
1090 
1091 bool IEEEFloat::isInteger() const {
1092   // This could be made more efficient; I'm going for obviously correct.
1093   if (!isFinite()) return false;
1094   IEEEFloat truncated = *this;
1095   truncated.roundToIntegral(rmTowardZero);
1096   return compare(truncated) == cmpEqual;
1097 }
1098 
1099 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1100   if (this == &rhs)
1101     return true;
1102   if (semantics != rhs.semantics ||
1103       category != rhs.category ||
1104       sign != rhs.sign)
1105     return false;
1106   if (category==fcZero || category==fcInfinity)
1107     return true;
1108 
1109   if (isFiniteNonZero() && exponent != rhs.exponent)
1110     return false;
1111 
1112   return std::equal(significandParts(), significandParts() + partCount(),
1113                     rhs.significandParts());
1114 }
1115 
1116 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1117   initialize(&ourSemantics);
1118   sign = 0;
1119   category = fcNormal;
1120   zeroSignificand();
1121   exponent = ourSemantics.precision - 1;
1122   significandParts()[0] = value;
1123   normalize(rmNearestTiesToEven, lfExactlyZero);
1124 }
1125 
// Construct a zero of the given semantics; makeZero() is asked for the
// non-negative one.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
  initialize(&ourSemantics);
  makeZero(false);
}
1130 
// Delegate to the zero-initializing constructor: a later copy construction
// may inspect the category, so it must not be left as garbage even for an
// "uninitialized" value.  The tag argument is unused.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
    : IEEEFloat(ourSemantics) {}
1135 
// Copy constructor: set up storage for RHS's semantics, then copy its value.
IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
  initialize(rhs.semantics);
  assign(rhs);
}
1140 
// Move constructor, implemented via move assignment.  `semantics` starts out
// as semBogus because move assignment calls freeSignificand() on this object
// before taking over RHS.
// NOTE(review): presumably semBogus makes that freeSignificand() a no-op --
// confirm against needsCleanup().
IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
  *this = std::move(rhs);
}
1144 
// Destructor: release the significand storage if this object owns any.
IEEEFloat::~IEEEFloat() { freeSignificand(); }
1146 
// Number of integerPart words in the significand.  It is sized for
// "precision + 1" bits, i.e. one bit more than the format's precision.
unsigned int IEEEFloat::partCount() const {
  return partCountForBits(semantics->precision + 1);
}
1150 
// Read-only view of the significand parts.  Forwards to the non-const
// overload (which only selects a pointer) and re-adds constness on return.
const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
  return const_cast<IEEEFloat *>(this)->significandParts();
}
1154 
1155 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1156   if (partCount() > 1)
1157     return significand.parts;
1158   else
1159     return &significand.part;
1160 }
1161 
// Clear every part of the significand.
void IEEEFloat::zeroSignificand() {
  APInt::tcSet(significandParts(), 0, partCount());
}
1165 
1166 /* Increment an fcNormal floating point number's significand.  */
1167 void IEEEFloat::incrementSignificand() {
1168   integerPart carry;
1169 
1170   carry = APInt::tcIncrement(significandParts(), partCount());
1171 
1172   /* Our callers should never cause us to overflow.  */
1173   assert(carry == 0);
1174   (void)carry;
1175 }
1176 
1177 /* Add the significand of the RHS.  Returns the carry flag.  */
1178 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1179   integerPart *parts;
1180 
1181   parts = significandParts();
1182 
1183   assert(semantics == rhs.semantics);
1184   assert(exponent == rhs.exponent);
1185 
1186   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1187 }
1188 
1189 /* Subtract the significand of the RHS with a borrow flag.  Returns
1190    the borrow flag.  */
1191 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1192                                                       integerPart borrow) {
1193   integerPart *parts;
1194 
1195   parts = significandParts();
1196 
1197   assert(semantics == rhs.semantics);
1198   assert(exponent == rhs.exponent);
1199 
1200   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1201                            partCount());
1202 }
1203 
/* Multiply our significand by the significand of the RHS.  If ADDEND is
   non-zero, add it on to the full-precision result of the multiplication
   (fused multiply-add).  The exponent is updated to match; the result is
   not necessarily normalized.  Returns the lost fraction.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Use the on-stack scratch buffer when the product fits in it; otherwise
  // fall back to a heap buffer that is released at the end of the function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    // To reuse addOrSubtractSignificand(), this object temporarily adopts the
    // wide product as its significand together with a widened copy of its
    // semantics; both are restored below.
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
    assert(status == opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  In the single-part case the sum lives in
       significand.part, so copy it back into the product buffer first.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts "precision + 1", one more
  // than this derivation; presumably the extra one pairs with the "+= 2"
  // above -- confirm.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Keep only the low parts that fit our own significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}
1338 
// Plain multiplication: forwards to the two-argument overload with a
// default-constructed (zero) addend of our own semantics, so nothing is added.
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  return multiplySignificand(rhs, IEEEFloat(*semantics));
}
1342 
/* Divide our significand by the significand of the RHS using bit-by-bit
   long division, leaving the quotient in our significand and adjusting the
   exponent to match.  Returns the lost fraction.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  /* One buffer holds both working copies: the dividend in the low half and
     the divisor in the high half.  The on-stack scratch array is large
     enough for up to two parts each.  */
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place.
     Our own significand is cleared so quotient bits can be set into it.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division.  Produces one quotient bit per iteration, from bit
     "precision - 1" down to bit 0.  */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction.  The remainder has already been doubled
     by the final shift of the loop, so comparing it with the divisor tells
     which side of one half the discarded part falls on.  */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}
1425 
// Result of APInt::tcMSB over all significand parts, i.e. the position of
// the most significant set bit of the significand.
unsigned int IEEEFloat::significandMSB() const {
  return APInt::tcMSB(significandParts(), partCount());
}
1429 
// Result of APInt::tcLSB over all significand parts, i.e. the position of
// the least significant set bit of the significand.
unsigned int IEEEFloat::significandLSB() const {
  return APInt::tcLSB(significandParts(), partCount());
}
1433 
1434 /* Note that a zero result is NOT normalized to fcZero.  */
1435 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1436   /* Our exponent should not overflow.  */
1437   assert((ExponentType) (exponent + bits) >= exponent);
1438 
1439   exponent += bits;
1440 
1441   return shiftRight(significandParts(), partCount(), bits);
1442 }
1443 
1444 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1445 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1446   assert(bits < semantics->precision);
1447 
1448   if (bits) {
1449     unsigned int partsCount = partCount();
1450 
1451     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1452     exponent -= bits;
1453 
1454     assert(!APInt::tcIsZero(significandParts(), partsCount));
1455   }
1456 }
1457 
1458 IEEEFloat::cmpResult
1459 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1460   int compare;
1461 
1462   assert(semantics == rhs.semantics);
1463   assert(isFiniteNonZero());
1464   assert(rhs.isFiniteNonZero());
1465 
1466   compare = exponent - rhs.exponent;
1467 
1468   /* If exponents are equal, do an unsigned bignum comparison of the
1469      significands.  */
1470   if (compare == 0)
1471     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1472                                partCount());
1473 
1474   if (compare > 0)
1475     return cmpGreaterThan;
1476   else if (compare < 0)
1477     return cmpLessThan;
1478   else
1479     return cmpEqual;
1480 }
1481 
1482 /* Set the least significant BITS bits of a bignum, clear the
1483    rest.  */
1484 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1485                                       unsigned bits) {
1486   unsigned i = 0;
1487   while (bits > APInt::APINT_BITS_PER_WORD) {
1488     dst[i++] = ~(APInt::WordType)0;
1489     bits -= APInt::APINT_BITS_PER_WORD;
1490   }
1491 
1492   if (bits)
1493     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1494 
1495   while (i < parts)
1496     dst[i++] = 0;
1497 }
1498 
1499 /* Handle overflow.  Sign is preserved.  We either become infinity or
1500    the largest finite number.  */
1501 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1502   /* Infinity?  */
1503   if (rounding_mode == rmNearestTiesToEven ||
1504       rounding_mode == rmNearestTiesToAway ||
1505       (rounding_mode == rmTowardPositive && !sign) ||
1506       (rounding_mode == rmTowardNegative && sign)) {
1507     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1508       makeNaN(false, sign);
1509     else
1510       category = fcInfinity;
1511     return (opStatus) (opOverflow | opInexact);
1512   }
1513 
1514   /* Otherwise we become the largest finite number.  */
1515   category = fcNormal;
1516   exponent = semantics->maxExponent;
1517   tcSetLeastSignificantBits(significandParts(), partCount(),
1518                             semantics->precision);
1519   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1520       semantics->nanEncoding == fltNanEncoding::AllOnes)
1521     APInt::tcClearBit(significandParts(), 0);
1522 
1523   return opInexact;
1524 }
1525 
1526 /* Returns TRUE if, when truncating the current number, with BIT the
1527    new LSB, with the given lost fraction and rounding mode, the result
1528    would need to be rounded away from zero (i.e., by increasing the
1529    signficand).  This routine must work for fcZero of both signs, and
1530    fcNormal numbers.  */
1531 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1532                                   lostFraction lost_fraction,
1533                                   unsigned int bit) const {
1534   /* NaNs and infinities should not have lost fractions.  */
1535   assert(isFiniteNonZero() || category == fcZero);
1536 
1537   /* Current callers never pass this so we don't handle it.  */
1538   assert(lost_fraction != lfExactlyZero);
1539 
1540   switch (rounding_mode) {
1541   case rmNearestTiesToAway:
1542     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1543 
1544   case rmNearestTiesToEven:
1545     if (lost_fraction == lfMoreThanHalf)
1546       return true;
1547 
1548     /* Our zeroes don't have a significand to test.  */
1549     if (lost_fraction == lfExactlyHalf && category != fcZero)
1550       return APInt::tcExtractBit(significandParts(), bit);
1551 
1552     return false;
1553 
1554   case rmTowardZero:
1555     return false;
1556 
1557   case rmTowardPositive:
1558     return !sign;
1559 
1560   case rmTowardNegative:
1561     return sign;
1562 
1563   default:
1564     break;
1565   }
1566   llvm_unreachable("Invalid rounding mode found");
1567 }
1568 
/* Bring a finite non-zero value into canonical form and round it.
   LOST_FRACTION describes what has already been discarded below the
   significand's LSB; ROUNDING_MODE selects how that is rounded.  Values
   that are not finite non-zero are left untouched and report opOK.
   Returns opOK for an exact result, the result of handleOverflow() on
   overflow, opInexact for a rounded normal result, and
   opUnderflow | opInexact for an inexact denormal or zero result.  */
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  unsigned int omsb;                /* One, not zero, based MSB.  */
  int exponentChange;

  if (!isFiniteNonZero())
    return opOK;

  /* Before rounding normalize the exponent of fcNormal numbers.  */
  omsb = significandMSB() + 1;

  if (omsb) {
    /* OMSB is numbered from 1.  We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
       the exponent.  */
    exponentChange = omsb - semantics->precision;

    /* If the resulting exponent is too high, overflow according to
       the rounding mode.  */
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    /* Subnormal numbers have exponent minExponent, and their MSB
       is forced based on that.  */
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    /* Shifting left is easy as we don't lose precision.  */
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      lostFraction lf;

      /* Shift right and capture any new lost fraction.  */
      lf = shiftSignificandRight(exponentChange);

      lost_fraction = combineLostFractions(lf, lost_fraction);

      /* Keep OMSB up-to-date.  */
      if (omsb > (unsigned) exponentChange)
        omsb -= exponentChange;
      else
        omsb = 0;
    }
  }

  // The all-ones value is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes &&
      exponent == semantics->maxExponent && isSignificandAllOnes())
    return handleOverflow(rounding_mode);

  /* Now round the number according to rounding_mode given the lost
     fraction.  */

  /* As specified in IEEE 754, since we do not trap we do not report
     underflow for exact results.  */
  if (lost_fraction == lfExactlyZero) {
    /* Canonicalize zeroes.  Formats that encode NaN as negative zero
       get a non-negative zero.  */
    if (omsb == 0) {
      category = fcZero;
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
    }

    return opOK;
  }

  /* Increment the significand if we're rounding away from zero.  */
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    /* A significand that was shifted down to zero is about to become the
       smallest denormal, so pin the exponent to the minimum.  */
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    /* Did the significand increment overflow?  */
    if (omsb == (unsigned) semantics->precision + 1) {
      /* Renormalize by incrementing the exponent and shifting our
         significand right one.  However if we already have the
         maximum exponent we overflow to infinity.  */
      if (exponent == semantics->maxExponent)
        // Invoke overflow handling with a rounding mode that will guarantee
        // that the result gets turned into the correct infinity representation.
        // This is needed instead of just setting the category to infinity to
        // account for 8-bit floating point types that have no inf, only NaN.
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    // The all-ones value is an overflow if NaN is all ones. If NaN is
    // represented by negative zero, then it is a valid finite value.
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
        semantics->nanEncoding == fltNanEncoding::AllOnes &&
        exponent == semantics->maxExponent && isSignificandAllOnes())
      return handleOverflow(rounding_mode);
  }

  /* The normal case - we were and are not denormal, and any
     significand increment above didn't overflow.  */
  if (omsb == semantics->precision)
    return opInexact;

  /* We have a non-zero denormal.  */
  assert(omsb < semantics->precision);

  /* Canonicalize zeroes.  */
  if (omsb == 0) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  }

  /* The fcZero case is a denormal that underflowed to zero.  */
  return (opStatus) (opUnderflow | opInexact);
}
1695 
/* Resolve the operand-category combinations of an addition (or, when
   SUBTRACT is set, a subtraction) of RHS that need no significand
   arithmetic: anything involving a NaN, an infinity or a zero.  The result
   is stored in this object and the operation status returned.  */
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  /* Only the RHS is a NaN: adopt it, then treat it like our own NaN.  */
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    /* The result is our NaN, quieted if it was signalling.  A signalling
       NaN on either side reports opInvalidOp.  */
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  /* The RHS cannot change our value.  */
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  /* The RHS infinity wins; a subtraction flips its sign.  */
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  /* Zero plus or minus a normal number is that number, sign-adjusted.  */
  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    /* Sign depends on rounding mode; handled by caller.  */
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    /* Differently signed infinities can only be validly
       subtracted.  */
    if (((sign ^ rhs.sign)!=0) != subtract) {
      makeNaN();
      return opInvalidOp;
    }

    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    /* Not a special case.
       NOTE(review): opDivByZero cannot arise from an addition, so this is
       presumably a sentinel telling the caller to perform the real
       arithmetic -- confirm against the callers.  */
    return opDivByZero;
  }
}
1751 
1752 /* Add or subtract two normal numbers.  */
1753 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1754                                                  bool subtract) {
1755   integerPart carry;
1756   lostFraction lost_fraction;
1757   int bits;
1758 
1759   /* Determine if the operation on the absolute values is effectively
1760      an addition or subtraction.  */
1761   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1762 
1763   /* Are we bigger exponent-wise than the RHS?  */
1764   bits = exponent - rhs.exponent;
1765 
1766   /* Subtraction is more subtle than one might naively expect.  */
1767   if (subtract) {
1768     IEEEFloat temp_rhs(rhs);
1769 
1770     if (bits == 0)
1771       lost_fraction = lfExactlyZero;
1772     else if (bits > 0) {
1773       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1774       shiftSignificandLeft(1);
1775     } else {
1776       lost_fraction = shiftSignificandRight(-bits - 1);
1777       temp_rhs.shiftSignificandLeft(1);
1778     }
1779 
1780     // Should we reverse the subtraction.
1781     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1782       carry = temp_rhs.subtractSignificand
1783         (*this, lost_fraction != lfExactlyZero);
1784       copySignificand(temp_rhs);
1785       sign = !sign;
1786     } else {
1787       carry = subtractSignificand
1788         (temp_rhs, lost_fraction != lfExactlyZero);
1789     }
1790 
1791     /* Invert the lost fraction - it was on the RHS and
1792        subtracted.  */
1793     if (lost_fraction == lfLessThanHalf)
1794       lost_fraction = lfMoreThanHalf;
1795     else if (lost_fraction == lfMoreThanHalf)
1796       lost_fraction = lfLessThanHalf;
1797 
1798     /* The code above is intended to ensure that no borrow is
1799        necessary.  */
1800     assert(!carry);
1801     (void)carry;
1802   } else {
1803     if (bits > 0) {
1804       IEEEFloat temp_rhs(rhs);
1805 
1806       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1807       carry = addSignificand(temp_rhs);
1808     } else {
1809       lost_fraction = shiftSignificandRight(-bits);
1810       carry = addSignificand(rhs);
1811     }
1812 
1813     /* We have a guard bit; generating a carry cannot happen.  */
1814     assert(!carry);
1815     (void)carry;
1816   }
1817 
1818   return lost_fraction;
1819 }
1820 
1821 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1822   switch (PackCategoriesIntoKey(category, rhs.category)) {
1823   default:
1824     llvm_unreachable(nullptr);
1825 
1826   case PackCategoriesIntoKey(fcZero, fcNaN):
1827   case PackCategoriesIntoKey(fcNormal, fcNaN):
1828   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1829     assign(rhs);
1830     sign = false;
1831     [[fallthrough]];
1832   case PackCategoriesIntoKey(fcNaN, fcZero):
1833   case PackCategoriesIntoKey(fcNaN, fcNormal):
1834   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1835   case PackCategoriesIntoKey(fcNaN, fcNaN):
1836     sign ^= rhs.sign; // restore the original sign
1837     if (isSignaling()) {
1838       makeQuiet();
1839       return opInvalidOp;
1840     }
1841     return rhs.isSignaling() ? opInvalidOp : opOK;
1842 
1843   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1844   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1845   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1846     category = fcInfinity;
1847     return opOK;
1848 
1849   case PackCategoriesIntoKey(fcZero, fcNormal):
1850   case PackCategoriesIntoKey(fcNormal, fcZero):
1851   case PackCategoriesIntoKey(fcZero, fcZero):
1852     category = fcZero;
1853     return opOK;
1854 
1855   case PackCategoriesIntoKey(fcZero, fcInfinity):
1856   case PackCategoriesIntoKey(fcInfinity, fcZero):
1857     makeNaN();
1858     return opInvalidOp;
1859 
1860   case PackCategoriesIntoKey(fcNormal, fcNormal):
1861     return opOK;
1862   }
1863 }
1864 
1865 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1866   switch (PackCategoriesIntoKey(category, rhs.category)) {
1867   default:
1868     llvm_unreachable(nullptr);
1869 
1870   case PackCategoriesIntoKey(fcZero, fcNaN):
1871   case PackCategoriesIntoKey(fcNormal, fcNaN):
1872   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1873     assign(rhs);
1874     sign = false;
1875     [[fallthrough]];
1876   case PackCategoriesIntoKey(fcNaN, fcZero):
1877   case PackCategoriesIntoKey(fcNaN, fcNormal):
1878   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1879   case PackCategoriesIntoKey(fcNaN, fcNaN):
1880     sign ^= rhs.sign; // restore the original sign
1881     if (isSignaling()) {
1882       makeQuiet();
1883       return opInvalidOp;
1884     }
1885     return rhs.isSignaling() ? opInvalidOp : opOK;
1886 
1887   case PackCategoriesIntoKey(fcInfinity, fcZero):
1888   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1889   case PackCategoriesIntoKey(fcZero, fcInfinity):
1890   case PackCategoriesIntoKey(fcZero, fcNormal):
1891     return opOK;
1892 
1893   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1894     category = fcZero;
1895     return opOK;
1896 
1897   case PackCategoriesIntoKey(fcNormal, fcZero):
1898     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1899       makeNaN(false, sign);
1900     else
1901       category = fcInfinity;
1902     return opDivByZero;
1903 
1904   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1905   case PackCategoriesIntoKey(fcZero, fcZero):
1906     makeNaN();
1907     return opInvalidOp;
1908 
1909   case PackCategoriesIntoKey(fcNormal, fcNormal):
1910     return opOK;
1911   }
1912 }
1913 
1914 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1915   switch (PackCategoriesIntoKey(category, rhs.category)) {
1916   default:
1917     llvm_unreachable(nullptr);
1918 
1919   case PackCategoriesIntoKey(fcZero, fcNaN):
1920   case PackCategoriesIntoKey(fcNormal, fcNaN):
1921   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1922     assign(rhs);
1923     [[fallthrough]];
1924   case PackCategoriesIntoKey(fcNaN, fcZero):
1925   case PackCategoriesIntoKey(fcNaN, fcNormal):
1926   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1927   case PackCategoriesIntoKey(fcNaN, fcNaN):
1928     if (isSignaling()) {
1929       makeQuiet();
1930       return opInvalidOp;
1931     }
1932     return rhs.isSignaling() ? opInvalidOp : opOK;
1933 
1934   case PackCategoriesIntoKey(fcZero, fcInfinity):
1935   case PackCategoriesIntoKey(fcZero, fcNormal):
1936   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1937     return opOK;
1938 
1939   case PackCategoriesIntoKey(fcNormal, fcZero):
1940   case PackCategoriesIntoKey(fcInfinity, fcZero):
1941   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1942   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1943   case PackCategoriesIntoKey(fcZero, fcZero):
1944     makeNaN();
1945     return opInvalidOp;
1946 
1947   case PackCategoriesIntoKey(fcNormal, fcNormal):
1948     return opOK;
1949   }
1950 }
1951 
1952 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1953   switch (PackCategoriesIntoKey(category, rhs.category)) {
1954   default:
1955     llvm_unreachable(nullptr);
1956 
1957   case PackCategoriesIntoKey(fcZero, fcNaN):
1958   case PackCategoriesIntoKey(fcNormal, fcNaN):
1959   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1960     assign(rhs);
1961     [[fallthrough]];
1962   case PackCategoriesIntoKey(fcNaN, fcZero):
1963   case PackCategoriesIntoKey(fcNaN, fcNormal):
1964   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1965   case PackCategoriesIntoKey(fcNaN, fcNaN):
1966     if (isSignaling()) {
1967       makeQuiet();
1968       return opInvalidOp;
1969     }
1970     return rhs.isSignaling() ? opInvalidOp : opOK;
1971 
1972   case PackCategoriesIntoKey(fcZero, fcInfinity):
1973   case PackCategoriesIntoKey(fcZero, fcNormal):
1974   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1975     return opOK;
1976 
1977   case PackCategoriesIntoKey(fcNormal, fcZero):
1978   case PackCategoriesIntoKey(fcInfinity, fcZero):
1979   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1980   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1981   case PackCategoriesIntoKey(fcZero, fcZero):
1982     makeNaN();
1983     return opInvalidOp;
1984 
1985   case PackCategoriesIntoKey(fcNormal, fcNormal):
1986     return opDivByZero; // fake status, indicating this is not a special case
1987   }
1988 }
1989 
1990 /* Change sign.  */
1991 void IEEEFloat::changeSign() {
1992   // With NaN-as-negative-zero, neither NaN or negative zero can change
1993   // their signs.
1994   if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
1995       (isZero() || isNaN()))
1996     return;
1997   /* Look mummy, this one's easy.  */
1998   sign = !sign;
1999 }
2000 
2001 /* Normalized addition or subtraction.  */
2002 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2003                                              roundingMode rounding_mode,
2004                                              bool subtract) {
2005   opStatus fs;
2006 
2007   fs = addOrSubtractSpecials(rhs, subtract);
2008 
2009   /* This return code means it was not a simple case.  */
2010   if (fs == opDivByZero) {
2011     lostFraction lost_fraction;
2012 
2013     lost_fraction = addOrSubtractSignificand(rhs, subtract);
2014     fs = normalize(rounding_mode, lost_fraction);
2015 
2016     /* Can only be zero if we lost no fraction.  */
2017     assert(category != fcZero || lost_fraction == lfExactlyZero);
2018   }
2019 
2020   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2021      positive zero unless rounding to minus infinity, except that
2022      adding two like-signed zeroes gives that zero.  */
2023   if (category == fcZero) {
2024     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2025       sign = (rounding_mode == rmTowardNegative);
2026     // NaN-in-negative-zero means zeros need to be normalized to +0.
2027     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2028       sign = false;
2029   }
2030 
2031   return fs;
2032 }
2033 
2034 /* Normalized addition.  */
2035 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2036                                    roundingMode rounding_mode) {
2037   return addOrSubtract(rhs, rounding_mode, false);
2038 }
2039 
2040 /* Normalized subtraction.  */
2041 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2042                                         roundingMode rounding_mode) {
2043   return addOrSubtract(rhs, rounding_mode, true);
2044 }
2045 
2046 /* Normalized multiply.  */
2047 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2048                                         roundingMode rounding_mode) {
2049   opStatus fs;
2050 
2051   sign ^= rhs.sign;
2052   fs = multiplySpecials(rhs);
2053 
2054   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2055     sign = false;
2056   if (isFiniteNonZero()) {
2057     lostFraction lost_fraction = multiplySignificand(rhs);
2058     fs = normalize(rounding_mode, lost_fraction);
2059     if (lost_fraction != lfExactlyZero)
2060       fs = (opStatus) (fs | opInexact);
2061   }
2062 
2063   return fs;
2064 }
2065 
2066 /* Normalized divide.  */
2067 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2068                                       roundingMode rounding_mode) {
2069   opStatus fs;
2070 
2071   sign ^= rhs.sign;
2072   fs = divideSpecials(rhs);
2073 
2074   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2075     sign = false;
2076   if (isFiniteNonZero()) {
2077     lostFraction lost_fraction = divideSignificand(rhs);
2078     fs = normalize(rounding_mode, lost_fraction);
2079     if (lost_fraction != lfExactlyZero)
2080       fs = (opStatus) (fs | opInexact);
2081   }
2082 
2083   return fs;
2084 }
2085 
2086 /* Normalized remainder.  */
2087 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2088   opStatus fs;
2089   unsigned int origSign = sign;
2090 
2091   // First handle the special cases.
2092   fs = remainderSpecials(rhs);
2093   if (fs != opDivByZero)
2094     return fs;
2095 
2096   fs = opOK;
2097 
2098   // Make sure the current value is less than twice the denom. If the addition
2099   // did not succeed (an overflow has happened), which means that the finite
2100   // value we currently posses must be less than twice the denom (as we are
2101   // using the same semantics).
2102   IEEEFloat P2 = rhs;
2103   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2104     fs = mod(P2);
2105     assert(fs == opOK);
2106   }
2107 
2108   // Lets work with absolute numbers.
2109   IEEEFloat P = rhs;
2110   P.sign = false;
2111   sign = false;
2112 
2113   //
2114   // To calculate the remainder we use the following scheme.
2115   //
2116   // The remainder is defained as follows:
2117   //
2118   // remainder = numer - rquot * denom = x - r * p
2119   //
2120   // Where r is the result of: x/p, rounded toward the nearest integral value
2121   // (with halfway cases rounded toward the even number).
2122   //
2123   // Currently, (after x mod 2p):
2124   // r is the number of 2p's present inside x, which is inherently, an even
2125   // number of p's.
2126   //
2127   // We may split the remaining calculation into 4 options:
2128   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2129   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2130   //   are done as well.
2131   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2132   //   to subtract 1p at least once.
2133   // - if x >= p then we must subtract p at least once, as x must be a
2134   //   remainder.
2135   //
2136   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2137   //
2138   // We can now split the remaining calculation to the following 3 options:
2139   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2140   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2141   //   must round up to the next even number. so we must subtract p once more.
2142   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2143   //   integral, and subtract p once more.
2144   //
2145 
2146   // Extend the semantics to prevent an overflow/underflow or inexact result.
2147   bool losesInfo;
2148   fltSemantics extendedSemantics = *semantics;
2149   extendedSemantics.maxExponent++;
2150   extendedSemantics.minExponent--;
2151   extendedSemantics.precision += 2;
2152 
2153   IEEEFloat VEx = *this;
2154   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2155   assert(fs == opOK && !losesInfo);
2156   IEEEFloat PEx = P;
2157   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2158   assert(fs == opOK && !losesInfo);
2159 
2160   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2161   // any fraction.
2162   fs = VEx.add(VEx, rmNearestTiesToEven);
2163   assert(fs == opOK);
2164 
2165   if (VEx.compare(PEx) == cmpGreaterThan) {
2166     fs = subtract(P, rmNearestTiesToEven);
2167     assert(fs == opOK);
2168 
2169     // Make VEx = this.add(this), but because we have different semantics, we do
2170     // not want to `convert` again, so we just subtract PEx twice (which equals
2171     // to the desired value).
2172     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2173     assert(fs == opOK);
2174     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2175     assert(fs == opOK);
2176 
2177     cmpResult result = VEx.compare(PEx);
2178     if (result == cmpGreaterThan || result == cmpEqual) {
2179       fs = subtract(P, rmNearestTiesToEven);
2180       assert(fs == opOK);
2181     }
2182   }
2183 
2184   if (isZero()) {
2185     sign = origSign;    // IEEE754 requires this
2186     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2187       // But some 8-bit floats only have positive 0.
2188       sign = false;
2189   }
2190 
2191   else
2192     sign ^= origSign;
2193   return fs;
2194 }
2195 
2196 /* Normalized llvm frem (C fmod). */
2197 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2198   opStatus fs;
2199   fs = modSpecials(rhs);
2200   unsigned int origSign = sign;
2201 
2202   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2203          compareAbsoluteValue(rhs) != cmpLessThan) {
2204     int Exp = ilogb(*this) - ilogb(rhs);
2205     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2206     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2207     // check for it.
2208     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2209       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2210     V.sign = sign;
2211 
2212     fs = subtract(V, rmNearestTiesToEven);
2213     assert(fs==opOK);
2214   }
2215   if (isZero()) {
2216     sign = origSign; // fmod requires this
2217     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2218       sign = false;
2219   }
2220   return fs;
2221 }
2222 
2223 /* Normalized fused-multiply-add.  */
2224 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2225                                                 const IEEEFloat &addend,
2226                                                 roundingMode rounding_mode) {
2227   opStatus fs;
2228 
2229   /* Post-multiplication sign, before addition.  */
2230   sign ^= multiplicand.sign;
2231 
2232   /* If and only if all arguments are normal do we need to do an
2233      extended-precision calculation.  */
2234   if (isFiniteNonZero() &&
2235       multiplicand.isFiniteNonZero() &&
2236       addend.isFinite()) {
2237     lostFraction lost_fraction;
2238 
2239     lost_fraction = multiplySignificand(multiplicand, addend);
2240     fs = normalize(rounding_mode, lost_fraction);
2241     if (lost_fraction != lfExactlyZero)
2242       fs = (opStatus) (fs | opInexact);
2243 
2244     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2245        positive zero unless rounding to minus infinity, except that
2246        adding two like-signed zeroes gives that zero.  */
2247     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2248       sign = (rounding_mode == rmTowardNegative);
2249       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2250         sign = false;
2251     }
2252   } else {
2253     fs = multiplySpecials(multiplicand);
2254 
2255     /* FS can only be opOK or opInvalidOp.  There is no more work
2256        to do in the latter case.  The IEEE-754R standard says it is
2257        implementation-defined in this case whether, if ADDEND is a
2258        quiet NaN, we raise invalid op; this implementation does so.
2259 
2260        If we need to do the addition we can do so with normal
2261        precision.  */
2262     if (fs == opOK)
2263       fs = addOrSubtract(addend, rounding_mode, false);
2264   }
2265 
2266   return fs;
2267 }
2268 
2269 /* Rounding-mode correct round to integral value.  */
2270 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2271   opStatus fs;
2272 
2273   if (isInfinity())
2274     // [IEEE Std 754-2008 6.1]:
2275     // The behavior of infinity in floating-point arithmetic is derived from the
2276     // limiting cases of real arithmetic with operands of arbitrarily
2277     // large magnitude, when such a limit exists.
2278     // ...
2279     // Operations on infinite operands are usually exact and therefore signal no
2280     // exceptions ...
2281     return opOK;
2282 
2283   if (isNaN()) {
2284     if (isSignaling()) {
2285       // [IEEE Std 754-2008 6.2]:
2286       // Under default exception handling, any operation signaling an invalid
2287       // operation exception and for which a floating-point result is to be
2288       // delivered shall deliver a quiet NaN.
2289       makeQuiet();
2290       // [IEEE Std 754-2008 6.2]:
2291       // Signaling NaNs shall be reserved operands that, under default exception
2292       // handling, signal the invalid operation exception(see 7.2) for every
2293       // general-computational and signaling-computational operation except for
2294       // the conversions described in 5.12.
2295       return opInvalidOp;
2296     } else {
2297       // [IEEE Std 754-2008 6.2]:
2298       // For an operation with quiet NaN inputs, other than maximum and minimum
2299       // operations, if a floating-point result is to be delivered the result
2300       // shall be a quiet NaN which should be one of the input NaNs.
2301       // ...
2302       // Every general-computational and quiet-computational operation involving
2303       // one or more input NaNs, none of them signaling, shall signal no
2304       // exception, except fusedMultiplyAdd might signal the invalid operation
2305       // exception(see 7.2).
2306       return opOK;
2307     }
2308   }
2309 
2310   if (isZero()) {
2311     // [IEEE Std 754-2008 6.3]:
2312     // ... the sign of the result of conversions, the quantize operation, the
2313     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2314     // the sign of the first or only operand.
2315     return opOK;
2316   }
2317 
2318   // If the exponent is large enough, we know that this value is already
2319   // integral, and the arithmetic below would potentially cause it to saturate
2320   // to +/-Inf.  Bail out early instead.
2321   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2322     return opOK;
2323 
2324   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2325   // precision of our format, and then subtract it back off again.  The choice
2326   // of rounding modes for the addition/subtraction determines the rounding mode
2327   // for our integral rounding as well.
2328   // NOTE: When the input value is negative, we do subtraction followed by
2329   // addition instead.
2330   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2331   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2332   IEEEFloat MagicConstant(*semantics);
2333   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2334                                       rmNearestTiesToEven);
2335   assert(fs == opOK);
2336   MagicConstant.sign = sign;
2337 
2338   // Preserve the input sign so that we can handle the case of zero result
2339   // correctly.
2340   bool inputSign = isNegative();
2341 
2342   fs = add(MagicConstant, rounding_mode);
2343 
2344   // Current value and 'MagicConstant' are both integers, so the result of the
2345   // subtraction is always exact according to Sterbenz' lemma.
2346   subtract(MagicConstant, rounding_mode);
2347 
2348   // Restore the input sign.
2349   if (inputSign != isNegative())
2350     changeSign();
2351 
2352   return fs;
2353 }
2354 
2355 
2356 /* Comparison requires normalized numbers.  */
2357 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2358   cmpResult result;
2359 
2360   assert(semantics == rhs.semantics);
2361 
2362   switch (PackCategoriesIntoKey(category, rhs.category)) {
2363   default:
2364     llvm_unreachable(nullptr);
2365 
2366   case PackCategoriesIntoKey(fcNaN, fcZero):
2367   case PackCategoriesIntoKey(fcNaN, fcNormal):
2368   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2369   case PackCategoriesIntoKey(fcNaN, fcNaN):
2370   case PackCategoriesIntoKey(fcZero, fcNaN):
2371   case PackCategoriesIntoKey(fcNormal, fcNaN):
2372   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2373     return cmpUnordered;
2374 
2375   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2376   case PackCategoriesIntoKey(fcInfinity, fcZero):
2377   case PackCategoriesIntoKey(fcNormal, fcZero):
2378     if (sign)
2379       return cmpLessThan;
2380     else
2381       return cmpGreaterThan;
2382 
2383   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2384   case PackCategoriesIntoKey(fcZero, fcInfinity):
2385   case PackCategoriesIntoKey(fcZero, fcNormal):
2386     if (rhs.sign)
2387       return cmpGreaterThan;
2388     else
2389       return cmpLessThan;
2390 
2391   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2392     if (sign == rhs.sign)
2393       return cmpEqual;
2394     else if (sign)
2395       return cmpLessThan;
2396     else
2397       return cmpGreaterThan;
2398 
2399   case PackCategoriesIntoKey(fcZero, fcZero):
2400     return cmpEqual;
2401 
2402   case PackCategoriesIntoKey(fcNormal, fcNormal):
2403     break;
2404   }
2405 
2406   /* Two normal numbers.  Do they have the same sign?  */
2407   if (sign != rhs.sign) {
2408     if (sign)
2409       result = cmpLessThan;
2410     else
2411       result = cmpGreaterThan;
2412   } else {
2413     /* Compare absolute values; invert result if negative.  */
2414     result = compareAbsoluteValue(rhs);
2415 
2416     if (sign) {
2417       if (result == cmpLessThan)
2418         result = cmpGreaterThan;
2419       else if (result == cmpGreaterThan)
2420         result = cmpLessThan;
2421     }
2422   }
2423 
2424   return result;
2425 }
2426 
2427 /// IEEEFloat::convert - convert a value of one floating point type to another.
2428 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2429 /// records whether the transformation lost information, i.e. whether
2430 /// converting the result back to the original type will produce the
2431 /// original value (this is almost the same as return value==fsOK, but there
2432 /// are edge cases where this is not so).
2433 
2434 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2435                                        roundingMode rounding_mode,
2436                                        bool *losesInfo) {
2437   lostFraction lostFraction;
2438   unsigned int newPartCount, oldPartCount;
2439   opStatus fs;
2440   int shift;
2441   const fltSemantics &fromSemantics = *semantics;
2442   bool is_signaling = isSignaling();
2443 
2444   lostFraction = lfExactlyZero;
2445   newPartCount = partCountForBits(toSemantics.precision + 1);
2446   oldPartCount = partCount();
2447   shift = toSemantics.precision - fromSemantics.precision;
2448 
2449   bool X86SpecialNan = false;
2450   if (&fromSemantics == &semX87DoubleExtended &&
2451       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2452       (!(*significandParts() & 0x8000000000000000ULL) ||
2453        !(*significandParts() & 0x4000000000000000ULL))) {
2454     // x86 has some unusual NaNs which cannot be represented in any other
2455     // format; note them here.
2456     X86SpecialNan = true;
2457   }
2458 
2459   // If this is a truncation of a denormal number, and the target semantics
2460   // has larger exponent range than the source semantics (this can happen
2461   // when truncating from PowerPC double-double to double format), the
2462   // right shift could lose result mantissa bits.  Adjust exponent instead
2463   // of performing excessive shift.
2464   // Also do a similar trick in case shifting denormal would produce zero
2465   // significand as this case isn't handled correctly by normalize.
2466   if (shift < 0 && isFiniteNonZero()) {
2467     int omsb = significandMSB() + 1;
2468     int exponentChange = omsb - fromSemantics.precision;
2469     if (exponent + exponentChange < toSemantics.minExponent)
2470       exponentChange = toSemantics.minExponent - exponent;
2471     if (exponentChange < shift)
2472       exponentChange = shift;
2473     if (exponentChange < 0) {
2474       shift -= exponentChange;
2475       exponent += exponentChange;
2476     } else if (omsb <= -shift) {
2477       exponentChange = omsb + shift - 1; // leave at least one bit set
2478       shift -= exponentChange;
2479       exponent += exponentChange;
2480     }
2481   }
2482 
2483   // If this is a truncation, perform the shift before we narrow the storage.
2484   if (shift < 0 && (isFiniteNonZero() ||
2485                     (category == fcNaN && semantics->nonFiniteBehavior !=
2486                                               fltNonfiniteBehavior::NanOnly)))
2487     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2488 
2489   // Fix the storage so it can hold to new value.
2490   if (newPartCount > oldPartCount) {
2491     // The new type requires more storage; make it available.
2492     integerPart *newParts;
2493     newParts = new integerPart[newPartCount];
2494     APInt::tcSet(newParts, 0, newPartCount);
2495     if (isFiniteNonZero() || category==fcNaN)
2496       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2497     freeSignificand();
2498     significand.parts = newParts;
2499   } else if (newPartCount == 1 && oldPartCount != 1) {
2500     // Switch to built-in storage for a single part.
2501     integerPart newPart = 0;
2502     if (isFiniteNonZero() || category==fcNaN)
2503       newPart = significandParts()[0];
2504     freeSignificand();
2505     significand.part = newPart;
2506   }
2507 
2508   // Now that we have the right storage, switch the semantics.
2509   semantics = &toSemantics;
2510 
2511   // If this is an extension, perform the shift now that the storage is
2512   // available.
2513   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2514     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2515 
2516   if (isFiniteNonZero()) {
2517     fs = normalize(rounding_mode, lostFraction);
2518     *losesInfo = (fs != opOK);
2519   } else if (category == fcNaN) {
2520     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2521       *losesInfo =
2522           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2523       makeNaN(false, sign);
2524       return is_signaling ? opInvalidOp : opOK;
2525     }
2526 
2527     // If NaN is negative zero, we need to create a new NaN to avoid converting
2528     // NaN to -Inf.
2529     if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2530         semantics->nanEncoding != fltNanEncoding::NegativeZero)
2531       makeNaN(false, false);
2532 
2533     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2534 
2535     // For x87 extended precision, we want to make a NaN, not a special NaN if
2536     // the input wasn't special either.
2537     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2538       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2539 
2540     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2541     // This also guarantees that a sNaN does not become Inf on a truncation
2542     // that loses all payload bits.
2543     if (is_signaling) {
2544       makeQuiet();
2545       fs = opInvalidOp;
2546     } else {
2547       fs = opOK;
2548     }
2549   } else if (category == fcInfinity &&
2550              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2551     makeNaN(false, sign);
2552     *losesInfo = true;
2553     fs = opInexact;
2554   } else if (category == fcZero &&
2555              semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2556     // Negative zero loses info, but positive zero doesn't.
2557     *losesInfo =
2558         fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2559     fs = *losesInfo ? opInexact : opOK;
2560     // NaN is negative zero means -0 -> +0, which can lose information
2561     sign = false;
2562   } else {
2563     *losesInfo = false;
2564     fs = opOK;
2565   }
2566 
2567   return fs;
2568 }
2569 
2570 /* Convert a floating point number to an integer according to the
2571    rounding mode.  If the rounded integer value is out of range this
2572    returns an invalid operation exception and the contents of the
2573    destination parts are unspecified.  If the rounded value is in
2574    range but the floating point number is not the exact integer, the C
2575    standard doesn't require an inexact exception to be raised.  IEEE
2576    854 does require it so we do that.
2577 
2578    Note that for conversions to integer type the C standard requires
2579    round-to-zero to always be used.  */
2580 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2581     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2582     roundingMode rounding_mode, bool *isExact) const {
2583   lostFraction lost_fraction;
2584   const integerPart *src;
2585   unsigned int dstPartsCount, truncatedBits;
2586 
2587   *isExact = false;
2588 
2589   /* Handle the three special cases first.  */
2590   if (category == fcInfinity || category == fcNaN)
2591     return opInvalidOp;
2592 
2593   dstPartsCount = partCountForBits(width);
2594   assert(dstPartsCount <= parts.size() && "Integer too big");
2595 
2596   if (category == fcZero) {
2597     APInt::tcSet(parts.data(), 0, dstPartsCount);
2598     // Negative zero can't be represented as an int.
2599     *isExact = !sign;
2600     return opOK;
2601   }
2602 
2603   src = significandParts();
2604 
2605   /* Step 1: place our absolute value, with any fraction truncated, in
2606      the destination.  */
2607   if (exponent < 0) {
2608     /* Our absolute value is less than one; truncate everything.  */
2609     APInt::tcSet(parts.data(), 0, dstPartsCount);
2610     /* For exponent -1 the integer bit represents .5, look at that.
2611        For smaller exponents leftmost truncated bit is 0. */
2612     truncatedBits = semantics->precision -1U - exponent;
2613   } else {
2614     /* We want the most significant (exponent + 1) bits; the rest are
2615        truncated.  */
2616     unsigned int bits = exponent + 1U;
2617 
2618     /* Hopelessly large in magnitude?  */
2619     if (bits > width)
2620       return opInvalidOp;
2621 
2622     if (bits < semantics->precision) {
2623       /* We truncate (semantics->precision - bits) bits.  */
2624       truncatedBits = semantics->precision - bits;
2625       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2626     } else {
2627       /* We want at least as many bits as are available.  */
2628       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2629                        0);
2630       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2631                          bits - semantics->precision);
2632       truncatedBits = 0;
2633     }
2634   }
2635 
2636   /* Step 2: work out any lost fraction, and increment the absolute
2637      value if we would round away from zero.  */
2638   if (truncatedBits) {
2639     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2640                                                   truncatedBits);
2641     if (lost_fraction != lfExactlyZero &&
2642         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2643       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2644         return opInvalidOp;     /* Overflow.  */
2645     }
2646   } else {
2647     lost_fraction = lfExactlyZero;
2648   }
2649 
2650   /* Step 3: check if we fit in the destination.  */
2651   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2652 
2653   if (sign) {
2654     if (!isSigned) {
2655       /* Negative numbers cannot be represented as unsigned.  */
2656       if (omsb != 0)
2657         return opInvalidOp;
2658     } else {
2659       /* It takes omsb bits to represent the unsigned integer value.
2660          We lose a bit for the sign, but care is needed as the
2661          maximally negative integer is a special case.  */
2662       if (omsb == width &&
2663           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2664         return opInvalidOp;
2665 
2666       /* This case can happen because of rounding.  */
2667       if (omsb > width)
2668         return opInvalidOp;
2669     }
2670 
2671     APInt::tcNegate (parts.data(), dstPartsCount);
2672   } else {
2673     if (omsb >= width + !isSigned)
2674       return opInvalidOp;
2675   }
2676 
2677   if (lost_fraction == lfExactlyZero) {
2678     *isExact = true;
2679     return opOK;
2680   } else
2681     return opInexact;
2682 }
2683 
2684 /* Same as convertToSignExtendedInteger, except we provide
2685    deterministic values in case of an invalid operation exception,
2686    namely zero for NaNs and the minimal or maximal value respectively
2687    for underflow or overflow.
2688    The *isExact output tells whether the result is exact, in the sense
2689    that converting it back to the original floating point type produces
2690    the original value.  This is almost equivalent to result==opOK,
2691    except for negative zeroes.
2692 */
2693 IEEEFloat::opStatus
2694 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2695                             unsigned int width, bool isSigned,
2696                             roundingMode rounding_mode, bool *isExact) const {
2697   opStatus fs;
2698 
2699   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2700                                     isExact);
2701 
2702   if (fs == opInvalidOp) {
2703     unsigned int bits, dstPartsCount;
2704 
2705     dstPartsCount = partCountForBits(width);
2706     assert(dstPartsCount <= parts.size() && "Integer too big");
2707 
2708     if (category == fcNaN)
2709       bits = 0;
2710     else if (sign)
2711       bits = isSigned;
2712     else
2713       bits = width - isSigned;
2714 
2715     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2716     if (sign && isSigned)
2717       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2718   }
2719 
2720   return fs;
2721 }
2722 
2723 /* Convert an unsigned integer SRC to a floating point number,
2724    rounding according to ROUNDING_MODE.  The sign of the floating
2725    point number is not modified.  */
2726 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2727     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2728   unsigned int omsb, precision, dstCount;
2729   integerPart *dst;
2730   lostFraction lost_fraction;
2731 
2732   category = fcNormal;
2733   omsb = APInt::tcMSB(src, srcCount) + 1;
2734   dst = significandParts();
2735   dstCount = partCount();
2736   precision = semantics->precision;
2737 
2738   /* We want the most significant PRECISION bits of SRC.  There may not
2739      be that many; extract what we can.  */
2740   if (precision <= omsb) {
2741     exponent = omsb - 1;
2742     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2743                                                   omsb - precision);
2744     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2745   } else {
2746     exponent = precision - 1;
2747     lost_fraction = lfExactlyZero;
2748     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2749   }
2750 
2751   return normalize(rounding_mode, lost_fraction);
2752 }
2753 
2754 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2755                                                 roundingMode rounding_mode) {
2756   unsigned int partCount = Val.getNumWords();
2757   APInt api = Val;
2758 
2759   sign = false;
2760   if (isSigned && api.isNegative()) {
2761     sign = true;
2762     api = -api;
2763   }
2764 
2765   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2766 }
2767 
2768 /* Convert a two's complement integer SRC to a floating point number,
2769    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2770    integer is signed, in which case it must be sign-extended.  */
2771 IEEEFloat::opStatus
2772 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2773                                           unsigned int srcCount, bool isSigned,
2774                                           roundingMode rounding_mode) {
2775   opStatus status;
2776 
2777   if (isSigned &&
2778       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2779     integerPart *copy;
2780 
2781     /* If we're signed and negative negate a copy.  */
2782     sign = true;
2783     copy = new integerPart[srcCount];
2784     APInt::tcAssign(copy, src, srcCount);
2785     APInt::tcNegate(copy, srcCount);
2786     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2787     delete [] copy;
2788   } else {
2789     sign = false;
2790     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2791   }
2792 
2793   return status;
2794 }
2795 
2796 /* FIXME: should this just take a const APInt reference?  */
2797 IEEEFloat::opStatus
2798 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2799                                           unsigned int width, bool isSigned,
2800                                           roundingMode rounding_mode) {
2801   unsigned int partCount = partCountForBits(width);
2802   APInt api = APInt(width, ArrayRef(parts, partCount));
2803 
2804   sign = false;
2805   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2806     sign = true;
2807     api = -api;
2808   }
2809 
2810   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2811 }
2812 
2813 Expected<IEEEFloat::opStatus>
2814 IEEEFloat::convertFromHexadecimalString(StringRef s,
2815                                         roundingMode rounding_mode) {
2816   lostFraction lost_fraction = lfExactlyZero;
2817 
2818   category = fcNormal;
2819   zeroSignificand();
2820   exponent = 0;
2821 
2822   integerPart *significand = significandParts();
2823   unsigned partsCount = partCount();
2824   unsigned bitPos = partsCount * integerPartWidth;
2825   bool computedTrailingFraction = false;
2826 
2827   // Skip leading zeroes and any (hexa)decimal point.
2828   StringRef::iterator begin = s.begin();
2829   StringRef::iterator end = s.end();
2830   StringRef::iterator dot;
2831   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2832   if (!PtrOrErr)
2833     return PtrOrErr.takeError();
2834   StringRef::iterator p = *PtrOrErr;
2835   StringRef::iterator firstSignificantDigit = p;
2836 
2837   while (p != end) {
2838     integerPart hex_value;
2839 
2840     if (*p == '.') {
2841       if (dot != end)
2842         return createError("String contains multiple dots");
2843       dot = p++;
2844       continue;
2845     }
2846 
2847     hex_value = hexDigitValue(*p);
2848     if (hex_value == UINT_MAX)
2849       break;
2850 
2851     p++;
2852 
2853     // Store the number while we have space.
2854     if (bitPos) {
2855       bitPos -= 4;
2856       hex_value <<= bitPos % integerPartWidth;
2857       significand[bitPos / integerPartWidth] |= hex_value;
2858     } else if (!computedTrailingFraction) {
2859       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2860       if (!FractOrErr)
2861         return FractOrErr.takeError();
2862       lost_fraction = *FractOrErr;
2863       computedTrailingFraction = true;
2864     }
2865   }
2866 
2867   /* Hex floats require an exponent but not a hexadecimal point.  */
2868   if (p == end)
2869     return createError("Hex strings require an exponent");
2870   if (*p != 'p' && *p != 'P')
2871     return createError("Invalid character in significand");
2872   if (p == begin)
2873     return createError("Significand has no digits");
2874   if (dot != end && p - begin == 1)
2875     return createError("Significand has no digits");
2876 
2877   /* Ignore the exponent if we are zero.  */
2878   if (p != firstSignificantDigit) {
2879     int expAdjustment;
2880 
2881     /* Implicit hexadecimal point?  */
2882     if (dot == end)
2883       dot = p;
2884 
2885     /* Calculate the exponent adjustment implicit in the number of
2886        significant digits.  */
2887     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2888     if (expAdjustment < 0)
2889       expAdjustment++;
2890     expAdjustment = expAdjustment * 4 - 1;
2891 
2892     /* Adjust for writing the significand starting at the most
2893        significant nibble.  */
2894     expAdjustment += semantics->precision;
2895     expAdjustment -= partsCount * integerPartWidth;
2896 
2897     /* Adjust for the given exponent.  */
2898     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2899     if (!ExpOrErr)
2900       return ExpOrErr.takeError();
2901     exponent = *ExpOrErr;
2902   }
2903 
2904   return normalize(rounding_mode, lost_fraction);
2905 }
2906 
2907 IEEEFloat::opStatus
2908 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2909                                         unsigned sigPartCount, int exp,
2910                                         roundingMode rounding_mode) {
2911   unsigned int parts, pow5PartCount;
2912   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2913   integerPart pow5Parts[maxPowerOfFiveParts];
2914   bool isNearest;
2915 
2916   isNearest = (rounding_mode == rmNearestTiesToEven ||
2917                rounding_mode == rmNearestTiesToAway);
2918 
2919   parts = partCountForBits(semantics->precision + 11);
2920 
2921   /* Calculate pow(5, abs(exp)).  */
2922   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2923 
2924   for (;; parts *= 2) {
2925     opStatus sigStatus, powStatus;
2926     unsigned int excessPrecision, truncatedBits;
2927 
2928     calcSemantics.precision = parts * integerPartWidth - 1;
2929     excessPrecision = calcSemantics.precision - semantics->precision;
2930     truncatedBits = excessPrecision;
2931 
2932     IEEEFloat decSig(calcSemantics, uninitialized);
2933     decSig.makeZero(sign);
2934     IEEEFloat pow5(calcSemantics);
2935 
2936     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2937                                                 rmNearestTiesToEven);
2938     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2939                                               rmNearestTiesToEven);
2940     /* Add exp, as 10^n = 5^n * 2^n.  */
2941     decSig.exponent += exp;
2942 
2943     lostFraction calcLostFraction;
2944     integerPart HUerr, HUdistance;
2945     unsigned int powHUerr;
2946 
2947     if (exp >= 0) {
2948       /* multiplySignificand leaves the precision-th bit set to 1.  */
2949       calcLostFraction = decSig.multiplySignificand(pow5);
2950       powHUerr = powStatus != opOK;
2951     } else {
2952       calcLostFraction = decSig.divideSignificand(pow5);
2953       /* Denormal numbers have less precision.  */
2954       if (decSig.exponent < semantics->minExponent) {
2955         excessPrecision += (semantics->minExponent - decSig.exponent);
2956         truncatedBits = excessPrecision;
2957         if (excessPrecision > calcSemantics.precision)
2958           excessPrecision = calcSemantics.precision;
2959       }
2960       /* Extra half-ulp lost in reciprocal of exponent.  */
2961       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2962     }
2963 
2964     /* Both multiplySignificand and divideSignificand return the
2965        result with the integer bit set.  */
2966     assert(APInt::tcExtractBit
2967            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2968 
2969     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2970                        powHUerr);
2971     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2972                                       excessPrecision, isNearest);
2973 
2974     /* Are we guaranteed to round correctly if we truncate?  */
2975     if (HUdistance >= HUerr) {
2976       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2977                        calcSemantics.precision - excessPrecision,
2978                        excessPrecision);
2979       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2980          above we must adjust our exponent to compensate for the
2981          implicit right shift.  */
2982       exponent = (decSig.exponent + semantics->precision
2983                   - (calcSemantics.precision - excessPrecision));
2984       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2985                                                        decSig.partCount(),
2986                                                        truncatedBits);
2987       return normalize(rounding_mode, calcLostFraction);
2988     }
2989   }
2990 }
2991 
2992 Expected<IEEEFloat::opStatus>
2993 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2994   decimalInfo D;
2995   opStatus fs;
2996 
2997   /* Scan the text.  */
2998   StringRef::iterator p = str.begin();
2999   if (Error Err = interpretDecimal(p, str.end(), &D))
3000     return std::move(Err);
3001 
3002   /* Handle the quick cases.  First the case of no significant digits,
3003      i.e. zero, and then exponents that are obviously too large or too
3004      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
3005      definitely overflows if
3006 
3007            (exp - 1) * L >= maxExponent
3008 
3009      and definitely underflows to zero where
3010 
3011            (exp + 1) * L <= minExponent - precision
3012 
3013      With integer arithmetic the tightest bounds for L are
3014 
3015            93/28 < L < 196/59            [ numerator <= 256 ]
3016            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
3017   */
3018 
3019   // Test if we have a zero number allowing for strings with no null terminators
3020   // and zero decimals with non-zero exponents.
3021   //
3022   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3023   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3024   // be at most one dot. On the other hand, if we have a zero with a non-zero
3025   // exponent, then we know that D.firstSigDigit will be non-numeric.
3026   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3027     category = fcZero;
3028     fs = opOK;
3029     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3030       sign = false;
3031 
3032     /* Check whether the normalized exponent is high enough to overflow
3033        max during the log-rebasing in the max-exponent check below. */
3034   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3035     fs = handleOverflow(rounding_mode);
3036 
3037   /* If it wasn't, then it also wasn't high enough to overflow max
3038      during the log-rebasing in the min-exponent check.  Check that it
3039      won't overflow min in either check, then perform the min-exponent
3040      check. */
3041   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3042              (D.normalizedExponent + 1) * 28738 <=
3043                8651 * (semantics->minExponent - (int) semantics->precision)) {
3044     /* Underflow to zero and round.  */
3045     category = fcNormal;
3046     zeroSignificand();
3047     fs = normalize(rounding_mode, lfLessThanHalf);
3048 
3049   /* We can finally safely perform the max-exponent check. */
3050   } else if ((D.normalizedExponent - 1) * 42039
3051              >= 12655 * semantics->maxExponent) {
3052     /* Overflow and round.  */
3053     fs = handleOverflow(rounding_mode);
3054   } else {
3055     integerPart *decSignificand;
3056     unsigned int partCount;
3057 
3058     /* A tight upper bound on number of bits required to hold an
3059        N-digit decimal integer is N * 196 / 59.  Allocate enough space
3060        to hold the full significand, and an extra part required by
3061        tcMultiplyPart.  */
3062     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3063     partCount = partCountForBits(1 + 196 * partCount / 59);
3064     decSignificand = new integerPart[partCount + 1];
3065     partCount = 0;
3066 
3067     /* Convert to binary efficiently - we do almost all multiplication
3068        in an integerPart.  When this would overflow do we do a single
3069        bignum multiplication, and then revert again to multiplication
3070        in an integerPart.  */
3071     do {
3072       integerPart decValue, val, multiplier;
3073 
3074       val = 0;
3075       multiplier = 1;
3076 
3077       do {
3078         if (*p == '.') {
3079           p++;
3080           if (p == str.end()) {
3081             break;
3082           }
3083         }
3084         decValue = decDigitValue(*p++);
3085         if (decValue >= 10U) {
3086           delete[] decSignificand;
3087           return createError("Invalid character in significand");
3088         }
3089         multiplier *= 10;
3090         val = val * 10 + decValue;
3091         /* The maximum number that can be multiplied by ten with any
3092            digit added without overflowing an integerPart.  */
3093       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3094 
3095       /* Multiply out the current part.  */
3096       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3097                             partCount, partCount + 1, false);
3098 
3099       /* If we used another part (likely but not guaranteed), increase
3100          the count.  */
3101       if (decSignificand[partCount])
3102         partCount++;
3103     } while (p <= D.lastSigDigit);
3104 
3105     category = fcNormal;
3106     fs = roundSignificandWithExponent(decSignificand, partCount,
3107                                       D.exponent, rounding_mode);
3108 
3109     delete [] decSignificand;
3110   }
3111 
3112   return fs;
3113 }
3114 
3115 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3116   const size_t MIN_NAME_SIZE = 3;
3117 
3118   if (str.size() < MIN_NAME_SIZE)
3119     return false;
3120 
3121   if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3122     makeInf(false);
3123     return true;
3124   }
3125 
3126   bool IsNegative = str.front() == '-';
3127   if (IsNegative) {
3128     str = str.drop_front();
3129     if (str.size() < MIN_NAME_SIZE)
3130       return false;
3131 
3132     if (str == "inf" || str == "INFINITY" || str == "Inf") {
3133       makeInf(true);
3134       return true;
3135     }
3136   }
3137 
3138   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3139   bool IsSignaling = str.front() == 's' || str.front() == 'S';
3140   if (IsSignaling) {
3141     str = str.drop_front();
3142     if (str.size() < MIN_NAME_SIZE)
3143       return false;
3144   }
3145 
3146   if (str.starts_with("nan") || str.starts_with("NaN")) {
3147     str = str.drop_front(3);
3148 
3149     // A NaN without payload.
3150     if (str.empty()) {
3151       makeNaN(IsSignaling, IsNegative);
3152       return true;
3153     }
3154 
3155     // Allow the payload to be inside parentheses.
3156     if (str.front() == '(') {
3157       // Parentheses should be balanced (and not empty).
3158       if (str.size() <= 2 || str.back() != ')')
3159         return false;
3160 
3161       str = str.slice(1, str.size() - 1);
3162     }
3163 
3164     // Determine the payload number's radix.
3165     unsigned Radix = 10;
3166     if (str[0] == '0') {
3167       if (str.size() > 1 && tolower(str[1]) == 'x') {
3168         str = str.drop_front(2);
3169         Radix = 16;
3170       } else
3171         Radix = 8;
3172     }
3173 
3174     // Parse the payload and make the NaN.
3175     APInt Payload;
3176     if (!str.getAsInteger(Radix, Payload)) {
3177       makeNaN(IsSignaling, IsNegative, &Payload);
3178       return true;
3179     }
3180   }
3181 
3182   return false;
3183 }
3184 
3185 Expected<IEEEFloat::opStatus>
3186 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3187   if (str.empty())
3188     return createError("Invalid string length");
3189 
3190   // Handle special cases.
3191   if (convertFromStringSpecials(str))
3192     return opOK;
3193 
3194   /* Handle a leading minus sign.  */
3195   StringRef::iterator p = str.begin();
3196   size_t slen = str.size();
3197   sign = *p == '-' ? 1 : 0;
3198   if (*p == '-' || *p == '+') {
3199     p++;
3200     slen--;
3201     if (!slen)
3202       return createError("String has no digits");
3203   }
3204 
3205   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3206     if (slen == 2)
3207       return createError("Invalid string");
3208     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3209                                         rounding_mode);
3210   }
3211 
3212   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3213 }
3214 
3215 /* Write out a hexadecimal representation of the floating point value
3216    to DST, which must be of sufficient size, in the C99 form
3217    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3218    excluding the terminating NUL.
3219 
3220    If UPPERCASE, the output is in upper case, otherwise in lower case.
3221 
3222    HEXDIGITS digits appear altogether, rounding the value if
3223    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3224    number precisely is used instead.  If nothing would appear after
3225    the decimal point it is suppressed.
3226 
3227    The decimal exponent is always printed and has at least one digit.
3228    Zero values display an exponent of zero.  Infinities and NaNs
3229    appear as "infinity" or "nan" respectively.
3230 
3231    The above rules are as specified by C99.  There is ambiguity about
3232    what the leading hexadecimal digit should be.  This implementation
3233    uses whatever is necessary so that the exponent is displayed as
3234    stored.  This implies the exponent will fall within the IEEE format
3235    range, and the leading hexadecimal digit will be 0 (for denormals),
3236    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3237    any other digits zero).
3238 */
3239 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3240                                            bool upperCase,
3241                                            roundingMode rounding_mode) const {
3242   char *p;
3243 
3244   p = dst;
3245   if (sign)
3246     *dst++ = '-';
3247 
3248   switch (category) {
3249   case fcInfinity:
3250     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3251     dst += sizeof infinityL - 1;
3252     break;
3253 
3254   case fcNaN:
3255     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3256     dst += sizeof NaNU - 1;
3257     break;
3258 
3259   case fcZero:
3260     *dst++ = '0';
3261     *dst++ = upperCase ? 'X': 'x';
3262     *dst++ = '0';
3263     if (hexDigits > 1) {
3264       *dst++ = '.';
3265       memset (dst, '0', hexDigits - 1);
3266       dst += hexDigits - 1;
3267     }
3268     *dst++ = upperCase ? 'P': 'p';
3269     *dst++ = '0';
3270     break;
3271 
3272   case fcNormal:
3273     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3274     break;
3275   }
3276 
3277   *dst = 0;
3278 
3279   return static_cast<unsigned int>(dst - p);
3280 }
3281 
3282 /* Does the hard work of outputting the correctly rounded hexadecimal
3283    form of a normal floating point number with the specified number of
3284    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
3285    digits necessary to print the value precisely is output.  */
3286 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3287                                           bool upperCase,
3288                                           roundingMode rounding_mode) const {
3289   unsigned int count, valueBits, shift, partsCount, outputDigits;
3290   const char *hexDigitChars;
3291   const integerPart *significand;
3292   char *p;
3293   bool roundUp;
3294 
3295   *dst++ = '0';
3296   *dst++ = upperCase ? 'X': 'x';
3297 
3298   roundUp = false;
3299   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3300 
3301   significand = significandParts();
3302   partsCount = partCount();
3303 
3304   /* +3 because the first digit only uses the single integer bit, so
3305      we have 3 virtual zero most-significant-bits.  */
3306   valueBits = semantics->precision + 3;
3307   shift = integerPartWidth - valueBits % integerPartWidth;
3308 
3309   /* The natural number of digits required ignoring trailing
3310      insignificant zeroes.  */
3311   outputDigits = (valueBits - significandLSB () + 3) / 4;
3312 
3313   /* hexDigits of zero means use the required number for the
3314      precision.  Otherwise, see if we are truncating.  If we are,
3315      find out if we need to round away from zero.  */
3316   if (hexDigits) {
3317     if (hexDigits < outputDigits) {
3318       /* We are dropping non-zero bits, so need to check how to round.
3319          "bits" is the number of dropped bits.  */
3320       unsigned int bits;
3321       lostFraction fraction;
3322 
3323       bits = valueBits - hexDigits * 4;
3324       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3325       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3326     }
3327     outputDigits = hexDigits;
3328   }
3329 
3330   /* Write the digits consecutively, and start writing in the location
3331      of the hexadecimal point.  We move the most significant digit
3332      left and add the hexadecimal point later.  */
3333   p = ++dst;
3334 
3335   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3336 
3337   while (outputDigits && count) {
3338     integerPart part;
3339 
3340     /* Put the most significant integerPartWidth bits in "part".  */
3341     if (--count == partsCount)
3342       part = 0;  /* An imaginary higher zero part.  */
3343     else
3344       part = significand[count] << shift;
3345 
3346     if (count && shift)
3347       part |= significand[count - 1] >> (integerPartWidth - shift);
3348 
3349     /* Convert as much of "part" to hexdigits as we can.  */
3350     unsigned int curDigits = integerPartWidth / 4;
3351 
3352     if (curDigits > outputDigits)
3353       curDigits = outputDigits;
3354     dst += partAsHex (dst, part, curDigits, hexDigitChars);
3355     outputDigits -= curDigits;
3356   }
3357 
3358   if (roundUp) {
3359     char *q = dst;
3360 
3361     /* Note that hexDigitChars has a trailing '0'.  */
3362     do {
3363       q--;
3364       *q = hexDigitChars[hexDigitValue (*q) + 1];
3365     } while (*q == '0');
3366     assert(q >= p);
3367   } else {
3368     /* Add trailing zeroes.  */
3369     memset (dst, '0', outputDigits);
3370     dst += outputDigits;
3371   }
3372 
3373   /* Move the most significant digit to before the point, and if there
3374      is something after the decimal point add it.  This must come
3375      after rounding above.  */
3376   p[-1] = p[0];
3377   if (dst -1 == p)
3378     dst--;
3379   else
3380     p[0] = '.';
3381 
3382   /* Finally output the exponent.  */
3383   *dst++ = upperCase ? 'P': 'p';
3384 
3385   return writeSignedDecimal (dst, exponent);
3386 }
3387 
3388 hash_code hash_value(const IEEEFloat &Arg) {
3389   if (!Arg.isFiniteNonZero())
3390     return hash_combine((uint8_t)Arg.category,
3391                         // NaN has no sign, fix it at zero.
3392                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3393                         Arg.semantics->precision);
3394 
3395   // Normal floats need their exponent and significand hashed.
3396   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3397                       Arg.semantics->precision, Arg.exponent,
3398                       hash_combine_range(
3399                         Arg.significandParts(),
3400                         Arg.significandParts() + Arg.partCount()));
3401 }
3402 
3403 // Conversion from APFloat to/from host float/double.  It may eventually be
3404 // possible to eliminate these and have everybody deal with APFloats, but that
3405 // will take a while.  This approach will not easily extend to long double.
3406 // Current implementation requires integerPartWidth==64, which is correct at
3407 // the moment but could be made more general.
3408 
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3411 
3412 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3413   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3414   assert(partCount()==2);
3415 
3416   uint64_t myexponent, mysignificand;
3417 
3418   if (isFiniteNonZero()) {
3419     myexponent = exponent+16383; //bias
3420     mysignificand = significandParts()[0];
3421     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3422       myexponent = 0;   // denormal
3423   } else if (category==fcZero) {
3424     myexponent = 0;
3425     mysignificand = 0;
3426   } else if (category==fcInfinity) {
3427     myexponent = 0x7fff;
3428     mysignificand = 0x8000000000000000ULL;
3429   } else {
3430     assert(category == fcNaN && "Unknown category");
3431     myexponent = 0x7fff;
3432     mysignificand = significandParts()[0];
3433   }
3434 
3435   uint64_t words[2];
3436   words[0] = mysignificand;
3437   words[1] =  ((uint64_t)(sign & 1) << 15) |
3438               (myexponent & 0x7fffLL);
3439   return APInt(80, words);
3440 }
3441 
/// Encodes this legacy PPC double-double value as a 128-bit APInt built from
/// two IEEE double bit patterns.
///
/// words[0] is this value rounded to double.  words[1] is the difference
/// between this value and that rounded double, itself converted to double; it
/// is zero when the first rounding lost no information or did not produce a
/// finite non-zero result.
APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
  assert(partCount()==2);

  uint64_t words[2];
  opStatus fs;
  bool losesInfo;

  // Convert number to double.  To avoid spurious underflows, we re-
  // normalize against the "double" minExponent first, and only *then*
  // truncate the mantissa.  The result of that second conversion
  // may be inexact, but should never underflow.
  // Declare fltSemantics before APFloat that uses it (and
  // saves pointer to it) to ensure correct destruction order.
  fltSemantics extendedSemantics = *semantics;
  extendedSemantics.minExponent = semIEEEdouble.minExponent;
  IEEEFloat extended(*this);
  fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs;

  // Round the renormalized value to double; losesInfo from this conversion
  // decides below whether a second double is needed.
  IEEEFloat u(extended);
  fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK || fs == opInexact);
  (void)fs;
  words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();

  // If conversion was exact or resulted in a special case, we're done;
  // just set the second double to zero.  Otherwise, re-convert back to
  // the extended format and compute the difference.  This now should
  // convert exactly to double.
  if (u.isFiniteNonZero() && losesInfo) {
    fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    // v = extended - u is the part of the value the first double missed.
    IEEEFloat v(extended);
    v.subtract(u, rmNearestTiesToEven);
    fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;
    words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
  } else {
    words[1] = 0;
  }

  return APInt(128, words);
}
3490 
/// Encodes this value as an APInt of S.sizeInBits bits, laid out from the
/// least significant bit upwards as: trailing significand (S.precision - 1
/// bits, integer bit not stored), biased exponent, sign.
///
/// \tparam S the semantics to encode for; must be the semantics of *this.
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
  assert(semantics == &S);

  // Value added to the internal exponent to obtain the stored exponent field.
  constexpr int bias = -(S.minExponent - 1);
  // Number of significand bits that are stored, i.e. all but the integer bit.
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  // Index of the significand part that holds the integer bit, and the mask
  // of that bit within the part.
  constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
  constexpr integerPart integer_bit =
      integerPart{1} << (trailing_significand_bits % integerPartWidth);
  // Mask of the bits below the integer bit; zero when the integer bit sits at
  // bit 0 of its part.
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;

  uint64_t myexponent;
  std::array<integerPart, partCountForBits(trailing_significand_bits)>
      mysignificand;

  if (isFiniteNonZero()) {
    myexponent = exponent + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
    // Minimum exponent without the integer bit set means a denormal, which
    // is stored with an exponent field of zero.
    if (myexponent == 1 &&
        !(significandParts()[integer_bit_part] & integer_bit))
      myexponent = 0; // denormal
  } else if (category == fcZero) {
    myexponent = ::exponentZero(S) + bias;
    mysignificand.fill(0);
  } else if (category == fcInfinity) {
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
      llvm_unreachable("semantics don't support inf!");
    }
    myexponent = ::exponentInf(S) + bias;
    mysignificand.fill(0);
  } else {
    assert(category == fcNaN && "Unknown category!");
    myexponent = ::exponentNaN(S) + bias;
    std::copy_n(significandParts(), mysignificand.size(),
                mysignificand.begin());
  }
  // Assemble the output words: significand parts first, remaining words
  // zeroed, then sign and exponent OR-ed into the last word.
  std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
  auto words_iter =
      std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
  if constexpr (significand_mask != 0) {
    // Clear the integer bit.
    words[mysignificand.size() - 1] &= significand_mask;
  }
  std::fill(words_iter, words.end(), uint64_t{0});
  constexpr size_t last_word = words.size() - 1;
  uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
                          << ((S.sizeInBits - 1) % 64);
  words[last_word] |= shifted_sign;
  uint64_t shifted_exponent = (myexponent & exponent_mask)
                              << (trailing_significand_bits % 64);
  words[last_word] |= shifted_exponent;
  // Single-word formats use the scalar APInt constructor.
  if constexpr (last_word == 0) {
    return APInt(S.sizeInBits, words[0]);
  }
  return APInt(S.sizeInBits, words);
}
3552 
/// Returns the bit pattern of this value encoded with semIEEEquad.  The
/// significand is expected to occupy two parts.
APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}
3557 
/// Returns the bit pattern of this value encoded with semIEEEdouble.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}
3562 
/// Returns the bit pattern of this value encoded with semIEEEsingle.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}
3567 
/// Returns the bit pattern of this value encoded with semBFloat.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}
3572 
/// Returns the bit pattern of this value encoded with semIEEEhalf.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount()==1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}
3577 
/// Returns the bit pattern of this value encoded with semFloat8E5M2.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}
3582 
/// Returns the bit pattern of this value encoded with semFloat8E5M2FNUZ.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}
3587 
/// Returns the bit pattern of this value encoded with semFloat8E4M3FN.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}
3592 
/// Returns the bit pattern of this value encoded with semFloat8E4M3FNUZ.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}
3597 
/// Returns the bit pattern of this value encoded with semFloat8E4M3B11FNUZ.
/// The significand is expected to occupy a single part.
APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}
3602 
/// Returns the bit pattern of this value encoded with semFloatTF32.  The
/// significand is expected to occupy a single part.
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}
3607 
3608 // This function creates an APInt that is just a bit map of the floating
3609 // point constant as it would appear in memory.  It is not a conversion,
3610 // and treating the result as a normal integer is unlikely to be useful.
3611 
3612 APInt IEEEFloat::bitcastToAPInt() const {
3613   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3614     return convertHalfAPFloatToAPInt();
3615 
3616   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3617     return convertBFloatAPFloatToAPInt();
3618 
3619   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3620     return convertFloatAPFloatToAPInt();
3621 
3622   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3623     return convertDoubleAPFloatToAPInt();
3624 
3625   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3626     return convertQuadrupleAPFloatToAPInt();
3627 
3628   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3629     return convertPPCDoubleDoubleAPFloatToAPInt();
3630 
3631   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3632     return convertFloat8E5M2APFloatToAPInt();
3633 
3634   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3635     return convertFloat8E5M2FNUZAPFloatToAPInt();
3636 
3637   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3638     return convertFloat8E4M3FNAPFloatToAPInt();
3639 
3640   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3641     return convertFloat8E4M3FNUZAPFloatToAPInt();
3642 
3643   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3644     return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3645 
3646   if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3647     return convertFloatTF32APFloatToAPInt();
3648 
3649   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3650          "unknown format!");
3651   return convertF80LongDoubleAPFloatToAPInt();
3652 }
3653 
3654 float IEEEFloat::convertToFloat() const {
3655   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3656          "Float semantics are not IEEEsingle");
3657   APInt api = bitcastToAPInt();
3658   return api.bitsToFloat();
3659 }
3660 
3661 double IEEEFloat::convertToDouble() const {
3662   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3663          "Float semantics are not IEEEdouble");
3664   APInt api = bitcastToAPInt();
3665   return api.bitsToDouble();
3666 }
3667 
3668 #ifdef HAS_IEE754_FLOAT128
3669 float128 IEEEFloat::convertToQuad() const {
3670   assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
3671          "Float semantics are not IEEEquads");
3672   APInt api = bitcastToAPInt();
3673   return api.bitsToQuad();
3674 }
3675 #endif
3676 
3677 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3678 /// does not support these bit patterns:
3679 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3680 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3681 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3682 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3683 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3684 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3685   uint64_t i1 = api.getRawData()[0];
3686   uint64_t i2 = api.getRawData()[1];
3687   uint64_t myexponent = (i2 & 0x7fff);
3688   uint64_t mysignificand = i1;
3689   uint8_t myintegerbit = mysignificand >> 63;
3690 
3691   initialize(&semX87DoubleExtended);
3692   assert(partCount()==2);
3693 
3694   sign = static_cast<unsigned int>(i2>>15);
3695   if (myexponent == 0 && mysignificand == 0) {
3696     makeZero(sign);
3697   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3698     makeInf(sign);
3699   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3700              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3701     category = fcNaN;
3702     exponent = exponentNaN();
3703     significandParts()[0] = mysignificand;
3704     significandParts()[1] = 0;
3705   } else {
3706     category = fcNormal;
3707     exponent = myexponent - 16383;
3708     significandParts()[0] = mysignificand;
3709     significandParts()[1] = 0;
3710     if (myexponent==0)          // denormal
3711       exponent = -16382;
3712   }
3713 }
3714 
3715 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3716   uint64_t i1 = api.getRawData()[0];
3717   uint64_t i2 = api.getRawData()[1];
3718   opStatus fs;
3719   bool losesInfo;
3720 
3721   // Get the first double and convert to our format.
3722   initFromDoubleAPInt(APInt(64, i1));
3723   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3724   assert(fs == opOK && !losesInfo);
3725   (void)fs;
3726 
3727   // Unless we have a special case, add in second double.
3728   if (isFiniteNonZero()) {
3729     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3730     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3731     assert(fs == opOK && !losesInfo);
3732     (void)fs;
3733 
3734     add(v, rmNearestTiesToEven);
3735   }
3736 }
3737 
/// Initializes this value from \p api, interpreted as the bit pattern of a
/// number in semantics \p S (trailing significand in the low bits, then the
/// biased exponent, then the sign bit).
///
/// The bits are classified as infinity, NaN, zero, denormal or normal
/// according to S.nonFiniteBehavior and S.nanEncoding.
///
/// \tparam S the semantics describing the layout of \p api.
/// \param api the raw bits; its width must equal S.sizeInBits.
template <const fltSemantics &S>
void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
  assert(api.getBitWidth() == S.sizeInBits);
  // Mask of the (implicit) integer bit within the top significand part, and
  // of the stored bits below it.
  constexpr integerPart integer_bit = integerPart{1}
                                      << ((S.precision - 1) % integerPartWidth);
  constexpr uint64_t significand_mask = integer_bit - 1;
  constexpr unsigned int trailing_significand_bits = S.precision - 1;
  constexpr unsigned int stored_significand_parts =
      partCountForBits(trailing_significand_bits);
  constexpr unsigned int exponent_bits =
      S.sizeInBits - 1 - trailing_significand_bits;
  static_assert(exponent_bits < 64);
  constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
  constexpr int bias = -(S.minExponent - 1);

  // Copy the bits of the significand. We need to clear out the exponent and
  // sign bit in the last word.
  std::array<integerPart, stored_significand_parts> mysignificand;
  std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
  if constexpr (significand_mask != 0) {
    mysignificand[mysignificand.size() - 1] &= significand_mask;
  }

  // We assume the last word holds the sign bit, the exponent, and potentially
  // some of the trailing significand field.
  uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
  uint64_t myexponent =
      (last_word >> (trailing_significand_bits % 64)) & exponent_mask;

  initialize(&S);
  assert(partCount() == mysignificand.size());

  sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));

  bool all_zero_significand =
      llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });

  bool is_zero = myexponent == 0 && all_zero_significand;

  // Only semantics with IEEE-754 non-finite behavior can encode infinity.
  if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
    if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
      makeInf(sign);
      return;
    }
  }

  bool is_nan = false;

  // How a NaN is recognized depends on the NaN encoding of the semantics.
  if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
    is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
    bool all_ones_significand =
        std::all_of(mysignificand.begin(), mysignificand.end() - 1,
                    [](integerPart bits) { return bits == ~integerPart{0}; }) &&
        (!significand_mask ||
         mysignificand[mysignificand.size() - 1] == significand_mask);
    is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
  } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
    is_nan = is_zero && sign;
  }

  if (is_nan) {
    category = fcNaN;
    exponent = ::exponentNaN(S);
    std::copy_n(mysignificand.begin(), mysignificand.size(),
                significandParts());
    return;
  }

  if (is_zero) {
    makeZero(sign);
    return;
  }

  // Finite non-zero value: denormals use minExponent and have no integer
  // bit; normals get the implicit integer bit restored.
  category = fcNormal;
  exponent = myexponent - bias;
  std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
  if (myexponent == 0) // denormal
    exponent = S.minExponent;
  else
    significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
}
3820 
/// Initializes this value from \p api interpreted as semIEEEquad bits.
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEquad>(api);
}
3824 
/// Initializes this value from \p api interpreted as semIEEEdouble bits.
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEdouble>(api);
}
3828 
/// Initializes this value from \p api interpreted as semIEEEsingle bits.
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEsingle>(api);
}
3832 
/// Initializes this value from \p api interpreted as semBFloat bits.
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
  initFromIEEEAPInt<semBFloat>(api);
}
3836 
/// Initializes this value from \p api interpreted as semIEEEhalf bits.
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
  initFromIEEEAPInt<semIEEEhalf>(api);
}
3840 
/// Initializes this value from \p api interpreted as semFloat8E5M2 bits.
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2>(api);
}
3844 
/// Initializes this value from \p api interpreted as semFloat8E5M2FNUZ bits.
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}
3848 
/// Initializes this value from \p api interpreted as semFloat8E4M3FN bits.
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FN>(api);
}
3852 
/// Initializes this value from \p api interpreted as semFloat8E4M3FNUZ bits.
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
}
3856 
/// Initializes this value from \p api interpreted as semFloat8E4M3B11FNUZ
/// bits.
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
  initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
}
3860 
/// Initializes this value from \p api interpreted as semFloatTF32 bits.
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
  initFromIEEEAPInt<semFloatTF32>(api);
}
3864 
3865 /// Treat api as containing the bits of a floating point number.
3866 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3867   assert(api.getBitWidth() == Sem->sizeInBits);
3868   if (Sem == &semIEEEhalf)
3869     return initFromHalfAPInt(api);
3870   if (Sem == &semBFloat)
3871     return initFromBFloatAPInt(api);
3872   if (Sem == &semIEEEsingle)
3873     return initFromFloatAPInt(api);
3874   if (Sem == &semIEEEdouble)
3875     return initFromDoubleAPInt(api);
3876   if (Sem == &semX87DoubleExtended)
3877     return initFromF80LongDoubleAPInt(api);
3878   if (Sem == &semIEEEquad)
3879     return initFromQuadrupleAPInt(api);
3880   if (Sem == &semPPCDoubleDoubleLegacy)
3881     return initFromPPCDoubleDoubleAPInt(api);
3882   if (Sem == &semFloat8E5M2)
3883     return initFromFloat8E5M2APInt(api);
3884   if (Sem == &semFloat8E5M2FNUZ)
3885     return initFromFloat8E5M2FNUZAPInt(api);
3886   if (Sem == &semFloat8E4M3FN)
3887     return initFromFloat8E4M3FNAPInt(api);
3888   if (Sem == &semFloat8E4M3FNUZ)
3889     return initFromFloat8E4M3FNUZAPInt(api);
3890   if (Sem == &semFloat8E4M3B11FNUZ)
3891     return initFromFloat8E4M3B11FNUZAPInt(api);
3892   if (Sem == &semFloatTF32)
3893     return initFromFloatTF32APInt(api);
3894 
3895   llvm_unreachable(nullptr);
3896 }
3897 
3898 /// Make this number the largest magnitude normal number in the given
3899 /// semantics.
3900 void IEEEFloat::makeLargest(bool Negative) {
3901   // We want (in interchange format):
3902   //   sign = {Negative}
3903   //   exponent = 1..10
3904   //   significand = 1..1
3905   category = fcNormal;
3906   sign = Negative;
3907   exponent = semantics->maxExponent;
3908 
3909   // Use memset to set all but the highest integerPart to all ones.
3910   integerPart *significand = significandParts();
3911   unsigned PartCount = partCount();
3912   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3913 
3914   // Set the high integerPart especially setting all unused top bits for
3915   // internal consistency.
3916   const unsigned NumUnusedHighBits =
3917     PartCount*integerPartWidth - semantics->precision;
3918   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3919                                    ? (~integerPart(0) >> NumUnusedHighBits)
3920                                    : 0;
3921 
3922   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3923       semantics->nanEncoding == fltNanEncoding::AllOnes)
3924     significand[0] &= ~integerPart(1);
3925 }
3926 
3927 /// Make this number the smallest magnitude denormal number in the given
3928 /// semantics.
3929 void IEEEFloat::makeSmallest(bool Negative) {
3930   // We want (in interchange format):
3931   //   sign = {Negative}
3932   //   exponent = 0..0
3933   //   significand = 0..01
3934   category = fcNormal;
3935   sign = Negative;
3936   exponent = semantics->minExponent;
3937   APInt::tcSet(significandParts(), 1, partCount());
3938 }
3939 
3940 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3941   // We want (in interchange format):
3942   //   sign = {Negative}
3943   //   exponent = 0..0
3944   //   significand = 10..0
3945 
3946   category = fcNormal;
3947   zeroSignificand();
3948   sign = Negative;
3949   exponent = semantics->minExponent;
3950   APInt::tcSetBit(significandParts(), semantics->precision - 1);
3951 }
3952 
/// Constructs a value in semantics \p Sem from the raw bits in \p API.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}
3956 
/// Constructs a semIEEEsingle value from the bit pattern of the host float
/// \p f.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}
3960 
/// Constructs a semIEEEdouble value from the bit pattern of the host double
/// \p d.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}
3964 
3965 namespace {
3966   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3967     Buffer.append(Str.begin(), Str.end());
3968   }
3969 
3970   /// Removes data from the given significand until it is no more
3971   /// precise than is required for the desired precision.
3972   void AdjustToPrecision(APInt &significand,
3973                          int &exp, unsigned FormatPrecision) {
3974     unsigned bits = significand.getActiveBits();
3975 
3976     // 196/59 is a very slight overestimate of lg_2(10).
3977     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3978 
3979     if (bits <= bitsRequired) return;
3980 
3981     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3982     if (!tensRemovable) return;
3983 
3984     exp += tensRemovable;
3985 
3986     APInt divisor(significand.getBitWidth(), 1);
3987     APInt powten(significand.getBitWidth(), 10);
3988     while (true) {
3989       if (tensRemovable & 1)
3990         divisor *= powten;
3991       tensRemovable >>= 1;
3992       if (!tensRemovable) break;
3993       powten *= powten;
3994     }
3995 
3996     significand = significand.udiv(divisor);
3997 
3998     // Truncate the significand down to its active bit count.
3999     significand = significand.trunc(significand.getActiveBits());
4000   }
4001 
4002 
4003   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4004                          int &exp, unsigned FormatPrecision) {
4005     unsigned N = buffer.size();
4006     if (N <= FormatPrecision) return;
4007 
4008     // The most significant figures are the last ones in the buffer.
4009     unsigned FirstSignificant = N - FormatPrecision;
4010 
4011     // Round.
4012     // FIXME: this probably shouldn't use 'round half up'.
4013 
4014     // Rounding down is just a truncation, except we also want to drop
4015     // trailing zeros from the new result.
4016     if (buffer[FirstSignificant - 1] < '5') {
4017       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4018         FirstSignificant++;
4019 
4020       exp += FirstSignificant;
4021       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4022       return;
4023     }
4024 
4025     // Rounding up requires a decimal add-with-carry.  If we continue
4026     // the carry, the newly-introduced zeros will just be truncated.
4027     for (unsigned I = FirstSignificant; I != N; ++I) {
4028       if (buffer[I] == '9') {
4029         FirstSignificant++;
4030       } else {
4031         buffer[I]++;
4032         break;
4033       }
4034     }
4035 
4036     // If we carried through, we have exactly one digit of precision.
4037     if (FirstSignificant == N) {
4038       exp += FirstSignificant;
4039       buffer.clear();
4040       buffer.push_back('1');
4041       return;
4042     }
4043 
4044     exp += FirstSignificant;
4045     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4046   }
4047 } // namespace
4048 
/// Appends a decimal rendering of this value to \p Str.
///
/// \param Str output buffer; characters are appended, never removed.
/// \param FormatPrecision maximum number of significant decimal digits; when
///        zero, a digit count derived from the precision of the semantics is
///        used instead.
/// \param FormatMaxPadding when zero, scientific notation is always used;
///        otherwise it bounds how many padding zeros plain notation may
///        introduce before scientific notation is chosen.
/// \param TruncateZero when true, scientific output uses 'E' and is not
///        zero-padded; when false, it uses 'e', pads the fraction with zeros
///        up to FormatPrecision and prints at least two exponent digits.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  // Special values and zero are printed directly.
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  if (isNegative())
    Str.push_back('-');

  // Decompose the number into an APInt and an exponent.
  int exp = exponent - ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  // Set FormatPrecision if zero.  We want to do this before we
  // truncate trailing zeros, as those are part of the precision.
  if (!FormatPrecision) {
    // We use enough digits so the number can be round-tripped back to an
    // APFloat. The formula comes from "How to Print Floating-Point Numbers
    // Accurately" by Steele and White.
    // FIXME: Using a formula based purely on the precision is conservative;
    // we can print fewer digits depending on the actual value being printed.

    // FormatPrecision = 2 + floor(significandBits / lg_2(10))
    FormatPrecision = 2 + semantics->precision * 59 / 196;
  }

  // Ignore trailing binary zeros.
  int trailingZeros = significand.countr_zero();
  exp += trailingZeros;
  significand.lshrInPlace(trailingZeros);

  // Change the exponent from 2^e to 10^e.
  if (exp == 0) {
    // Nothing to do.
  } else if (exp > 0) {
    // Just shift left.
    significand = significand.zext(semantics->precision + exp);
    significand <<= exp;
    exp = 0;
  } else { /* exp < 0 */
    int texp = -exp;

    // We transform this using the identity:
    //   (N)(2^-e) == (N)(5^e)(10^-e)
    // This means we have to multiply N (the significand) by 5^e.
    // To avoid overflow, we have to operate on numbers large
    // enough to store N * 5^e:
    //   log2(N * 5^e) == log2(N) + e * log2(5)
    //                 <= semantics->precision + e * 137 / 59
    //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

    unsigned precision = semantics->precision + (137 * texp + 136) / 59;

    // Multiply significand by 5^e.
    //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
    significand = significand.zext(precision);
    APInt five_to_the_i(precision, 5);
    while (true) {
      if (texp & 1) significand *= five_to_the_i;

      texp >>= 1;
      if (!texp) break;
      five_to_the_i *= five_to_the_i;
    }
  }

  // From here on the value is significand * 10^exp.
  AdjustToPrecision(significand, exp, FormatPrecision);

  SmallVector<char, 256> buffer;

  // Fill the buffer.
  unsigned precision = significand.getBitWidth();
  if (precision < 4) {
    // We need enough precision to store the value 10.
    precision = 4;
    significand = significand.zext(precision);
  }
  APInt ten(precision, 10);
  APInt digit(precision, 0);

  // Digits are produced least significant first, so buffer ends up holding
  // the number in reverse order.
  bool inTrail = true;
  while (significand != 0) {
    // digit <- significand % 10
    // significand <- significand / 10
    APInt::udivrem(significand, ten, significand, digit);

    unsigned d = digit.getZExtValue();

    // Drop trailing zeros.
    if (inTrail && !d) exp++;
    else {
      buffer.push_back((char) ('0' + d));
      inTrail = false;
    }
  }

  assert(!buffer.empty() && "no characters in buffer!");

  // Drop down to FormatPrecision.
  // TODO: don't do more precise calculations above than are required.
  AdjustToPrecision(buffer, exp, FormatPrecision);

  unsigned NDigits = buffer.size();

  // Check whether we should use scientific notation.
  bool FormatScientific;
  if (!FormatMaxPadding)
    FormatScientific = true;
  else {
    if (exp >= 0) {
      // 765e3 --> 765000
      //              ^^^
      // But we shouldn't make the number look more precise than it is.
      FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                          NDigits + (unsigned) exp > FormatPrecision);
    } else {
      // Power of the most significant digit.
      int MSD = exp + (int) (NDigits - 1);
      if (MSD >= 0) {
        // 765e-2 == 7.65
        FormatScientific = false;
      } else {
        // 765e-5 == 0.00765
        //           ^ ^^
        FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
      }
    }
  }

  // Scientific formatting is pretty straightforward.
  if (FormatScientific) {
    // Rebase the exponent onto the leading digit: d.ddd * 10^exp.
    exp += (NDigits - 1);

    Str.push_back(buffer[NDigits-1]);
    Str.push_back('.');
    if (NDigits == 1 && TruncateZero)
      Str.push_back('0');
    else
      for (unsigned I = 1; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
    // Fill with zeros up to FormatPrecision.
    if (!TruncateZero && FormatPrecision > NDigits - 1)
      Str.append(FormatPrecision - NDigits + 1, '0');
    // For !TruncateZero we use lower 'e'.
    Str.push_back(TruncateZero ? 'E' : 'e');

    Str.push_back(exp >= 0 ? '+' : '-');
    if (exp < 0) exp = -exp;
    // Exponent digits are also generated in reverse and emitted backwards.
    SmallVector<char, 6> expbuf;
    do {
      expbuf.push_back((char) ('0' + (exp % 10)));
      exp /= 10;
    } while (exp);
    // Exponent always at least two digits if we do not truncate zeros.
    if (!TruncateZero && expbuf.size() < 2)
      expbuf.push_back('0');
    for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
      Str.push_back(expbuf[E-1-I]);
    return;
  }

  // Non-scientific, positive exponents.
  if (exp >= 0) {
    for (unsigned I = 0; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-1-I]);
    for (unsigned I = 0; I != (unsigned) exp; ++I)
      Str.push_back('0');
    return;
  }

  // Non-scientific, negative exponents.

  // The number of digits to the left of the decimal point.
  int NWholeDigits = exp + (int) NDigits;

  unsigned I = 0;
  if (NWholeDigits > 0) {
    for (; I != (unsigned) NWholeDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);
    Str.push_back('.');
  } else {
    // No whole digits: emit "0." followed by -NWholeDigits leading zeros.
    unsigned NZeros = 1 + (unsigned) -NWholeDigits;

    Str.push_back('0');
    Str.push_back('.');
    for (unsigned Z = 1; Z != NZeros; ++Z)
      Str.push_back('0');
  }

  // Remaining (fractional) digits.
  for (; I != NDigits; ++I)
    Str.push_back(buffer[NDigits-I-1]);
}
4269 
4270 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4271   // Special floats and denormals have no exact inverse.
4272   if (!isFiniteNonZero())
4273     return false;
4274 
4275   // Check that the number is a power of two by making sure that only the
4276   // integer bit is set in the significand.
4277   if (significandLSB() != semantics->precision - 1)
4278     return false;
4279 
4280   // Get the inverse.
4281   IEEEFloat reciprocal(*semantics, 1ULL);
4282   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4283     return false;
4284 
4285   // Avoid multiplication with a denormal, it is not safe on all platforms and
4286   // may be slower than a normal division.
4287   if (reciprocal.isDenormal())
4288     return false;
4289 
4290   assert(reciprocal.isFiniteNonZero() &&
4291          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4292 
4293   if (inv)
4294     *inv = APFloat(reciprocal, *semantics);
4295 
4296   return true;
4297 }
4298 
4299 int IEEEFloat::getExactLog2Abs() const {
4300   if (!isFinite() || isZero())
4301     return INT_MIN;
4302 
4303   const integerPart *Parts = significandParts();
4304   const int PartCount = partCountForBits(semantics->precision);
4305 
4306   int PopCount = 0;
4307   for (int i = 0; i < PartCount; ++i) {
4308     PopCount += llvm::popcount(Parts[i]);
4309     if (PopCount > 1)
4310       return INT_MIN;
4311   }
4312 
4313   if (exponent != semantics->minExponent)
4314     return exponent;
4315 
4316   int CountrParts = 0;
4317   for (int i = 0; i < PartCount;
4318        ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4319     if (Parts[i] != 0) {
4320       return exponent - semantics->precision + CountrParts +
4321              llvm::countr_zero(Parts[i]) + 1;
4322     }
4323   }
4324 
4325   llvm_unreachable("didn't find the set bit");
4326 }
4327 
4328 bool IEEEFloat::isSignaling() const {
4329   if (!isNaN())
4330     return false;
4331   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4332     return false;
4333 
4334   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4335   // first bit of the trailing significand being 0.
4336   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4337 }
4338 
/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
///
/// \param nextDown when true, compute nextDown(x) instead of nextUp(x).
/// \returns opInvalidOp if the input was a signaling NaN (which is quieted),
///          opOK otherwise. The value is updated in place.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // Formats that use the negative-zero bit pattern as their NaN encoding
      // cannot represent -0, so the result becomes +0 there.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        // The positive getLargest() case was handled above, so the exponent
        // is expected to have room to grow here.
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}
4466 
4467 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4468   return ::exponentNaN(*semantics);
4469 }
4470 
4471 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4472   return ::exponentInf(*semantics);
4473 }
4474 
4475 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4476   return ::exponentZero(*semantics);
4477 }
4478 
4479 void IEEEFloat::makeInf(bool Negative) {
4480   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4481     // There is no Inf, so make NaN instead.
4482     makeNaN(false, Negative);
4483     return;
4484   }
4485   category = fcInfinity;
4486   sign = Negative;
4487   exponent = exponentInf();
4488   APInt::tcSet(significandParts(), 0, partCount());
4489 }
4490 
4491 void IEEEFloat::makeZero(bool Negative) {
4492   category = fcZero;
4493   sign = Negative;
4494   if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4495     // Merge negative zero to positive because 0b10000...000 is used for NaN
4496     sign = false;
4497   }
4498   exponent = exponentZero();
4499   APInt::tcSet(significandParts(), 0, partCount());
4500 }
4501 
4502 void IEEEFloat::makeQuiet() {
4503   assert(isNaN());
4504   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4505     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4506 }
4507 
4508 int ilogb(const IEEEFloat &Arg) {
4509   if (Arg.isNaN())
4510     return IEEEFloat::IEK_NaN;
4511   if (Arg.isZero())
4512     return IEEEFloat::IEK_Zero;
4513   if (Arg.isInfinity())
4514     return IEEEFloat::IEK_Inf;
4515   if (!Arg.isDenormal())
4516     return Arg.exponent;
4517 
4518   IEEEFloat Normalized(Arg);
4519   int SignificandBits = Arg.getSemantics().precision - 1;
4520 
4521   Normalized.exponent += SignificandBits;
4522   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4523   return Normalized.exponent - SignificandBits;
4524 }
4525 
4526 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4527   auto MaxExp = X.getSemantics().maxExponent;
4528   auto MinExp = X.getSemantics().minExponent;
4529 
4530   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4531   // overflow; clamp it to a safe range before adding, but ensure that the range
4532   // is large enough that the clamp does not change the result. The range we
4533   // need to support is the difference between the largest possible exponent and
4534   // the normalized exponent of half the smallest denormal.
4535 
4536   int SignificandBits = X.getSemantics().precision - 1;
4537   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4538 
4539   // Clamp to one past the range ends to let normalize handle overlflow.
4540   X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4541   X.normalize(RoundingMode, lfExactlyZero);
4542   if (X.isNaN())
4543     X.makeQuiet();
4544   return X;
4545 }
4546 
4547 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4548   Exp = ilogb(Val);
4549 
4550   // Quiet signalling nans.
4551   if (Exp == IEEEFloat::IEK_NaN) {
4552     IEEEFloat Quiet(Val);
4553     Quiet.makeQuiet();
4554     return Quiet;
4555   }
4556 
4557   if (Exp == IEEEFloat::IEK_Inf)
4558     return Val;
4559 
4560   // 1 is added because frexp is defined to return a normalized fraction in
4561   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4562   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4563   return scalbn(Val, -Exp, RM);
4564 }
4565 
/// Construct a double-double value whose two components are each built from
/// semIEEEdouble alone. Only semPPCDoubleDouble is accepted for \p S.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4571 
/// Construct a double-double value whose two components are built with the
/// `uninitialized` tag. Only semPPCDoubleDouble is accepted for \p S.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4578 
/// Construct a double-double value from an integer part: the first component
/// is built from \p I, the second from semIEEEdouble alone. Only
/// semPPCDoubleDouble is accepted for \p S.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4584 
/// Construct a double-double value from raw bits: word 0 of \p I becomes the
/// bit pattern of the first component and word 1 that of the second.
/// NOTE(review): reads two raw 64-bit words, so \p I is presumably expected to
/// be 128 bits wide -- confirm against callers.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics == &semPPCDoubleDouble);
}
4592 
/// Construct a double-double value by taking ownership of two existing
/// components. Both \p First and \p Second must use semIEEEdouble, and \p S
/// must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
                             APFloat &&Second)
    : Semantics(&S),
      Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
  assert(Semantics == &semPPCDoubleDouble);
  assert(&Floats[0].getSemantics() == &semIEEEdouble);
  assert(&Floats[1].getSemantics() == &semIEEEdouble);
}
4601 
/// Copy constructor. Deep-copies both components of \p RHS; if \p RHS has no
/// component storage (Floats is null), the copy has none either.
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
    : Semantics(RHS.Semantics),
      Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
                                         APFloat(RHS.Floats[1])}
                        : nullptr) {
  assert(Semantics == &semPPCDoubleDouble);
}
4609 
/// Move constructor. Takes over the component storage of \p RHS and marks
/// \p RHS as moved-from by pointing its semantics at semBogus.
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
    : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
  RHS.Semantics = &semBogus;
  assert(Semantics == &semPPCDoubleDouble);
}
4615 
4616 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4617   if (Semantics == RHS.Semantics && RHS.Floats) {
4618     Floats[0] = RHS.Floats[0];
4619     Floats[1] = RHS.Floats[1];
4620   } else if (this != &RHS) {
4621     this->~DoubleAPFloat();
4622     new (this) DoubleAPFloat(RHS);
4623   }
4624   return *this;
4625 }
4626 
// Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl computes (a + aa) + (c + cc), where (a, aa) and (c, cc) are the
// component pairs of the two operands, and stores the result pair into
// Floats[0] / Floats[1]. The status flags of the individual double operations
// are OR-ed together and returned (except on the paths noted below). The
// operands are taken by reference; the caller in this file passes copies, so
// writing Floats[] does not disturb them.
APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
                                         const APFloat &c, const APFloat &cc,
                                         roundingMode RM) {
  int Status = opOK;
  APFloat z = a;
  Status |= z.add(c, RM);
  if (!z.isFinite()) {
    // a + c is not finite.
    if (!z.isInfinity()) {
      // Not finite and not infinity: store it as-is with a zero second part.
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // a + c became infinite. Discard the status so far and retry the sum
    // starting from the second components, adding the first component of
    // smaller magnitude before the larger one.
    Status = opOK;
    auto AComparedToC = a.compareAbsoluteValue(c);
    z = cc;
    Status |= z.add(aa, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // z = cc + aa + c + a;
      Status |= z.add(c, RM);
      Status |= z.add(a, RM);
    } else {
      // z = cc + aa + a + c;
      Status |= z.add(a, RM);
      Status |= z.add(c, RM);
    }
    if (!z.isFinite()) {
      // Still not finite: that is the result, with a zero second part.
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    Floats[0] = z;
    APFloat zz = aa;
    Status |= zz.add(cc, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // Floats[1] = a - z + c + zz;
      Floats[1] = a;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(c, RM);
      Status |= Floats[1].add(zz, RM);
    } else {
      // Floats[1] = c - z + a + zz;
      Floats[1] = c;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(a, RM);
      Status |= Floats[1].add(zz, RM);
    }
  } else {
    // q = a - z;
    APFloat q = a;
    Status |= q.subtract(z, RM);

    // zz = q + c + (a - (q + z)) + aa + cc;
    // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
    auto zz = q;
    Status |= zz.add(c, RM);
    Status |= q.add(z, RM);
    Status |= q.subtract(a, RM);
    q.changeSign();
    Status |= zz.add(q, RM);
    Status |= zz.add(aa, RM);
    Status |= zz.add(cc, RM);
    if (zz.isZero() && !zz.isNegative()) {
      // The correction term is +0: z alone is the result.
      // NOTE(review): this path returns opOK and drops any flags accumulated
      // in Status -- confirm that is intended.
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return opOK;
    }
    Floats[0] = z;
    Status |= Floats[0].add(zz, RM);
    if (!Floats[0].isFinite()) {
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // Floats[1] = z - Floats[0] + zz;
    Floats[1] = std::move(z);
    Status |= Floats[1].subtract(Floats[0], RM);
    Status |= Floats[1].add(zz, RM);
  }
  return (opStatus)Status;
}
4708 
4709 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4710                                                 const DoubleAPFloat &RHS,
4711                                                 DoubleAPFloat &Out,
4712                                                 roundingMode RM) {
4713   if (LHS.getCategory() == fcNaN) {
4714     Out = LHS;
4715     return opOK;
4716   }
4717   if (RHS.getCategory() == fcNaN) {
4718     Out = RHS;
4719     return opOK;
4720   }
4721   if (LHS.getCategory() == fcZero) {
4722     Out = RHS;
4723     return opOK;
4724   }
4725   if (RHS.getCategory() == fcZero) {
4726     Out = LHS;
4727     return opOK;
4728   }
4729   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4730       LHS.isNegative() != RHS.isNegative()) {
4731     Out.makeNaN(false, Out.isNegative(), nullptr);
4732     return opInvalidOp;
4733   }
4734   if (LHS.getCategory() == fcInfinity) {
4735     Out = LHS;
4736     return opOK;
4737   }
4738   if (RHS.getCategory() == fcInfinity) {
4739     Out = RHS;
4740     return opOK;
4741   }
4742   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4743 
4744   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4745       CC(RHS.Floats[1]);
4746   assert(&A.getSemantics() == &semIEEEdouble);
4747   assert(&AA.getSemantics() == &semIEEEdouble);
4748   assert(&C.getSemantics() == &semIEEEdouble);
4749   assert(&CC.getSemantics() == &semIEEEdouble);
4750   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4751   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4752   return Out.addImpl(A, AA, C, CC, RM);
4753 }
4754 
4755 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4756                                      roundingMode RM) {
4757   return addWithSpecial(*this, RHS, *this, RM);
4758 }
4759 
4760 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4761                                           roundingMode RM) {
4762   changeSign();
4763   auto Ret = add(RHS, RM);
4764   changeSign();
4765   return Ret;
4766 }
4767 
4768 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4769                                           APFloat::roundingMode RM) {
4770   const auto &LHS = *this;
4771   auto &Out = *this;
4772   /* Interesting observation: For special categories, finding the lowest
4773      common ancestor of the following layered graph gives the correct
4774      return category:
4775 
4776         NaN
4777        /   \
4778      Zero  Inf
4779        \   /
4780        Normal
4781 
4782      e.g. NaN * NaN = NaN
4783           Zero * Inf = NaN
4784           Normal * Zero = Zero
4785           Normal * Inf = Inf
4786   */
4787   if (LHS.getCategory() == fcNaN) {
4788     Out = LHS;
4789     return opOK;
4790   }
4791   if (RHS.getCategory() == fcNaN) {
4792     Out = RHS;
4793     return opOK;
4794   }
4795   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4796       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4797     Out.makeNaN(false, false, nullptr);
4798     return opOK;
4799   }
4800   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4801     Out = LHS;
4802     return opOK;
4803   }
4804   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4805     Out = RHS;
4806     return opOK;
4807   }
4808   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4809          "Special cases not handled exhaustively");
4810 
4811   int Status = opOK;
4812   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4813   // t = a * c
4814   APFloat T = A;
4815   Status |= T.multiply(C, RM);
4816   if (!T.isFiniteNonZero()) {
4817     Floats[0] = T;
4818     Floats[1].makeZero(/* Neg = */ false);
4819     return (opStatus)Status;
4820   }
4821 
4822   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4823   APFloat Tau = A;
4824   T.changeSign();
4825   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4826   T.changeSign();
4827   {
4828     // v = a * d
4829     APFloat V = A;
4830     Status |= V.multiply(D, RM);
4831     // w = b * c
4832     APFloat W = B;
4833     Status |= W.multiply(C, RM);
4834     Status |= V.add(W, RM);
4835     // tau += v + w
4836     Status |= Tau.add(V, RM);
4837   }
4838   // u = t + tau
4839   APFloat U = T;
4840   Status |= U.add(Tau, RM);
4841 
4842   Floats[0] = U;
4843   if (!U.isFinite()) {
4844     Floats[1].makeZero(/* Neg = */ false);
4845   } else {
4846     // Floats[1] = (t - u) + tau
4847     Status |= T.subtract(U, RM);
4848     Status |= T.add(Tau, RM);
4849     Floats[1] = T;
4850   }
4851   return (opStatus)Status;
4852 }
4853 
4854 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4855                                         APFloat::roundingMode RM) {
4856   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4857   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4858   auto Ret =
4859       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4860   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4861   return Ret;
4862 }
4863 
4864 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4865   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4866   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4867   auto Ret =
4868       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4869   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4870   return Ret;
4871 }
4872 
4873 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4874   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4875   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4876   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4877   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4878   return Ret;
4879 }
4880 
4881 APFloat::opStatus
4882 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4883                                 const DoubleAPFloat &Addend,
4884                                 APFloat::roundingMode RM) {
4885   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4886   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4887   auto Ret = Tmp.fusedMultiplyAdd(
4888       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4889       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4890   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4891   return Ret;
4892 }
4893 
4894 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4895   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4896   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4897   auto Ret = Tmp.roundToIntegral(RM);
4898   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4899   return Ret;
4900 }
4901 
4902 void DoubleAPFloat::changeSign() {
4903   Floats[0].changeSign();
4904   Floats[1].changeSign();
4905 }
4906 
4907 APFloat::cmpResult
4908 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4909   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4910   if (Result != cmpEqual)
4911     return Result;
4912   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4913   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4914     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4915     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4916     if (Against && !RHSAgainst)
4917       return cmpLessThan;
4918     if (!Against && RHSAgainst)
4919       return cmpGreaterThan;
4920     if (!Against && !RHSAgainst)
4921       return Result;
4922     if (Against && RHSAgainst)
4923       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4924   }
4925   return Result;
4926 }
4927 
4928 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4929   return Floats[0].getCategory();
4930 }
4931 
4932 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4933 
4934 void DoubleAPFloat::makeInf(bool Neg) {
4935   Floats[0].makeInf(Neg);
4936   Floats[1].makeZero(/* Neg = */ false);
4937 }
4938 
4939 void DoubleAPFloat::makeZero(bool Neg) {
4940   Floats[0].makeZero(Neg);
4941   Floats[1].makeZero(/* Neg = */ false);
4942 }
4943 
4944 void DoubleAPFloat::makeLargest(bool Neg) {
4945   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4946   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4947   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4948   if (Neg)
4949     changeSign();
4950 }
4951 
4952 void DoubleAPFloat::makeSmallest(bool Neg) {
4953   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4954   Floats[0].makeSmallest(Neg);
4955   Floats[1].makeZero(/* Neg = */ false);
4956 }
4957 
4958 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4959   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4960   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4961   if (Neg)
4962     Floats[0].changeSign();
4963   Floats[1].makeZero(/* Neg = */ false);
4964 }
4965 
4966 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4967   Floats[0].makeNaN(SNaN, Neg, fill);
4968   Floats[1].makeZero(/* Neg = */ false);
4969 }
4970 
4971 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4972   auto Result = Floats[0].compare(RHS.Floats[0]);
4973   // |Float[0]| > |Float[1]|
4974   if (Result == APFloat::cmpEqual)
4975     return Floats[1].compare(RHS.Floats[1]);
4976   return Result;
4977 }
4978 
4979 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4980   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4981          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4982 }
4983 
4984 hash_code hash_value(const DoubleAPFloat &Arg) {
4985   if (Arg.Floats)
4986     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4987   return hash_combine(Arg.Semantics);
4988 }
4989 
4990 APInt DoubleAPFloat::bitcastToAPInt() const {
4991   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4992   uint64_t Data[] = {
4993       Floats[0].bitcastToAPInt().getRawData()[0],
4994       Floats[1].bitcastToAPInt().getRawData()[0],
4995   };
4996   return APInt(128, 2, Data);
4997 }
4998 
4999 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5000                                                              roundingMode RM) {
5001   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5002   APFloat Tmp(semPPCDoubleDoubleLegacy);
5003   auto Ret = Tmp.convertFromString(S, RM);
5004   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5005   return Ret;
5006 }
5007 
5008 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5009   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5010   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5011   auto Ret = Tmp.next(nextDown);
5012   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5013   return Ret;
5014 }
5015 
5016 APFloat::opStatus
5017 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5018                                 unsigned int Width, bool IsSigned,
5019                                 roundingMode RM, bool *IsExact) const {
5020   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5021   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5022       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5023 }
5024 
5025 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5026                                                   bool IsSigned,
5027                                                   roundingMode RM) {
5028   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5029   APFloat Tmp(semPPCDoubleDoubleLegacy);
5030   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5031   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5032   return Ret;
5033 }
5034 
5035 APFloat::opStatus
5036 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5037                                               unsigned int InputSize,
5038                                               bool IsSigned, roundingMode RM) {
5039   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5040   APFloat Tmp(semPPCDoubleDoubleLegacy);
5041   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5042   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5043   return Ret;
5044 }
5045 
5046 APFloat::opStatus
5047 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5048                                               unsigned int InputSize,
5049                                               bool IsSigned, roundingMode RM) {
5050   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5051   APFloat Tmp(semPPCDoubleDoubleLegacy);
5052   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5053   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5054   return Ret;
5055 }
5056 
5057 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5058                                                unsigned int HexDigits,
5059                                                bool UpperCase,
5060                                                roundingMode RM) const {
5061   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5062   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5063       .convertToHexString(DST, HexDigits, UpperCase, RM);
5064 }
5065 
5066 bool DoubleAPFloat::isDenormal() const {
5067   return getCategory() == fcNormal &&
5068          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5069           // (double)(Hi + Lo) == Hi defines a normal number.
5070           Floats[0] != Floats[0] + Floats[1]);
5071 }
5072 
5073 bool DoubleAPFloat::isSmallest() const {
5074   if (getCategory() != fcNormal)
5075     return false;
5076   DoubleAPFloat Tmp(*this);
5077   Tmp.makeSmallest(this->isNegative());
5078   return Tmp.compare(*this) == cmpEqual;
5079 }
5080 
5081 bool DoubleAPFloat::isSmallestNormalized() const {
5082   if (getCategory() != fcNormal)
5083     return false;
5084 
5085   DoubleAPFloat Tmp(*this);
5086   Tmp.makeSmallestNormalized(this->isNegative());
5087   return Tmp.compare(*this) == cmpEqual;
5088 }
5089 
5090 bool DoubleAPFloat::isLargest() const {
5091   if (getCategory() != fcNormal)
5092     return false;
5093   DoubleAPFloat Tmp(*this);
5094   Tmp.makeLargest(this->isNegative());
5095   return Tmp.compare(*this) == cmpEqual;
5096 }
5097 
5098 bool DoubleAPFloat::isInteger() const {
5099   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5100   return Floats[0].isInteger() && Floats[1].isInteger();
5101 }
5102 
5103 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5104                              unsigned FormatPrecision,
5105                              unsigned FormatMaxPadding,
5106                              bool TruncateZero) const {
5107   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5108   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5109       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5110 }
5111 
5112 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5113   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5114   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5115   if (!inv)
5116     return Tmp.getExactInverse(nullptr);
5117   APFloat Inv(semPPCDoubleDoubleLegacy);
5118   auto Ret = Tmp.getExactInverse(&Inv);
5119   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5120   return Ret;
5121 }
5122 
5123 int DoubleAPFloat::getExactLog2() const {
5124   // TODO: Implement me
5125   return INT_MIN;
5126 }
5127 
5128 int DoubleAPFloat::getExactLog2Abs() const {
5129   // TODO: Implement me
5130   return INT_MIN;
5131 }
5132 
5133 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5134                      APFloat::roundingMode RM) {
5135   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5136   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5137                        scalbn(Arg.Floats[1], Exp, RM));
5138 }
5139 
5140 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5141                     APFloat::roundingMode RM) {
5142   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5143   APFloat First = frexp(Arg.Floats[0], Exp, RM);
5144   APFloat Second = Arg.Floats[1];
5145   if (Arg.getCategory() == APFloat::fcNormal)
5146     Second = scalbn(Second, -Exp, RM);
5147   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5148 }
5149 
5150 } // namespace detail
5151 
5152 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5153   if (usesLayout<IEEEFloat>(Semantics)) {
5154     new (&IEEE) IEEEFloat(std::move(F));
5155     return;
5156   }
5157   if (usesLayout<DoubleAPFloat>(Semantics)) {
5158     const fltSemantics& S = F.getSemantics();
5159     new (&Double)
5160         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5161                       APFloat(semIEEEdouble));
5162     return;
5163   }
5164   llvm_unreachable("Unexpected semantics");
5165 }
5166 
5167 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5168                                                        roundingMode RM) {
5169   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5170 }
5171 
5172 hash_code hash_value(const APFloat &Arg) {
5173   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5174     return hash_value(Arg.U.IEEE);
5175   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5176     return hash_value(Arg.U.Double);
5177   llvm_unreachable("Unexpected semantics");
5178 }
5179 
5180 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5181     : APFloat(Semantics) {
5182   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5183   assert(StatusOrErr && "Invalid floating point representation");
5184   consumeError(StatusOrErr.takeError());
5185 }
5186 
5187 FPClassTest APFloat::classify() const {
5188   if (isZero())
5189     return isNegative() ? fcNegZero : fcPosZero;
5190   if (isNormal())
5191     return isNegative() ? fcNegNormal : fcPosNormal;
5192   if (isDenormal())
5193     return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5194   if (isInfinity())
5195     return isNegative() ? fcNegInf : fcPosInf;
5196   assert(isNaN() && "Other class of FP constant");
5197   return isSignaling() ? fcSNan : fcQNan;
5198 }
5199 
5200 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5201                                    roundingMode RM, bool *losesInfo) {
5202   if (&getSemantics() == &ToSemantics) {
5203     *losesInfo = false;
5204     return opOK;
5205   }
5206   if (usesLayout<IEEEFloat>(getSemantics()) &&
5207       usesLayout<IEEEFloat>(ToSemantics))
5208     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5209   if (usesLayout<IEEEFloat>(getSemantics()) &&
5210       usesLayout<DoubleAPFloat>(ToSemantics)) {
5211     assert(&ToSemantics == &semPPCDoubleDouble);
5212     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5213     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5214     return Ret;
5215   }
5216   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5217       usesLayout<IEEEFloat>(ToSemantics)) {
5218     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5219     *this = APFloat(std::move(getIEEE()), ToSemantics);
5220     return Ret;
5221   }
5222   llvm_unreachable("Unexpected semantics");
5223 }
5224 
5225 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5226   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5227 }
5228 
5229 void APFloat::print(raw_ostream &OS) const {
5230   SmallVector<char, 16> Buffer;
5231   toString(Buffer);
5232   OS << Buffer << "\n";
5233 }
5234 
5235 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5236 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5237 #endif
5238 
5239 void APFloat::Profile(FoldingSetNodeID &NID) const {
5240   NID.Add(bitcastToAPInt());
5241 }
5242 
5243 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5244    an APSInt, whose initial bit-width and signed-ness are used to determine the
5245    precision of the conversion.
5246  */
5247 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5248                                             roundingMode rounding_mode,
5249                                             bool *isExact) const {
5250   unsigned bitWidth = result.getBitWidth();
5251   SmallVector<uint64_t, 4> parts(result.getNumWords());
5252   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5253                                      rounding_mode, isExact);
5254   // Keeps the original signed-ness.
5255   result = APInt(bitWidth, parts);
5256   return status;
5257 }
5258 
5259 double APFloat::convertToDouble() const {
5260   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5261     return getIEEE().convertToDouble();
5262   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5263          "Float semantics is not representable by IEEEdouble");
5264   APFloat Temp = *this;
5265   bool LosesInfo;
5266   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5267   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5268   (void)St;
5269   return Temp.getIEEE().convertToDouble();
5270 }
5271 
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128. Values already in IEEE quad
/// semantics are read out directly; any other semantics must be
/// representable by IEEE quad (asserted) and is converted through a
/// temporary copy, which is asserted to be exact.
float128 APFloat::convertToQuad() const {
  const fltSemantics &Target = semIEEEquad;
  if (&getSemantics() == &Target)
    return getIEEE().convertToQuad();

  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat Converted = *this;
  bool LosesInfo;
  const opStatus Status =
      Converted.convert(Target, rmNearestTiesToEven, &LosesInfo);
  assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
  // Status is only inspected by the assertion above.
  (void)Status;
  return Converted.getIEEE().convertToQuad();
}
#endif
5286 
5287 float APFloat::convertToFloat() const {
5288   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5289     return getIEEE().convertToFloat();
5290   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5291          "Float semantics is not representable by IEEEsingle");
5292   APFloat Temp = *this;
5293   bool LosesInfo;
5294   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5295   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5296   (void)St;
5297   return Temp.getIEEE().convertToFloat();
5298 }
5299 
5300 } // namespace llvm
5301 
5302 #undef APFLOAT_DISPATCH_ON_SEMANTICS
5303